diff --git a/CMakeLists.txt b/CMakeLists.txt index 710b4774ca021c2e916460e7253d4fbf979a38cc..cfaab206e1f321a55119d4a8d65c4a99d3819fff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,10 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) +option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) +option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) +option(WITH_CONTRIB "Compile the third-party contributation" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -202,7 +205,7 @@ endif(USE_NNPACK) add_subdirectory(proto) -if(NOT MOBILE_INFERENCE) +if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY) # "add_subdirectory(go)" should be placed after the following loine, # because it depends on paddle/optimizer. add_subdirectory(paddle/optimizer) @@ -230,3 +233,7 @@ if(WITH_DOC) find_python_module(recommonmark REQUIRED) add_subdirectory(doc) endif() + +if (WITH_CONTRIB) + add_subdirectory(paddle/contrib) +endif() diff --git a/Dockerfile b/Dockerfile index ea39efd00bb5c0a7deb3f6d57083d83a673b883c..e5508486d6df6a7465998b7e2926b21a1604dfb4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -101,6 +101,3 @@ RUN echo 'root:root' | chpasswd RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config EXPOSE 22 - -# development image default do build work -CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"] diff --git a/Dockerfile.android b/Dockerfile.android index 848a7eba6f1421432addae8acff407b611adb4ae..48db2efea21a648657e3f490c95429b9a29ede52 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ rm -rf /opt/android-ndk-tmp - -CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/benchmark/fluid/README.md b/benchmark/fluid/README.md index 0fc02b704362f79f2219252538b4b3195e665b2c..065df2edb8d3152ab0891798628653d3b283f1df 100644 --- a/benchmark/fluid/README.md +++ b/benchmark/fluid/README.md @@ -24,22 +24,22 @@ Currently supported `--model` argument include: * Run the following command to start a benchmark job locally: ```bash - python fluid_benchmark.py --model mnist --parallel 1 --device GPU --with_test + python fluid_benchmark.py --model mnist --device GPU ``` You can choose to use GPU/CPU training. With GPU training, you can specify - `--parallel 1` to run multi GPU training. + `--gpus ` to run multi GPU training. * Run distributed training with parameter servers: * start parameter servers: ```bash - PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver + PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver ``` * start trainers: ```bash - PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver + PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver ``` * Run distributed training using NCCL2 ```bash - PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method nccl2 + PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2 ``` ## Run Distributed Benchmark on Kubernetes Cluster @@ -48,7 +48,7 @@ We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submi distributed benchmark jobs to your cluster. To generate a job yaml, just run: ```bash -python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver --with_test" --disttype pserver +python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver " --disttype pserver ``` Then the yaml files are generated under directory `myjob`, you can run: diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e490397cc0624c310949a4b571bd00cac6e8953b..682614742cf1bd3130c638020a2545e16226d4d6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS) add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) endif(USE_EIGEN_FOR_BLAS) +if(EIGEN_USE_THREADS) + add_definitions(-DEIGEN_USE_THREADS) +endif(EIGEN_USE_THREADS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 0fde4373a4be58e71ff1a305bd4991cc554d7a34..2665996432b1f6681927320a85d6835094abe4cd 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -212,6 +212,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake ${OPTIONAL_ARGS} -Dprotobuf_BUILD_TESTS=OFF + -DCMAKE_SKIP_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index 91449042fcdfd48c95f3dd3babf958c5d572e747..f53da4d194f8d2428b4121fa1bb31f3fc95a9f64 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -1003,9 +1003,9 @@ dice_loss .. autofunction:: paddle.fluid.layers.dice_loss :noindex: -bilinear_interp +upsampling_bilinear2d ____ -.. autofunction:: paddle.fluid.layers.bilinear_interp +.. autofunction:: paddle.fluid.layers.upsampling_bilinear2d :noindex: diff --git a/doc/fluid/getstarted/Developer's_Guide_to_Paddle_Fluid.md b/doc/fluid/getstarted/Developer's_Guide_to_Paddle_Fluid.md new file mode 100644 index 0000000000000000000000000000000000000000..0c0156c8e46378e7bbeea8072938b8ccfb9ab6d7 --- /dev/null +++ b/doc/fluid/getstarted/Developer's_Guide_to_Paddle_Fluid.md @@ -0,0 +1,1819 @@ + +# Paddle Fluid 开发者指南 + +--- + +### ==1==. 为什么需要 PaddlePaddle Fluid? + +--- + +### 两个基础问题 + + + +1. 如何描述机器学习模型和优化过程? + - 完备自洽,表达能力足以支持潜在出现的各种计算需求 +1. 如何充分利用资源高效计算? + - 支持异步设备、多卡、分布式计算 + - 降低计算/计算优化的开发成本 + - …… + + + +--- + +### 如何描述模型和优化过程? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
一组连续执行的layersvariable和operator构成的计算图 不再有模型的概念
2013 Caffe,Theano, Torch, PaddlePaddle
2015 TensorFlow, MxNet, Caffe2, ONNX, n-graph
2016 PyTorch, TensorFlow Eager Execution, **==PaddlePaddle Fluid==**
+ +--- + + +###

目标

+ + + +- 提高对各类机器学习任务的描述能力:能够描述潜在出现的任意机器学习模型。 +- 代码结构逻辑清晰,各模块充分解耦:内外部贡献者能够专注于自己所需的功能模块,基于框架进行再次开发。 +- 从设计上,留下技术优化的空间和潜力。 +- 代码解耦后降低多设备支持、计算优化等的开发成本。 +- 在统一的设计理念下,实现自动可伸缩,自动容错的分布式计算。 + + + +--- + +## ==2.== Design Overview + +--- + +# Fluid: 系统形态 + +- [编译器式的执行流程,区分编译时和运行时](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/motivation/fluid_compiler.md) +
+ +

+ +

+ +--- + +#### 让我们在Fluid程序实例中,区分编译时和运行时 + +--- +### Fluid 编译时 + + + +- ==**定义前向计算**== + + ```python + x = fluid.layers.data(name='x',shape=[13], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(x=cost) + ``` + +- ==**添加反向、正则、优化**== + ```python + learning_rate = 0.01 + sgd_optimizer = fluid.optimizer.SGD(learning_rate) + sgd_optimizer.minimize(avg_cost) + ``` + + +--- + +### `Program` vs. 计算图 + + + +- 在科学计算领域,计算图是一种描述计算的经典方式。下图展示了从前向计算图(蓝色)开始,通过添加反向(红色)和优化算法相关(绿色)操作,构建出整个计算图的过程: +- +

+ +

+ + +- Fluid ==使用`Program`而不是计算图==来描述模型和优化过程。`Program`由`Block`、`Operator`和`Variable`构成,相关概念会在后文详细展开。 +- 编译时 Fluid 接受前向计算(这里可以先简单的理解为是一段有序的计算流)`Program`,为这段前向计算按照:前向 -> 反向 -> 梯度 clip -> 正则 -> 优化 的顺序,添加相关 `Operator`和`Variable`到`Program`到完整的计算。 + +
+ +--- + +### Fluid 运行时 + + + +- ==**读入数据**== + + ```python + train_reader = paddle.batch( + paddle.reader.shuffle(paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=20) + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + ``` +- ==**定义执行程序的设备**== + ```python + place = fluid.CPUPlace() + feeder = fluid.DataFeeder(place=place,feed_list=[x, y]) + ``` + +- ==创建执行器(Executor),执行初始化 `Program`和训练`Program`== + + ```python + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + for data in train_reader(): + avg_loss_value, = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost]) + print(avg_loss_value) + ``` + + +--- + +### 总结:框架做什么?用户做什么? +
+ + + + + + + + + + + + + + + + +
构建训练执行训练
+用户:描述前向运算
框架:添加反向运算
框架:添加优化运算
框架:添加内存优化
框架:添加并行/多设备/分布式相关的计算单元 +
+框架:创建Operator(计算)+ Variable(数据)
框架:创建`Block`
框架:内存管理/设备管理
框架:执行计算 +
+
+ +--- + +###

总结:编译时

+ + +**用户编写一段Python程序,描述模型的前向计算** +1. 创建变量描述 `VarDesc` +1. 创建operators的描述 `OpDesc` +1. 创建operators的属性 +1. 推断变量的类型和形状,进行静态检查:`inferShape` +1. 规划变量的内存复用 +1. 创建反向计算 +1. 添加优化相关的Operators +1. (可选)添加多卡/多机相关的Operator,生成在多卡/多机上运行的程序 + + + +--- + +###

总结:运行时

+ + +**执行规划好的计算** +1. 创建`Executor` +1. 为将要执行的一段计算,在层级式的`Scope`空间中创建`Scope` +1. 创建`Block`,依次执行`Block` + +

+
+ Figure. 编译时运行时概览 +

+ +
+ +--- + +## ==3==. 用户如何描述计算? +--- + +### Fluid:==像写程序一样==定义计算 + + +- 顺序执行 + ```python + x = fluid.layers.data(name='x',shape=[13], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + ``` + +- 条件分支: [swith](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/switch.md)、[ifelse](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/if_else_op.md) + + ```python + a = fluid.Var(10) + b = fluid.Var(0) + + switch = fluid.switch() + with switch.block(): + with switch.case(fluid.less_equal(a, 10)): + fluid.print("Case 1") + with switch.case(fluid.larger(a, 0)): + fluid.print("Case 2") + with switch.default(): + fluid.print("Case 3") + ``` + +>[A Lisp cond form may be compared to a continued if-then-else as found in many algebraic programming languages](https://www.cs.cmu.edu/Groups/AI/html/cltl/clm/node84.html). + + + +--- + +### Fluid: ==像写程序一样==定义计算 + + + +- 循环:[while](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py#L105) + + ```python + d0 = layers.data("d0", shape=[10], dtype='float32') + data_array = layers.array_write(x=d0, i=i) + array_len = layers.fill_constant(shape=[1],dtype='int64', value=3) + + cond = layers.less_than(x=i, y=array_len) + while_op = layers.While(cond=cond) + with while_op.block(): + d = layers.array_read(array=data_array, i=i) + i = layers.increment(x=i, in_place=True) + layers.array_write(result, i=i, array=d) + layers.less_than(x=i, y=array_len, cond=cond) + ``` + +- 完整实例请点查看 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/test_while_op.py#L36-L44) +- beam search [->]( https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py#L105) + + + +--- + +####

总结

+ + + +1. 用户层提供的描述语法具有完备性、自洽性,有能力支持对复杂计算过程描述 +1. 使用方式和核心概念可以类比编程语言,认知能够直接迁移 +1. 能够支持:定义问题,逐步求解 + + + +--- + +## ==3.== 核心概念 + +--- +### 编译时概念 :==变量和计算的描述== + + + +- `VarDesc` + `TensorDesc` + `OpDesc` -> `BlockDesc` -> `ProgramDesc` + - https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto + +- 什么是 Fluid Program + + - 在Fluid中,一个神经网络任务(训练/预测)被描述为一段`Program` + - `Program`包含对`Variable`(数据)和 `Operator`(对数据的操作)的描述 + - `Variable` 和 `Operator` 被组织为多个可以嵌套的`Block`,构成一段完整的`Fluid Program` + + +>编译阶段最终,经过 Transpiler 的执行规划,变换处理,生成使用`protobuf`序列化后的`ProgramDesc`。可以发送给多卡或者网络中的其它计算节点执行 + + + +--- + +### 编译时概念 :==**[Transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/motivation/fluid_compiler.md)**== + + +1. 接受一段`ProgramDesc`作为输入,生成一段新的`ProgramDesc` + + - *Memory optimization transpiler*:向原始`ProgramDesc` 中插入 `FreeMemoryOps`,在一次迭代优化结束前提前释放内存,使得能够维持较小的 memory footprint + + - *Distributed training transpiler*:将原始的`ProgramDesc`中转化为对应的分布式版本,生成两段新的`ProgramDesc`: + 1. trainer进程执行的`ProgramDesc` + 1. parameter server执行的`ProgramDesc` + +1. ==**WIP**==: 接受一段`ProgramDesc`,生成可直接被`gcc`, `nvcc`, `icc`等编译的代码,编译后得到可执行文件 + + + +--- +### Transplier + +

+ +

+ +--- + +### 打印 `ProgramDesc` + +

+ +

+ + + +- `default_startup_program`:创建可学习参数,对参数进行初始化 +- `default_main_program`:由用户定义的模型,包括了前向、反向、优化及所有必要的计算 + +- 打印可读的 `Program` + ```python + from paddle.v2.fluid import debuger + print debuger.pprint_program_codes(framework.default_main_program().desc) + ``` + + +--- +### 输出效果 + + + + + + + + + + + + + + +
variable in block 0variable in block 0
+
+ +--- + +### 运行时概念 + + + +- 数据相关 + - `Tensor` / `LoDTensor` / `Variable` + - `Scope` + +- 计算相关 + - `Block` + - `Kernel`、`OpWithKernel`、`OpWithoutKernel` + + + + + + + + + + + + + + + + + + + + + + + + + + + +
protobuf messagesC++ class objects
Data[VarDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto#L107) +[Variable](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/variable.h#L24) +
Operation[OpDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto#L35) +[Operator](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/operator.h#L64) +
BlockBlockDesc +Block +
+ +- 执行相关 :`Executor` + +
+ +--- +#### Tensor 和 LoD(Level-of-Detail) Tensor + + +- Tensor 是$n$-dimensional arry的推广,LoDTensor是在Tensor基础上附加了序列信息 +- Fluid中输入、输出,网络中的可学习参数全部统一使用LoDTensor(n-dimension array)表示 +- 一个mini-batch输入数据是一个LoDTensor + - 在Fluid中,RNN 处理变长序列无需padding,得益于 `LoDTensor`表示 + - 可以简单将 LoD 理解为:`std::vector>` + - 对非序列数据,LoD 信息为空 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TensorFlowPaddlePaddle
RNNSupport +Support +
recursive RNNSupport +Support +
padding zerosMust +No need +
blob data typeTensor +LODTensor +
+ +
+ +--- +#### LoD 信息实例 + + + +

+ +

+ +- 图(a)的LoD 信息 + ```cpp + [0, 5, 8, 10, 14] + ``` +- 图(b)的 LoD 信息 + ```cpp + [[0, 5, 8, 10, 14] /*level=1*/, [0, 2, 3, 5, 7, 8, 10, 13, 14] /*level=2*/] + ``` +
+ +--- +#### Tensor, Variable, Scope 之间的关系 + +

+ +

+ + +1. `Block` 是一个实现层的概念,不在应用层暴露给用户。目前用户无法自行创建并利用`Block`,用户能够感知的只有`Program`这个概念。 +1. 逻辑上,可以将 `Block` 类比为编程语言中的大括号:定义了一段作用域,其中运行一段代码 +1. `Executor`会为每一个`Block`创建一个`Scope`,`Block`是可嵌套的,因此`Scope`也是可嵌套的 + + + +--- +### Executor + + + + + + + + + + + + + + +
接口说明

+ +

输入
1. `ProgramDesc`
2. `Scope`
3.`block_id`

解释执行步骤
1. 创建所有 Variables
2. 逐一创建 Operator 并运行 +
+ +--- +### Operator/OpWithKernel/Kernel + + +

+ +

+ +- operator 无状态,Operator的核心是==Run==方法 +- 一个operator可以注册多个kernel +- operator 可以无 kernel:while_op 、ifelse op + +
+ +--- +#### Fluid Operator vs. PaddlePaddle layers + + + + + + + + + + + + + + + + + + +
LayerOperator

+ +

+ +

1. 内部维护状态
2. 包含forward和backward方法
1. 内部无状态
2. 只有Run方法
+ +
+ +--- + +### ==4.== 内存管理 + +--- +### 目标 + +- 为异构设备提供统一的内存分配、回收接口 +- 最小化管理内存所需的时间,最小化管理开销 +- 减少内存碎片 +- 将内存管理与计算(Operators/Kernels)完全剥离 +- 统一内存管理是内存优化的基础 + +--- + + + +### Memory 接口 + +- 内存管理模块向上层应用逻辑提供三个基础接口: + ```cpp + template + void* Alloc(Place place, size_t size); + + template + void Free(Place place, void* ptr); + + template + size_t Used(Place place); + + struct Usage : public boost::static_visitor { + size_t operator()(const platform::CPUPlace& cpu) const; + size_t operator()(const platform::CUDAPlace& gpu) const; + }; + ``` +- 模板参数 `Place` 指示内存分配发生的设备 +- 实现时,需特化支持的 `Place`, 提供以上三个接口的实现 + + + +--- +### 代码结构 + + + +内存管理模块可以理解为由以下两部分构成: + +1. SystemAllocator:实际从物理设备上分配、释放的内存的接口 +1. BuddyAllocator:内存管理算法 + + + +--- +### System Allocator + + + +- SystemAllocator 是实现物理内存分配、回收的基类 + - 不同设备上的内存分配和回收终将转化为标准接口调用 + - 为不同设备实现MemoryAllocator,继承自SystemAllocator + + ```cpp + class SystemAllocator { + public: + virtual ~SystemAllocator() {} + virtual void* Alloc(size_t& index, size_t size) = 0; + virtual void Free(void* p, size_t size, size_t index) = 0; + virtual bool UseGpu() const = 0; + }; + ``` + + +--- + +### CPU/GPU Allocator + + + +```cpp +class CPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t& index, size_t size); + virtual void Free(void* p, size_t size, size_t index); + virtual bool UseGpu() const; +}; + +#ifdef PADDLE_WITH_CUDA +class GPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t& index, size_t size); + virtual void Free(void* p, size_t size, size_t index); + virtual bool UseGpu() const; + private: + size_t gpu_alloc_size_ = 0; + size_t fallback_alloc_size_ = 0; +}; +#endif +``` +- CPUAllocator和GPUAllocator分别继承自SystemAllocator,分别调用相应的标准库函数实现物理内存的分配和释放。 +- 一旦大块、连续的物理内存分配之后,将通过内存管理算法实现内存的按块分配、回收、重用等。 + + + +--- +### CPU Allocator + + + +- CPU 内存的分配提供两种选项: + 1. non-pinned memory:可分页内存 + 2. pinned memory:页锁定内存 + - 分配过大的页锁定内存有可能因为系统可使用的分页内存减少,影响系统性能,默认CPU下分配的是可分页内存 + +- 通过gflags进行设置一次性分配内存的大小以及是否使用页锁定内存。 + + ```cpp + DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); + DEFINE_double(fraction_of_cpu_memory_to_use, 1, + "Default use 100% of CPU memory for PaddlePaddle," + "reserve the rest for page tables, etc"); + ``` + + + +--- +### GPU Allocator + + + +- 通过 cudaMalloc 分配GPU显存 +- GPUAllocator::Alloc 首先会计算指定GPU device上的可用显存 + - 如果可用显存小于请求分配大小,调用cudaMalloc进行分配 + - 如果可用显存不足,目前会报错退出。 +- 通过gflags控制GPU下一次性分配显存的大小: + + ```cpp + DEFINE_double(fraction_of_gpu_memory_to_use, 0.92, + "Default use 92% of GPU memory for PaddlePaddle," + "reserve the rest for page tables, etc"); + ``` + + + +--- +#### 内存管理算法: [Buddy Memory Allocation](https://en.wikipedia.org/wiki/Buddy_memory_allocation) + + + +- Memory Arena:一次性分配大块连续内存,之后会基于这块内存进行内存管理:动态分配、释放、重用内存块。 +- 伙伴内存分配: + - 将内存划分为 2 的幂次方个分区,使用 best-fit 方法来分配内存请求。 + - 当释放内存时,检查 buddy 块,查看相邻的内存块是否也已被释放。如果是,将内存块合并,以最小化内存碎片。 + - 分配的内存在物理内存的自然边界对齐,提高内存访问效率。 + - 算法的时间效率高,单使用 best-fit 方法的缘故,会产生一定的内存浪费 + + + +--- + +### Buddy Allocator + + + +- BuddyAllocator 是一个单例,每个设备(如: GPU/CPU(0)/GPU(1)) 拥有一个BuddyAllocator +- BuddyAllocator 内部拥有一个私有成员变量 SystemAllocator +- 当请求的内存超过BuddyAllocator管理的空余内存时,将会调用SystemAllocator去指定的设备上分配物理内存 + + + +--- +### 实例:CPU 下内存管理接口的实现 + + + +- 对上层应用,统一通过BuddyAllocator来实现内存的分配、释放以及用量查询 + ```cpp + template <> + void* Alloc(platform::CPUPlace place, size_t size) { + VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); + void* p = GetCPUBuddyAllocator()->Alloc(size); + VLOG(10) << " pointer=" << p; + return p; + } + + template <> + void Free(platform::CPUPlace place, void* p) { + VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); + GetCPUBuddyAllocator()->Free(p); + } + + template <> + size_t Used(platform::CPUPlace place) { + return GetCPUBuddyAllocator()->Used(); + } + ``` + + +--- +### ==5.== 多设备支持 + +--- +### 多设备支持(一) + + + +- step 1:添加Place类型,由用户实现添加到框架 + - 可以将Place类型理解为一个整数加上一个枚举型,包括:设备号 + 设备类型 + +

+ +

+- DeviceContext + - 不同的Place会对应一个相应的DeviceContext,用于组织管理与设备相关的信息 + - 例如,GpuDeviceContext中会管理Cuda stream + - 目前实现中一些特殊的库也会对应有自己的DeviceContext:例如: + ```cpp + class MKLDNNDeviceContext : public CPUDeviceContext {……} + ``` + - 每种设备对应的DeviceContext需要管理的内容不尽相同,视具体需求来实现 + +
+ +--- + +### 多设备支持(二) + + + +- step 2: 增加KernelType,为相应的KernelType注册Kernel对象,由用户实现注册给框架 可以按照: + 1. Place 执行设备 + 1. DataType 执行数据类型 FP32/FP64/INT32/INT64 + 1. Memory layout: 运行时 Tensor 在内存中的排布格式 NCHW、 NHWC + 1. 使用的库 + + 来区分Kernel,为同一个operator注册多个 Kernel。 + + ```cpp + struct OpKernelType { + proto::DataType data_type_; + DataLayout data_layout_; + platform::Place place_; + LibraryType library_type_; + } + ``` + + + +--- + +### 多设备支持(三) + + + +step 3: 运行时的 KernelType 推断和Kernel切换,按需要修改Kernel推断和Kernel切换规则 +- Expected Kernel:期待调用的Kernel:由(1)`Place`和计算精度决定;或(2)用户在配置中显示指定使用的计算库,如`cudnn`、`mkldnn`等。 +- Actual Kernel:运行时从`Operator`的输入(`Variable`)可以推断出实际需要的`KernelType` +- 当Expected Kernel和Actual Kernel不一致的时候,框架会插入`data_transformer`或者`data_layerout_transform`等,保证Expected Kernel可以执行,包括: + - CPUPlace -> GPUPlace :跨设备内存复制 + - NCHW -> nChw8c :Layout转换 + - FP32 -> FP16 :精度转换 _**尚未支持**_ + - …… +- 以上过程实现在OperatorWithKernel类的Run方法中 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/operator.cc#L497) + + + +--- +## ==6.== while_op + +--- +### while_op + + + +- 循环执行一段`Program`,直到条件operator判断循环条件不满足时终止循环 +- while_op 的特殊之处: + 1. while_op 没有 kernel + 1. while_op 拥有自己的`Block`,会形成一段嵌套的`Block` + 1. ==while_op 内部创建了一个 Executor,来循环执行`Block`== + +- while_op 输入输出 : LoDTensorArray + ```cpp + namespace paddle { + namespace framework { + using LoDTensorArray = std::vector; + } + } + ``` + - 每一次循环,从原始输入中“切出”一个片段 + - LoDTensorArray 在Python端暴露,是Fluid支持的基础数据结构之一,用户可以直接创建并使用 + + + +--- +### while_op [Run](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/while_op.cc#L42) 方法概览 + + + +```cpp + +void Run(const framework::Scope &scope, + const platform::Place &dev_place) const override { + PADDLE_ENFORCE_NOT_NULL(scope.FindVar(Input(kCondition))); + auto &cond = scope.FindVar(Input(kCondition))->Get(); + PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1})); + + framework::Executor executor(dev_place); + auto *block = Attr(kStepBlock); + + auto *program = block->Program(); + auto step_scopes = + scope.FindVar(Output(kStepScopes))->GetMutable(); + + while (cond.data()[0]) { + auto ¤t_scope = scope.NewScope(); + step_scopes->push_back(¤t_scope); + executor.Run(*program, ¤t_scope, block->ID(), + false /*create_local_scope*/); + } +} + +``` + + + +--- +### while_op 的重要应用:Dynamic RNN + +--- + +### 什么是 `dynamicRNN` ? + + +
+ +1. 用户可以自定义在一个时间步之内的计算, 框架接受序列输入数据,在其上循环调用用户定义的单步计算 +1. 可学习参数在多个时间步之间共享 +1. `dynamicRNN` 由 `while_op` 实现 +1. 如果`dynamicRNN`中定义了`memory`,将会构成一个循环神经网络,否则其行为就等于在输入序列上循环调用预定义的单步计算 + +
+ +--- + +#### `dynamic RNN` 用户接口 + + +

+ +

+ +- `dynamicRNN` 中的重要元素 + 1. **step input**: `dynamicRNN` 每个时间步的输入 + 1. **step function**: 用户定义的单步计算 + 1. **memory**: 用于形成循环连接 + 1. **external/static memory**:单步计算的每一步都可以全部读取到的外部输入 + +
+ +--- + +#### dynamicRNN 中的 Memory + + + +`dynamicRNN`中`memory`的行为非常类似于 C++ 中的引用变量 + - `memory` “指向” 一个operator的输出变量,记作: A + - `memory` 可以被 LoDTensor 初始化(当LoD信息为空时,为非序列,否则为序列),默认`memory`被初始化为零 + - `memory` 在 operator A 前向计算之后,进行前向计算 + - 当 `memory` 的前向计算会 "指向" A 的输出 LoDTensor + - `memory` 的输出可以是另一个 operator 的输入,于是形成了“循环”连接 + + + +--- + +### DynamicRNN 实现细节 + + + +- `while_op` 无法独立构成dynamicRNN,必须和一组相关的 operator 及数据结构配合 + - 依赖的 operators (这里仅列出最重要的,并非全部): + - `lod_rank_table` operator + - `lod_tensor_to_array` operator + - `array_to_lod_tensor` operator + - `shrink_memory` operator + - 依赖的数据结构 + - `TensorArray` + - `LoDRankTable` + +- 在Fluid中,RNN接受变长序列输入,无需填充,以上数据结构和相关的operator配合工作,实现了对变长输入以batch计算 + + + +--- + +### `dynamicRNN` 如何实现 batch 计算 ? + + + +- 问题: + - RNN 可以看作是一个展开的前向网络,前向网络的深度是最长序列的长度 + - 如果不对变长序列进行填充,将它们填充到一样长度,每个mini-batch输入将会不等长,每个样本展开长度不一致,导致前向和反向计算实现困难 + + + +---- +##### 实例 :RNN encoder-decoder with attention + + + +- 以机器翻译的RNN encoder-decoder 模型(涉及了`dynamicRNN`的所有设计要素)为例,下图是 RNN encoder-decoder 的原始输入: +

+
Figure. RNN encoder-decoder 原始batch 输入数据 +

+ +- source word sequences 是encoder RNN的输出,是一个LoDTensor +- target word sequences 是look_uptable的输入,是一个LoDTensor +- 上图中一个矩形方块是CPU/GPU内存中一片连续的内存空间,表示一个dense vector + +
+ +--- + +### `dynamicRNN` 如何实现 batch 计算 ? + + + +1. 对一个mini batch中不等长样本进行排序,最长样本变成batch中的第一个,最短样本是batch中最后一个 + - `LoDTensor` -> `LoDRankTable` :heavy_plus_sign: `lod_rank_table operaator` + - 可以将`LoDRankTable`理解为对LoDTensor中的多个序列按照长度排序LoDRankTable 存储了排序之后的index + +2. 构建每个时间步的batch输入:随着时间步增加,每个时间步的batch输入可能会逐渐缩小 + - `TensorArray` :heavy_plus_sign: `lod_tensor_to_array` -> `LoDTensor` (without LoD) +3. 每个时间步输出写入一个输出 `LoDTensorArray` +3. `dynamicRNN`循环结束后, 按照`LoDRankTable`中记录的信息对输出`LoDTensorArray`重排序,还原会原始输入顺序 + - `TensorArray` :heavy_plus_sign: `array_to_lod_tensor` -> `LoDTensor` + + + +--- + +### 运行实例 + +

+ +

+ +--- +### 运行实例 + +

+ +

+ + + +- 执行到第5~7个batch时,batch size将会缩小 + + + +--- +### 运行实例 + +

+ +

+ + + +- 第5 ~ 7个batch时RNN的`memory`会发生什么? + - `memory` 指向某个operator的输出Tensor,在该operator前向计算之后,“取回”其计算结果 + - 5 ~ 7时,遇到了序列的结束,==下一个时间步计算不再需要在已经结束的序列上展开== + - 在`dynamicRNN`中`shrink_memory` operator 用来缩小`memory`的batch输入 + + + +--- +### 运行实例:batch 1 ~ 2 + +

+
Figure. 第1、2个batch输入dynamicRNN的batch输入 +

+ +--- +### 运行实例:batch 3 ~ 4 + +

+
Figure. 第3、4个batch输入dynamicRNN的batch输入 +

+ +--- + +### 运行实例:batch 5 ~ 7 + +

+
Figure. 第5、6、7个batch输入dynamicRNN的batch输入 +

+ +--- +### ==7.== Fluid 代码结构 + +--- +### Fluid 代码结构 + + + + + + + + + + + + + + + +
代码结构模块结构
+

+ +

+
+

+ +

+
+ +--- + +### ==8.== 文档总结 + +--- + + +- 设计概览 + - 重构概览 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/refactorization.md) + - fluid [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/fluid.md) + - fluid_compiler [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/motivation/fluid_compiler.md) +- 核心概念 + - variable 描述 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/var_desc.md) + - Tensor [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/tensor.md) + - LoDTensor [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md) + - TensorArray [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/tensor_array.md) + - Program [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md) + - Block [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.md) + - Scope [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md) + +--- + +- 重要功能模块 + - backward [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/backward.md) + - 内存优化 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/memory_optimization.md) + - evaluator [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/executor.md) + - python API [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md) + - regularization [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/regularization.md) + +- 开发指南 + - 支持新设硬件设备库 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/support_new_device.md) + - 添加新的Operator [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_cn.md) + - 添加新的Kernel [->]( +https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_kernel_en.md) + + + +--- + +### ==9.== 开发指南 + +--- + +#### 建议开发环境:使用 Docker 编译和测试 + + + +Docker编译PaddlePaddle源码: [->](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/build_and_install/docker_install_cn.html) + +PaddlePaddle 在 Dockerhub 地址:[->]( + https://hub.docker.com/r/paddlepaddle/paddle/tags/) + +1. 获取PaddlePaddle的Docker镜像 + ```bash + docker pull paddlepaddle/paddle:latest-dev + ``` + +1. 启动 docker container + + ```bash + docker run -it -v $PWD/Paddle:/paddle paddlepaddle/paddle:latest-dev /bin/bash + ``` + +1. 进入docker container后,从源码编译,请参考文档 [->]( http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/build_and_install/build_from_source_cn.html) + + + +--- + +### 一些说明 + + + +1. PaddlePaddle的Docker镜像为了减小体积,默认没有安装vim,可以在容器中执行`apt-get install -y vim`来安装vim。 +1. 开发推荐使用tag为`latest-dev`的镜像,其中打包了所有编译依赖。`latest`及`lastest-gpu`是production镜像,主要用于运行PaddlePaddle程序。 +2. 在Docker中运行GPU程序,推荐使用nvidia-docker,[否则需要将CUDA库和设备挂载到Docker容器内](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/build_and_install/docker_install_cn.html)。 + + + ```bash + nvidia-docker run -it -v $PWD/Paddle:/paddle paddlepaddle/paddle:latest-dev /bin/bash + ``` + + + + + +--- + +### [如何贡献](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/dev/contribute_to_paddle_cn.html) + + + +- ==提交PullRequest前请务必阅读==: [->](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/dev/contribute_to_paddle_cn.html) +- 代码要求 + 1. 代码注释遵守 Doxygen 的样式 + 1. 确保编译器选项 WITH_STYLE_CHECK 已打开,并且编译能通过代码样式检查 + 1. 所有代码必须具有单元测试,且能够通过所有单元测试 +- 使用 `pre-commit` 钩子提交Pull Request + 1. 帮助格式化源代码(C++,Python) + 1. 在提交前自动检查一些基本事宜:如每个文件只有一个 EOL,Git 中不要添加大文件等 + 1. 安装pre-commit,并在PaddlePaddle根目录运行: + ```bash + ➜ pip install pre-commit + ➜ pre-commit install + ``` + + +--- + +### 如何贡献 + + + +1. 开始开发之前请先建立issue。 + - 让其它同学知道某项工作已经有人在进行,以避免多人开发同一功能的情况。 +1. 提交PR必须关联相关的issue。做法请参考:[->](https://help.github.com/articles/closing-issues-using-keywords/) + - 目的:为了在提交的版本中留有记录描述这个PR是为了开发什么样的功能,为了解决什么样的问题。 + - 当PR被merge后,关联的issue会被自动关闭。 +1. PR review 中,reviewer的每条comment都必须回复。 + - 如修改完可直接回复:Done。 + - 目的:review comment 中可能会有(1)询问类型的问题;(2)可以在下一个PR修改的问题;(3)comment意见不合理等。需要明确回复,以便reviewer和其他人有历史可查,便于区分是否已经进行修改,或者准备下一个PR修改,或者意见不合理可以不用进行修改。 + + + +--- + +### ==10.== 添加新的 Operator + +--- + +### 概念简介 + + + +添加一个新的operator,会涉及实现以下C++类的派生类: + +1. `framework::OperatorBase`: Operator(简写,Op)基类。 +1. `framework::OpKernel`: Op计算函数的基类,称作Kernel。 +1. `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 +1. `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 + +依据是否包含kernel,可以将Op分为两种: +1. 包含Kernel的Op:继承自OperatorWithKernel,==绝大多数operator都属于这一类== +1. 不包含kernel的Op,继承自OperatorBase,只有少量Op属于这一类,例如while_op,ifelse_op + +这里主要介绍带Kernel的Op如何编写。 + + + +--- + +#### 添加新的Operator需要修改/添加哪些文件? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
内容定义位置
+OpProtoMake定义 + +`.cc`文件,Backward Op不需要OpProtoMaker +
+Op定义 + +`.cc`文件 +
+Kernel实现 + +CPU、CUDA共享Kernel实现在`.h`文件中,否则,CPU 实现在`.cc`文件中,CUDA 实现在`.cu`文件中。 +
+注册Op + +Op注册实现在`.cc`文件;Kernel注册CPU实现在`.cc`文件中,CUDA实现在`.cu`文件中 +
+ +- 添加 Operator 之前请阅读:[Operator 命名规范](https://github.com/PaddlePaddle/Paddle/blob/63cca04cfd488a4dab6d6273fd04a8017ef45932/doc/fluid/dev/name_convention.md)及[Operator Markdown注释规范](https://github.com/PaddlePaddle/Paddle/blob/63cca04cfd488a4dab6d6273fd04a8017ef45932/doc/fluid/dev/op_markdown_format.md)。 +- 实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。 +- 根据文件名自动构建op和Python端绑定,请务必遵守以上命名,否则需要进一步修改PyBind相关文件及CMakeLists.txt。 +
+ +--- + +###### 实现带Kernel的Operator step1: 定义ProtoMaker类 + + + +下面均以[clip_op](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/clip_op.h)为例进行介绍 + +- clip_op计算公式:$Out = \min(\max(X, min), max)$ +- 首先定义`ProtoMaker`来描述该Op的输入、输出,并添加注释(*下面代码段的中注释进行了简化,实现时需按照规范添加注释*): + + ```cpp + template + class ClipOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ClipOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X","(Tensor)The input of clip op."); + AddOutput("Out", "(Tensor),The output of clip op."); + AddAttr( + "min", "(float),Minimum value."); + AddAttr( + "max", "(float),Maximum value."); + AddComment(R"DOC( + …… + )DOC"); + } + }; + ``` + + + +--- + +###### 实现带Kernel的Operator step2: 定义Operator类 + + + +下面的代码段实现了`clip_op`的定义: + +```cpp +class ClipOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of ClipOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of ClipOp should not be null."); + auto x_dims = ctx->GetInputDim("X"); + auto max = ctx->Attrs().Get("max"); + auto min = ctx->Attrs().Get("min"); + PADDLE_ENFORCE_LT(min, max, "max should be greater than min."); + ctx->SetOutputDim("Out", x_dims); + ctx->ShareLoD("X", /*->*/ "Out"); + } +}; +``` + + +--- + +### Operator 类中需要完成的工作 + + + +1. clip_op 继承自`OperatorWithKernel`, + + ```cpp + using framework::OperatorWithKernel::OperatorWithKernel; + ``` + 表示使用基类`OperatorWithKernel`的构造函数。 + +1. 重写`InferShape`接口。 + - `InferShape` 为const函数,不能修改Op的成员变 + - `InferShape` 的参数为 `const framework::InferShapeContext &ctx`,从中可获取到输入输出以及属性 + - `InferShape` 会被调用两次,一次是编译时(创建op),一次是运行时(调用op的`Run`方法时),需要完成以下功能: + 1. 做检查, 尽早报错:检查输入数据维度、类型等是否合法 + 2. 设置输出Tensor的形状 + +通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中。 + + + +--- + +### 补充说明 + + + +1. `InferShape`目前支持两种实现方式,二者最后都会生成一个functor注册给OpInfo结构体。 + 1. 继承framework::InferShapeBase,实现为一个functor(参考 [mul_op](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/mul_op.cc#L22)) + 2. override InferShape函数(参考 [clip_op](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/clip_op.cc#L24)) + +1. 什么是`functor` ? + + - 类或结构体仅重载了`()`,一般是可被多个kernel复用的计算函数。 + + + + ```cpp + template + class CrossEntropyFunctor { + public: + void operator()(const platform::CPUDeviceContext& ctx, + framework::Tensor* out, + const framework::Tensor* prob, + const framework::Tensor* labels, const bool softLabel) { + …… + } + }; + ``` + + + - 在 clip_op 内也会看到将一段计算函数抽象为functor的使用法: [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/clip_op.h#L27)。 + + + +--- + +###### 实现带Kernel的Operator step3: 定义OpKernel类 + + + +- `ClipKernel`继承自`framework::OpKernel`,带有下面两个模板参数: + 1. `typename DeviceContext`: 表示设备类型,不同设备共享同一个Kernel时,需添加该模板参数。不共享时,需要提供针对不同设备的特化实现。 + 1. `typename T` : 表示支持的数据类型,如`float`, `double`等 + +- 在`ClipKernel`类中重写`Compute`方法 + 1. `Compute`接受输入参数:`const framework::ExecutionContext& context` + - `ExecutionContext` 是从 `Scope`中将运行时Op的输入、输出`Variable`组织在一起,使得Op在调用`Compute`方法时,能够简单地通过名字拿到需要的输入输出`Variable` + - 与`InferShapeContext`相比,`ExecutionContext` 中增加了设备类型 + 1. 在`Compute`函数里实现`OpKernel`的具体计算逻辑 + + + +--- +#### ClipKernel 代码概览 + + + +```cpp +template +class ClipKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto max = context.Attr("max"); + auto min = context.Attr("min"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); + T* out_data = out->mutable_data(context.GetPlace()); + const T* x_data = x->data(); + int64_t numel = x->numel(); + Transform trans; + trans(context.template device_context(), x_data, + x_data + numel, out_data, ClipFunctor(min, max)); + } +}; +``` + +- 为了使`OpKernel`的计算过程书写更加简单,并且CPU、CUDA的代码可以复用, Fluid 使用 Eigen 作为基础的矩阵运算库 +- Fluid对Eigen unsupported Tensor提供了一些基本的封装,可以在`Compute`接口中直接调用 + - 关于在PaddlePaddle中如何使用Eigen库,请参考[使用文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/use_eigen_cn.md)。 + + + +--- +###### 实现带Kernel的Operator step4: 实现反向Op + + + +- ==**反向Op没有`ProtoMaker`**==,除此之外定义与实现方式前向Op完全一致,不再赘述 +- 这里仅对反向Op的输入输出进行说明: + 1. 反向Op的输入 + - 前向Op的输出 + - 反向传播过程中传递给当前Op的梯度 + - 需要注意,Fluid中,不区分Cost Op和中间层Op,所有Op都必须正确处理接收到的梯度 + 2. 反向Op的输出 + - 对可学习参数的求导结果 + - 对所有输入的求导结果 + + + + +--- + +###### 实现带Kernel的Operator step5: 注册Op及Kernel + + + +至此Op和Op kernel都已经实现完毕,接下来,需要在`.cc`和`cu`文件中注册op和kernel + +1. 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 + + + + ```cpp + namespace ops = paddle::operators; + REGISTER_OP(clip, ops::ClipOp, ops::ClipOpMaker, clip_grad, + ops::ClipOpGrad); + REGISTER_OP_CPU_KERNEL( + clip, ops::ClipKernel); + REGISTER_OP_CPU_KERNEL( + clip_grad, ops::ClipGradKernel); + ``` + + - 在上面的代码片段中: + + 1. `REGISTER_OP` : 注册`ops::ClipOp`类,类型名为`clip`,该类的`ProtoMaker`为`ops::ClipOpMaker`,注册`ops::ClipOpGrad`,类型名为`clip_grad` + 1. `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op,例如:优化算法相关的Op + 1. `REGISTER_OP_CPU_KERNEL` :注册`ops::ClipKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::ClipGradKernel`类 + + +1. 按照同样方法,在`.cu`文件中注册GPU Kernel + - 如果CUDA Kernel的实现基于Eigen,需在 `.cu`的开始加上宏定义 `#define EIGEN_USE_GPU` + + + +--- + +##### 编译和Python端绑定 + + + +- 运行下面命令可以仅编译新添加的Op: + + ``` + make mul_op + ``` + - 需注意,运行单元测试需要编译整个工程 + +- 如果遵循前文的文件命名规则,构建过程中,会自动为新增的op添加Python端绑定,并链接到生成的lib库中 + + + +--- + +###### 实现带Kernel的Operator step6: 添加前向单测及梯度检测 + + + +- 新增Op的单元测试统一添加至:[python/paddle/v2/fluid/tests/unittests](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/tests/unittests)目录 +- 前向Operator单测 + + 1. Op单元测试继承自`OpTest`,各项具体的单元测试在`TestClipOp`里完成,所有单测case都以`TestXX`命名 + 1. 单元测试Operator,需要: + 1. 在`setUp`函数定义输入、输出,以及相关的属性参数 + 1. 生成随机的输入数据 + 1. 在Python脚本中实现与前向operator相同的计算逻辑,得到输出值,与operator前向计算的输出进行对比 + 1. 反向梯度检测流程测试框架已经实现,直接调用相应接口`check_grad`即可 + +- `clip_op` 单测代码请参考 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/test_clip_op.py),这里不再展开 + + + +--- +#### 编译执行单测 + + + +- `python/paddle/v2/framework/tests` 目录下新增的 `test_*.py` 单元测试会被自动加入工程进行编译 + + - 运行单元测试测时需要编译整个工程,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON` +- 编译成功后,执行下面的命令来运行单元测试: + + ```bash + make test ARGS="-R test_mul_op -V" + ``` + + 或者: + + ``` + ctest -R test_mul_op + ``` + + +--- + +### 添加Op的一些注意事项 + + + +- 为每个Op创建单独的`*_op.h`(如有)、`*_op.cc`和`*_op.cu`(如有)。不允许一个文件中包含多个Op,将会导致编译出错。 +- 注册Op时的类型名,需要和该Op的名字一样。不允许在`A_op.cc`里面,注册`REGISTER_OP(B, ...)`,会导致单元测试出错。 +- 如果Op没有实现CUDA Kernel,不要创建空的`*_op.cu`,会导致单元测试出错。 +- 如果多个Op依赖一些共用的函数,可以创建非`*_op.*`格式的文件来存放,如`gather.h`文件。 + + + +--- + +### ==10.== 使用相关问题 + +--- + +### 定义前向计算 + + + +- 当在python端执行时: + ```python + import paddle.v2.fluid as fluid + ``` + [`framework.py`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/framework.py#L1040)定义了两个全局`Program`: + ```python + # program is a global instance. + _main_program_ = Program() + _startup_program_ = Program() + ``` + +- 前向定义的过程就是不断往`mian_program`中添加Op和Variable +- 如果需要执行一个新的`mian_program`时,可以调用调用: + ```python + def switch_main_program(program): + """ + Switch the main program to a new program. + This funtion returns the previous main program. + """ + …… + ``` + + +--- + +### 自定义参数的初始化 + + + +- 调用`fluid.ParamAttr(……)`接口,自定义参数的初始化 + + ```python + w_param_attrs = ParamAttr(name=None, + initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + learning_rate=1.0, + regularizer=L1Decay(1.0), + trainable=True, + clip=GradientClipByValue(-1.0, 1.0), + ) + y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) + ``` + +- 补充问题:如何创建 `Variable` + ```python + cur_program = Program() + cur_block = cur_program.current_block() + new_var = cur_block.create_var(name="X", shape=[-1, 16, 16], dtype="float32") + ``` + + + +--- + +### 添加反向Op + + + +- 调用`fluid.backward.append_backward(X)`(`X`是一个Variable),来为一段前向`ProgramDesc`添加反Op + + ```python + data = fluid.layers.data(name="data", shape=(2,3,4)) + out = fluid.layers.fc(input=data,size=128,act=None) + loss = fluid.layers.reduce_sum(out) + fluid.backward.append_backward(loss=loss) + ``` + +- 添加优化相关的Op + ```python + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(loss) + ``` + +- 可以随时调用`print(fluid.default_main_program())`来输出当前的`main_program` + +- 当构建完成整个`Program`后,调用下面的接口执行内存优化: + ```python + fluid.memory_optimize(fluid.default_main_program()) + ``` + - _注:内存优化目前仍在持续开发中,有可能不够稳定。_ + + + +--- + +### 总结:编译时执行流程 + + + +- 用户定义前向计算 +- 添加反向Op到`default_main_program` +- 添加 gradient clipping Op 到 +- 添加 regularization Op 到`default_main_program` +- 为指定的优化算法,添加相关的状态 variable of optimizer 到`default_startup_program` + - 状态相关 variable是指如学习率, 历史 momentum, 二阶momentum等 +- 添加初始化 variable 的Op 到 `default_startup_program` +- 为整个网络最后一个op,添加设置其接受到的梯度的Op到`default_main_program` +- 进行内存优化规划 + + + +--- + +### Feed 数据 (一):通过 feed 字典 + + + +- 执行executor的run方法时,指定feed字典,feed op 会将指定的数据放到`x`和`y`两个Variable中 + ```python + y_data = np.random.randint(0, 8, [1]).astype("int32") + y_tensor = core.Tensor() + y_tensor.set(y_data, place) + + x_data = np.random.uniform(0.1, 1, [11, 8]).astype("float32") + x_tensor = core.Tensor() + x_tensor.set(x_data, place) + …… + cost = exe.run( + fluid.default_main_program(), + feed={'x': x_tensor, + 'y': y_tensor}, + fetchlist=[avg_cost]) + ``` + +- 这种方法较为底层,一般用于单测中 + + + +--- + +### Feed 数据 (二):使用 DataFeeder接口 + + + +- 编写一个data_reader函数,data_reader是一个Python generator + + ```python + def demo_reader(): + def random_generator(): + yield np.random.uniform(0.1, 1, [4]), np.random.randint(0, 1, [1]) + return random_generator + ``` +- 在训练任务中使用 DataFeeder 接口 + ```python + cost = exe.run( + fluid.default_main_program(), + feed={'x': x_tensor, + 'y': y_tensor}, + fetchlist=[avg_cost]) + + train_reader = paddle.batch( + paddle.reader.shuffle(demo_reader(), buf_size=500), batch_size=4) + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + for data in train_reader(): + cost = exe.run( + fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[cost]) + ``` + + + +--- + +### 常见问题 + + + +- 如何使用 evaluator ? [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_label_semantic_roles.py#L168) + + ```python + accuracy = fluid.evaluator.Accuracy(input=predict, label=label) + for pass_id in range(PASS_NUM): + accuracy.reset() + for data in train_reader(): + loss, acc = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost] + accuracy.metrics) + pass_acc = accuracy.eval(exe) + # acc 当前一个batch 的 accuracy + # pass_acc 当前batch 的 accuracy + pass_total_acc = accuracy.eval(exe) # 整个pass的accuracy + ``` + +- 如何在训练中测试?[->](https://github.com/dzhwinter/benchmark/blob/master/fluid/vgg16.py#L144) +- 如何保存训练好的模型?[->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py#L143) +- 如何加载训练好的模型进行预测?[->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py#L154) +- 如何在同一个训练任务中定义多个Program,并交替运行? [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/demo/fc_gan.py) +- 如何profile?Fluid 实现了profile 工具,可以直接调用。请参考示例 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/test_profiler.py) + + + + +--- diff --git a/doc/fluid/images/1.png b/doc/fluid/images/1.png new file mode 100644 index 0000000000000000000000000000000000000000..67daf566f91aab570e60971c4ea8e2be876e214d Binary files /dev/null and b/doc/fluid/images/1.png differ diff --git a/doc/fluid/images/2.png b/doc/fluid/images/2.png new file mode 100644 index 0000000000000000000000000000000000000000..43367777f41449a666e7a3b571f09ac5d5dfb1ae Binary files /dev/null and b/doc/fluid/images/2.png differ diff --git a/doc/fluid/images/3.png b/doc/fluid/images/3.png new file mode 100644 index 0000000000000000000000000000000000000000..481021ef306e2596818aab7fe17a570754f63635 Binary files /dev/null and b/doc/fluid/images/3.png differ diff --git a/doc/fluid/images/4.png b/doc/fluid/images/4.png new file mode 100644 index 0000000000000000000000000000000000000000..4279f41e06de459f18b9a622539511d555e9a0af Binary files /dev/null and b/doc/fluid/images/4.png differ diff --git a/doc/fluid/images/LoDTensor.png b/doc/fluid/images/LoDTensor.png new file mode 100644 index 0000000000000000000000000000000000000000..75369f5378309e0f304b83f6bb69bdb195eac079 Binary files /dev/null and b/doc/fluid/images/LoDTensor.png differ diff --git a/doc/fluid/images/compile_run_time.png b/doc/fluid/images/compile_run_time.png new file mode 100644 index 0000000000000000000000000000000000000000..0bc9b2fd0e81b4851e6d96171ccb9a05d0f42a48 Binary files /dev/null and b/doc/fluid/images/compile_run_time.png differ diff --git a/doc/fluid/images/executor.png b/doc/fluid/images/executor.png new file mode 100644 index 0000000000000000000000000000000000000000..b29c0d779e3d46b779b5baeabe3176adaeb00a6d Binary files /dev/null and b/doc/fluid/images/executor.png differ diff --git a/doc/fluid/images/fluid_examples.png b/doc/fluid/images/fluid_examples.png new file mode 100644 index 0000000000000000000000000000000000000000..aa99472c0f914cde128fd7b3bd8dc29ac24f94b6 Binary files /dev/null and b/doc/fluid/images/fluid_examples.png differ diff --git a/doc/fluid/images/fluid_module_1.png b/doc/fluid/images/fluid_module_1.png new file mode 100644 index 0000000000000000000000000000000000000000..554782ba54e43efc3d6babbb94e3cac3530ac649 Binary files /dev/null and b/doc/fluid/images/fluid_module_1.png differ diff --git a/doc/fluid/images/fluid_module_2.png b/doc/fluid/images/fluid_module_2.png new file mode 100644 index 0000000000000000000000000000000000000000..4219efccbb1e87839adf6b5720fe46808b7d2fcf Binary files /dev/null and b/doc/fluid/images/fluid_module_2.png differ diff --git a/doc/fluid/images/layer.png b/doc/fluid/images/layer.png new file mode 100644 index 0000000000000000000000000000000000000000..e46db4c9c6f5b65ff274b498b716b11de343a8b0 Binary files /dev/null and b/doc/fluid/images/layer.png differ diff --git a/doc/fluid/images/operator1.png b/doc/fluid/images/operator1.png new file mode 100644 index 0000000000000000000000000000000000000000..3975b06f615b7a88dfc11e71b6451fdf4ce42d60 Binary files /dev/null and b/doc/fluid/images/operator1.png differ diff --git a/doc/fluid/images/operator2.png b/doc/fluid/images/operator2.png new file mode 100644 index 0000000000000000000000000000000000000000..b7bb1fae2050d3a70797517bc20dbbdef3dfcb7c Binary files /dev/null and b/doc/fluid/images/operator2.png differ diff --git a/doc/fluid/images/place.png b/doc/fluid/images/place.png new file mode 100644 index 0000000000000000000000000000000000000000..14e77511d639af155e5a3725cde05323e0cc94f2 Binary files /dev/null and b/doc/fluid/images/place.png differ diff --git a/doc/fluid/images/print_fluid_program.png b/doc/fluid/images/print_fluid_program.png new file mode 100644 index 0000000000000000000000000000000000000000..e8e459e1b3d5c8706b3caa05dc371db8d46df4a5 Binary files /dev/null and b/doc/fluid/images/print_fluid_program.png differ diff --git a/doc/fluid/images/program_desc1.png b/doc/fluid/images/program_desc1.png new file mode 100644 index 0000000000000000000000000000000000000000..0656336914ece957f2e5bb4d70ad337a63e31d88 Binary files /dev/null and b/doc/fluid/images/program_desc1.png differ diff --git a/doc/fluid/images/program_desc2.png b/doc/fluid/images/program_desc2.png new file mode 100644 index 0000000000000000000000000000000000000000..db5bfa1231345add8661b4f8ef0fc9d861f40d24 Binary files /dev/null and b/doc/fluid/images/program_desc2.png differ diff --git a/doc/fluid/images/raw_input.png b/doc/fluid/images/raw_input.png new file mode 100644 index 0000000000000000000000000000000000000000..0725f92d2b169c2b59ec7c68b402859c2a2dd1d8 Binary files /dev/null and b/doc/fluid/images/raw_input.png differ diff --git a/doc/fluid/images/scope_variable_tensor.png b/doc/fluid/images/scope_variable_tensor.png new file mode 100644 index 0000000000000000000000000000000000000000..59b0de6fb36f9f6b469227c05760a7612bb30b4d Binary files /dev/null and b/doc/fluid/images/scope_variable_tensor.png differ diff --git a/doc/fluid/images/sorted_input.png b/doc/fluid/images/sorted_input.png new file mode 100644 index 0000000000000000000000000000000000000000..ff601128368ee179e3fd33e5e295a9ddd3dcbaeb Binary files /dev/null and b/doc/fluid/images/sorted_input.png differ diff --git a/doc/fluid/images/transpiler.png b/doc/fluid/images/transpiler.png new file mode 100644 index 0000000000000000000000000000000000000000..422973c0dc7aa2b544d2fc86a97ace706388cb9e Binary files /dev/null and b/doc/fluid/images/transpiler.png differ diff --git a/doc/fluid/images/user_interface.png b/doc/fluid/images/user_interface.png new file mode 100644 index 0000000000000000000000000000000000000000..ffc94e3d8945ec6291460afd90e8fcc600828390 Binary files /dev/null and b/doc/fluid/images/user_interface.png differ diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst index 077f5e9b189269f9f6c9cf68310e2bfd43d8cb67..741c01ce5428c0046daa5a784da70d4bb492438c 100644 --- a/doc/v2/build_and_install/build_from_source_cn.rst +++ b/doc/v2/build_and_install/build_from_source_cn.rst @@ -35,13 +35,11 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 # 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像 docker build -t paddle:dev . # 3. 执行下面的命令编译CPU-Only的二进制 - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build # 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步) - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build -注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。如果使用自行 -构建的镜像(上述第4步)会执行 :code:`Dockerfile` 描述的默认入口程序 :code:`build.sh` 可以省略步骤3中 -最后的执行脚本的命令。 +注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。 编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装: @@ -72,15 +70,15 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test 如果期望执行其中一个单元测试,(比如 :code:`test_sum_op` ): .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash - bash /paddle/paddle/scripts/docker/build.sh - cd /paddle/build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash + ./paddle/scripts/paddle_build.sh build + cd build ctest -R test_sum_op -V .. _faq_docker: diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst index 545e61ce9602240807d515e9eae971dfca9ddd7f..b06c43e19dcfc52ad0f074a85517a16744895a3a 100644 --- a/doc/v2/build_and_install/build_from_source_en.rst +++ b/doc/v2/build_and_install/build_from_source_en.rst @@ -34,14 +34,12 @@ Or you can build your own image from source as the optional step below: # 2. Optional: build development docker image from source docker build -t paddle:dev . # 3. Run the following command to build a CPU-Only binaries - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build # 4. Or, use your built Docker image to build PaddlePaddle (must run step 2) - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build NOTE: The above command try to mount the current working directory (root directory of source code) -into :code:`/paddle` directory inside docker container. If you are using your own image -(Step 4) it will run default entry-point :code:`build.sh` , so you could omit the last -command in step 3. +into :code:`/paddle` directory inside docker container. When the compile finishes, you can get the output whl package under build/python/dist, then you can choose to install the whl on local @@ -74,15 +72,15 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU. .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test If you wish to run only one unit test, like :code:`test_sum_op`: .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash - bash /paddle/paddle/scripts/docker/build.sh - cd /paddle/build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash + ./paddle/scripts/paddle_build.sh build + cd build ctest -R test_sum_op -V .. _faq_docker: diff --git a/doc/v2/build_and_install/docker_install_cn.rst b/doc/v2/build_and_install/docker_install_cn.rst index da876b03e384a8175b27f78756af648c80fc6784..106c86bace075764c84bc2a7f7cb09d466fa8794 100644 --- a/doc/v2/build_and_install/docker_install_cn.rst +++ b/doc/v2/build_and_install/docker_install_cn.rst @@ -98,7 +98,7 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note 国内用户可以使用下面的镜像源来加速访问: - .. code-block: bash + .. code-block:: bash docker run -p 8888:8888 docker.paddlepaddlehub.com/book diff --git a/doc/v2/build_and_install/docker_install_en.rst b/doc/v2/build_and_install/docker_install_en.rst index 5dbdedc4cb064ef415e8d19f00727a16d1c175c6..25aecb8d0da9feb00006da6259b529b7011d91cb 100644 --- a/doc/v2/build_and_install/docker_install_en.rst +++ b/doc/v2/build_and_install/docker_install_en.rst @@ -105,7 +105,7 @@ We provide a packaged book image, simply issue the command: For users in China, we provide a faster mirror: - .. code-block: bash + .. code-block:: bash docker run -p 8888:8888 docker.paddlepaddlehub.com/book diff --git a/paddle/.gitignore b/paddle/.gitignore index 1c1c0c2c829f088d7e3f52ca007fcb8f33a16a36..01904aa6ef2057afee95ddd6e30cde064b06c52e 100644 --- a/paddle/.gitignore +++ b/paddle/.gitignore @@ -11,7 +11,6 @@ GTAGS *.pb.cc *.pb.h *_pb2.py -paddle_* output/ google/ Makefile diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b19256ef4533a09162edf907f6cd51146517e46 --- /dev/null +++ b/paddle/contrib/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_subdirectory(inference) diff --git a/paddle/contrib/float16/README.md b/paddle/contrib/float16/README.md index ded959c47cb81b9384abbb9815773e25969344ec..58b4a50666bfb622af8acbce29355f2a4a870a82 100644 --- a/paddle/contrib/float16/README.md +++ b/paddle/contrib/float16/README.md @@ -89,7 +89,7 @@ cd Paddle # to `FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04` and similarly for other configurations nvidia-docker build -t paddle:float16 . # After running this, different results will be written to different log files in Paddle/contrib/float16/ -nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/contrib/float16/run_float16_demo.sh +nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/paddle/contrib/float16/run_float16_demo.sh ``` #### Accuracy diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh index d8a34ee67b8fab214fa6e96104304689211f84da..031225a85dabb26e5d9ea06f58909c049e7f0c08 100755 --- a/paddle/contrib/float16/run_float16_demo.sh +++ b/paddle/contrib/float16/run_float16_demo.sh @@ -3,7 +3,7 @@ BUILD_PATH=/paddle/fp16_build WHEEL_PATH=$BUILD_PATH/python/dist INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book -DEMO_PATH=/paddle/contrib/float16 +DEMO_PATH=/paddle/paddle/contrib/float16 # Use the single most powerful CUDA GPU on your machine export CUDA_VISIBLE_DEVICES=0 @@ -50,7 +50,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=imagenet \ --dirname=$DEMO_PATH/image_classification_imagenet_vgg.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_vgg.inference.model \ --repeat=$REPEAT \ @@ -68,7 +67,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_resnet \ - --data_set=imagenet \ --dirname=$DEMO_PATH/image_classification_imagenet_resnet.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_resnet.inference.model \ --repeat=$REPEAT \ @@ -86,7 +84,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=cifar10 \ --dirname=$DEMO_PATH/image_classification_cifar10_vgg.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_vgg.inference.model \ --repeat=$REPEAT \ @@ -104,7 +101,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=cifar10 \ --dirname=$DEMO_PATH/image_classification_cifar10_resnet.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_resnet.inference.model \ --repeat=$REPEAT \ diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4fe10f708e5bb8b28e34b2d91b2254c346c467f --- /dev/null +++ b/paddle/contrib/inference/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST) + set(options "") + set(oneValueArgs "") + set(multiValueArgs ARGS) + cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) + set(arg_list "") + if(inference_test_ARGS) + foreach(arg ${inference_test_ARGS}) + list(APPEND arg_list "_${arg}") + endforeach() + else() + list(APPEND arg_list "_") + endif() + foreach(arg ${arg_list}) + string(REGEX REPLACE "^_$" "" arg "${arg}") + cc_test(${TARGET_NAME} + SRCS ${TEST_SRC} + DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl + ARGS --dirname=${PYTHON_TESTS_DIR}/book/) + # set_tests_properties(${TARGET_NAME} + # PROPERTIES DEPENDS ${DEP_TEST}) + endforeach() +endfunction(inference_api_test) + + +cc_library(paddle_inference_api + SRCS paddle_inference_api.cc + DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) + +cc_library(paddle_inference_api_impl + SRCS paddle_inference_api_impl.cc + DEPS paddle_inference_api paddle_fluid_api) + +cc_test(test_paddle_inference_api + SRCS test_paddle_inference_api.cc + DEPS paddle_inference_api) + +inference_api_test(test_paddle_inference_api_impl + test_paddle_inference_api_impl.cc + test_word2vec) diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc new file mode 100644 index 0000000000000000000000000000000000000000..d67e1e7667800d6dd00cb8915b0d6dc7c664970b --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api.cc @@ -0,0 +1,15 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/contrib/inference/paddle_inference_api.h" diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h index dbaa7c95b97e954537707566e5b7458e6afd14c8..9ac8ebdef8151f2a144b479fa258b8bc830fc2e9 100644 --- a/paddle/contrib/inference/paddle_inference_api.h +++ b/paddle/contrib/inference/paddle_inference_api.h @@ -12,49 +12,74 @@ See the License for the specific language governing permissions and limitations under the License. */ +/* + * This file contains the definition of a simple Inference API for Paddle. + * + * ATTENTION: It requires some C++ features, for lower version C++ or C, we + * might release another API. + */ + #pragma once +#include #include #include namespace paddle { -class Predictor { -public: - struct Attr; - Predictor() = default; +enum PaddleDType { + FLOAT32, + INT64, +}; - // Build the network before inference. - bool Init(const Attr& attr); +struct PaddleBuf { + void* data; // pointer to the data memory. + size_t length; // number of memory bytes. +}; + +struct PaddleTensor { + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; +}; + +/* +* A simple Inference API for Paddle. Currently this API might just be used by +* non-sequence scenerios. +* TODO(Superjomn) Prepare another API for NLP-related usages. +*/ +class PaddlePredictor { +public: + struct Config; + PaddlePredictor() = default; + PaddlePredictor(const PaddlePredictor&) = delete; // Predict an record. - // Arguments: - // inputs: the name of the input variables. - // outputs: the name of the output varaibles. - // input_shapes: the shape of the input variables. - // output_shapes: the shape of the output variables. - // input_data: the data of the input variables. - // output_data: the data of the output variables. - bool Run(const std::vector& inputs, - const std::vector& outputs, - const std::vector>& input_shapes, - const std::vector>& output_shapes, - const std::vector>& input_data, - std::vector>* output_data); - - // Clone a predictor that share the model weights. - Predictor* Clone(); + // The caller should be responsible for allocating and releasing the memory of + // `inputs`. `inputs` should be alive until Run returns. caller should be + // responsible for releasing the memory of `output_data`. + virtual bool Run(const std::vector& inputs, + std::vector* output_data) = 0; + + // Clone a predictor that share the model weights, the Cloned predictor should + // be thread-safe. + virtual std::unique_ptr Clone() = 0; // Destroy the Predictor. - ~Predictor(); + virtual ~PaddlePredictor() {} - struct Attr { + friend std::unique_ptr CreatePaddlePredictor( + const PaddlePredictor::Config& config); + + // The common configs for all the predictors. + struct Config { enum class EngineKind; std::string model_dir; // path to the model directory. bool enable_engine{false}; // Enable to execute (part of) the model on - // third-party engines. - EngineKind engine_kind{Attr::EngineKind::kNone}; + // third-party engines. + EngineKind engine_kind{Config::EngineKind::kNone}; enum class EngineKind { kNone = -1, // Use the native Fluid facility. @@ -66,4 +91,8 @@ public: }; }; +// A factory to help create difference predictor. +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + } // namespace paddle diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..ecca16d3f82bbeee6858883a0f9e577a479f9d06 --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api_impl.cc @@ -0,0 +1,309 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "paddle/contrib/inference/paddle_inference_api_impl.h" + +namespace paddle { +namespace { + +// Timer for timer +class Timer { +public: + double start; + double startu; + void tic() { + struct timeval tp; + gettimeofday(&tp, NULL); + start = tp.tv_sec; + startu = tp.tv_usec; + } + double toc() { + struct timeval tp; + gettimeofday(&tp, NULL); + double used_time_ms = + (tp.tv_sec - start) * 1000.0 + (tp.tv_usec - startu) / 1000.0; + return used_time_ms; + } +}; + +template +std::string num2str(T a) { + std::stringstream istr; + istr << a; + return istr.str(); +} +} // namespace + +bool PaddlePredictorImpl::Init() { + VLOG(3) << "Predictor::init()"; + + // TODO(panyx0718): Should CPU vs GPU device be decided by id? + if (config_.device >= 0) { + place_ = paddle::platform::CUDAPlace(config_.device); + } else { + place_ = paddle::platform::CPUPlace(); + } + paddle::framework::InitDevices(false); + executor_.reset(new paddle::framework::Executor(place_)); + scope_.reset(new paddle::framework::Scope()); + + // Initialize the inference program + if (!config_.model_dir.empty()) { + // Parameters are saved in separate files sited in + // the specified `dirname`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.model_dir); + } else if (!config_.prog_file.empty() && !config_.param_file.empty()) { + // All parameters are saved in a single file. + // The file names should be consistent with that used + // in Python API `fluid.io.save_inference_model`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.prog_file, config_.param_file); + } else { + LOG(ERROR) << "fail to load inference model."; + return false; + } + ctx_ = executor_->Prepare(*inference_program_, 0); + + // Create variables + // TODO(panyx0718): Why need to test share_variables here? + if (config_.share_variables) { + executor_->CreateVariables(*inference_program_, scope_.get(), 0); + } + // Get the feed_target_names and fetch_target_names + feed_target_names_ = inference_program_->GetFeedTargetNames(); + fetch_target_names_ = inference_program_->GetFetchTargetNames(); + return true; +} + +bool PaddlePredictorImpl::Run(const std::vector &inputs, + std::vector *output_data) { + VLOG(3) << "Predictor::predict"; + Timer timer; + timer.tic(); + // set feed variable + std::map feed_targets; + std::vector feeds; + if (!SetFeed(inputs, &feeds)) { + LOG(ERROR) << "fail to set feed"; + return false; + } + for (size_t i = 0; i < feed_target_names_.size(); ++i) { + feed_targets[feed_target_names_[i]] = &feeds[i]; + } + // get fetch variable + std::map fetch_targets; + std::vector fetchs; + fetchs.resize(fetch_target_names_.size()); + for (size_t i = 0; i < fetch_target_names_.size(); ++i) { + fetch_targets[fetch_target_names_[i]] = &fetchs[i]; + } + // Run the inference program + // if share variables, we need not create variables + executor_->RunPreparedContext(ctx_.get(), + scope_.get(), + &feed_targets, + &fetch_targets, + !config_.share_variables); + if (!GetFetch(fetchs, output_data)) { + LOG(ERROR) << "fail to get fetchs"; + return false; + } + VLOG(3) << "predict cost: " << timer.toc() << "ms"; + return true; +} + +std::unique_ptr PaddlePredictorImpl::Clone() { + VLOG(3) << "Predictor::clone"; + std::unique_ptr cls(new PaddlePredictorImpl(config_)); + if (!cls->InitShared(this)) { + LOG(ERROR) << "fail to call InitShared"; + return nullptr; + } + return cls; +} + +// TODO(panyx0718): Consider merge with Init()? +bool PaddlePredictorImpl::InitShared(PaddlePredictorImpl *cls) { + VLOG(3) << "Predictor::init_shared"; + // 1. Define place, executor, scope + if (this->config_.device >= 0) { + place_ = paddle::platform::CUDAPlace(); + } else { + place_ = paddle::platform::CPUPlace(); + } + this->executor_.reset(new paddle::framework::Executor(this->place_)); + this->scope_.reset(new paddle::framework::Scope()); + // Initialize the inference program + if (!this->config_.model_dir.empty()) { + // Parameters are saved in separate files sited in + // the specified `dirname`. + this->inference_program_ = paddle::inference::Load( + this->executor_.get(), this->scope_.get(), this->config_.model_dir); + } else if (!this->config_.prog_file.empty() && + !this->config_.param_file.empty()) { + // All parameters are saved in a single file. + // The file names should be consistent with that used + // in Python API `fluid.io.save_inference_model`. + this->inference_program_ = + paddle::inference::Load(this->executor_.get(), + this->scope_.get(), + this->config_.prog_file, + this->config_.param_file); + } + this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0); + // 3. create variables + // TODO(panyx0718): why test share_variables. + if (config_.share_variables) { + this->executor_->CreateVariables( + *this->inference_program_, this->scope_.get(), 0); + } + // 4. Get the feed_target_names and fetch_target_names + this->feed_target_names_ = this->inference_program_->GetFeedTargetNames(); + this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames(); + return true; +} + +bool PaddlePredictorImpl::SetFeed( + const std::vector &inputs, + std::vector *feeds) { + VLOG(3) << "Predictor::set_feed"; + if (inputs.size() != feed_target_names_.size()) { + LOG(ERROR) << "wrong feed input size."; + return false; + } + for (size_t i = 0; i < feed_target_names_.size(); ++i) { + paddle::framework::LoDTensor input; + paddle::framework::DDim ddim = + paddle::framework::make_ddim(inputs[i].shape); + void *input_ptr; + if (inputs[i].dtype == PaddleDType::INT64) { + input_ptr = + input.mutable_data(ddim, paddle::platform::CPUPlace()); + } else if (inputs[i].dtype == PaddleDType::FLOAT32) { + input_ptr = input.mutable_data(ddim, paddle::platform::CPUPlace()); + } else { + LOG(ERROR) << "unsupported feed type " << inputs[i].dtype; + return false; + } + + // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. + std::memcpy(static_cast(input_ptr), + inputs[i].data.data, + inputs[i].data.length); + feeds->push_back(input); + LOG(ERROR) << "Actual feed type " << feeds->back().type().name(); + } + return true; +} + +bool PaddlePredictorImpl::GetFetch( + const std::vector &fetchs, + std::vector *outputs) { + VLOG(3) << "Predictor::get_fetch"; + outputs->resize(fetchs.size()); + for (size_t i = 0; i < fetchs.size(); ++i) { + // TODO(panyx0718): Support fetch of other types. + if (fetchs[i].type() != typeid(float)) { + LOG(ERROR) << "only support fetching float now."; + return false; + } + std::vector shape; + auto dims_i = fetchs[i].dims(); + auto lod = fetchs[i].lod(); + const float *output_ptr = fetchs[i].data(); + // const int64_t* output_ptr = fetchs[i].data(); + auto num = fetchs[i].numel(); + std::vector data; + if (0 == lod.size()) { + std::copy(output_ptr, output_ptr + num, std::back_inserter(data)); + for (int j = 0; j < dims_i.size(); ++j) { + shape.push_back(dims_i[j]); + } + } else { + // for batch detection + // image[0] -> output[0] shape {145, 6} + // image[1] -> output[1] shape {176, 6} + // then, + // the batch output shape {321, 6} + // the lod {{0, 145, 321}} + // so we should append output[0] to {176, 6} + size_t max_dim = 0; + for (size_t j = 1; j < lod[0].size(); j++) { + max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]); + } + size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back(); + if (max_dim > 0) { + data.resize((lod[0].size() - 1) * max_dim * common_dim, 0); + } + for (size_t j = 1; j < lod[0].size(); j++) { + size_t start = lod[0][j - 1] * common_dim; + size_t end = lod[0][j] * common_dim; + if (end > start) { + std::copy(output_ptr + start, + output_ptr + end, + data.begin() + (j - 1) * max_dim * common_dim); + } + } + shape.push_back(lod[0].size() - 1); + shape.push_back(max_dim); + for (int j = 1; j < dims_i.size(); ++j) { + shape.push_back(dims_i[j]); + } + } + + outputs->at(i).shape = shape; + outputs->at(i).data.length = sizeof(float) * data.size(); + outputs->at(i).data.data = malloc(outputs->at(i).data.length); + std::memcpy( + outputs->at(i).data.data, data.data(), outputs->at(i).data.length); + outputs->at(i).dtype = PaddleDType::FLOAT32; + // TODO(panyx0718): support other types? fill tensor name? avoid a copy. + } + return true; +} + +std::unique_ptr CreatePaddlePredictorImpl( + const VisConfig &config) { + VLOG(3) << "create PaddlePredictorImpl"; + // 1. GPU memeroy + std::vector flags; + if (config.fraction_of_gpu_memory >= 0.0f || + config.fraction_of_gpu_memory <= 0.95f) { + flags.push_back("dummpy"); + std::string flag = "--fraction_of_gpu_memory_to_use=" + + num2str(config.fraction_of_gpu_memory); + flags.push_back(flag); + VLOG(3) << "set flag: " << flag; + framework::InitGflags(flags); + } + + std::unique_ptr predictor( + new PaddlePredictorImpl(config)); + if (!predictor->Init()) { + return nullptr; + } + return predictor; +} + +} // namespace paddle diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..831abce5da58f90b38e27b5638e953de5167647a --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api_impl.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +#include "paddle/contrib/inference/paddle_inference_api.h" + +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/init.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/profiler.h" + +namespace paddle { + +struct VisConfig : public PaddlePredictor::Config { + int device; + float fraction_of_gpu_memory; + std::string prog_file; + std::string param_file; + bool share_variables; +}; + +/* + * Do not use this, just a demo indicating how to customize a Predictor. + */ +class PaddlePredictorImpl : public PaddlePredictor { +public: + explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {} + + bool Init(); + + bool Run(const std::vector &inputs, + std::vector *output_data) override; + + std::unique_ptr Clone() override; + + ~PaddlePredictorImpl() override{}; + +private: + bool InitShared(PaddlePredictorImpl *cls); + bool SetFeed(const std::vector &input_datas, + std::vector *feeds); + bool GetFetch(const std::vector &fetchs, + std::vector *output_data); + + VisConfig config_; + paddle::platform::Place place_; + std::unique_ptr executor_; + std::unique_ptr scope_; + std::unique_ptr ctx_; + std::unique_ptr inference_program_; + std::vector feed_target_names_; + std::vector fetch_target_names_; +}; + +std::unique_ptr CreatePaddlePredictorImpl( + const VisConfig &config); + +} // namespace paddle diff --git a/paddle/contrib/inference/test_paddle_inference_api.cc b/paddle/contrib/inference/test_paddle_inference_api.cc new file mode 100644 index 0000000000000000000000000000000000000000..a19173087649e8493b8c72e758456cc5b8970e23 --- /dev/null +++ b/paddle/contrib/inference/test_paddle_inference_api.cc @@ -0,0 +1,64 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/contrib/inference/paddle_inference_api.h" + +#include +#include + +namespace paddle { + +/* + * Do not use this, just a demo indicating how to customize a config for a + * specific predictor. + */ +struct DemoConfig : public PaddlePredictor::Config { + float other_config; +}; + +/* + * Do not use this, just a demo indicating how to customize a Predictor. + */ +class DemoPredictor : public PaddlePredictor { +public: + explicit DemoPredictor(const DemoConfig &config) { + LOG(INFO) << "I get other_config " << config.other_config; + } + bool Run(const std::vector &inputs, + std::vector *output_data) override { + LOG(INFO) << "Run"; + return false; + } + + std::unique_ptr Clone() override { return nullptr; } + + ~DemoPredictor() override {} +}; + +template <> +std::unique_ptr CreatePaddlePredictor( + const DemoConfig &config) { + std::unique_ptr x(new DemoPredictor(config)); + return x; +} + +TEST(paddle_inference_api, demo) { + DemoConfig config; + config.other_config = 1.7; + auto predictor = CreatePaddlePredictor(config); + std::vector outputs; + predictor->Run({}, &outputs); +} + +} // namespace paddle diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..43b068fb42c5e5c58f2932c3e528fd0fa0502ec3 --- /dev/null +++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc @@ -0,0 +1,83 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "gflags/gflags.h" +#include "paddle/contrib/inference/paddle_inference_api_impl.h" +#include "paddle/fluid/inference/tests/test_helper.h" + +DEFINE_string(dirname, "", "Directory of the inference model."); + +namespace paddle { + +PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) { + PaddleTensor pt; + pt.data.data = t->data(); + + if (t->type() == typeid(int64_t)) { + pt.data.length = t->numel() * sizeof(int64_t); + pt.dtype = PaddleDType::INT64; + } else if (t->type() == typeid(float)) { + pt.data.length = t->numel() * sizeof(float); + pt.dtype = PaddleDType::FLOAT32; + } else { + LOG(FATAL) << "unsupported type."; + } + pt.shape = framework::vectorize2int(t->dims()); + return pt; +} + +TEST(paddle_inference_api_impl, word2vec) { + VisConfig config; + config.model_dir = FLAGS_dirname + "word2vec.inference.model"; + LOG(INFO) << "dirname " << config.model_dir; + config.fraction_of_gpu_memory = 0.85; + config.device = 0; + config.share_variables = true; + + std::unique_ptr predictor = + CreatePaddlePredictorImpl(config); + + framework::LoDTensor first_word, second_word, third_word, fourth_word; + framework::LoD lod{{0, 1}}; + int64_t dict_size = 2073; // The size of dictionary + + SetupLoDTensor(&first_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(&second_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(&third_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(&fourth_word, lod, static_cast(0), dict_size - 1); + + std::vector cpu_feeds; + cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word)); + cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word)); + cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word)); + cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word)); + + std::vector outputs; + ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs)); + ASSERT_EQ(outputs.size(), 1); + for (size_t i = 0; i < outputs.size(); ++i) { + size_t len = outputs[i].data.length; + float* data = static_cast(outputs[i].data.data); + for (int j = 0; j < len / sizeof(float); ++j) { + ASSERT_LT(data[j], 1.0); + ASSERT_GT(data[j], -1.0); + } + free(outputs[i].data.data); + } +} + +} // namespace paddle diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 1b9c685866763ed126a1bf5d7fdd851c38ac1c63..09b67e5a1741c68c5f5487340e8fc86ff31e00a4 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -243,13 +243,8 @@ const std::unordered_map &OpDesc::GetAttrMap() const { } void OpDesc::Rename(const std::string &old_name, const std::string &new_name) { - for (auto &input : inputs_) { - std::replace(input.second.begin(), input.second.end(), old_name, new_name); - } - for (auto &output : outputs_) { - std::replace(output.second.begin(), output.second.end(), old_name, - new_name); - } + RenameInput(old_name, new_name); + RenameOutput(old_name, new_name); need_update_ = true; } @@ -274,6 +269,13 @@ void OpDesc::RenameInput(const std::string &old_name, for (auto &input : inputs_) { std::replace(input.second.begin(), input.second.end(), old_name, new_name); } + + auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName()); + if (it != attrs_.end()) { + auto &op_vars = boost::get>(it->second); + std::replace(op_vars.begin(), op_vars.end(), old_name, new_name); + } + need_update_ = true; } diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index b98aeed8a0aaabfd39560fad3c074a6668b4f024..cc4a725dfb3b3e7723a3a3a4008b20acdb53899d 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -1,5 +1,6 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) +# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal? cc_library(paddle_fluid_api SRCS io.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index 3bae56532d655a1725e18276e09e0cade47b5c68..507b465435609a91ebca97dd70b176c3b79bee02 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -149,12 +149,14 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } if (platform::is_gpu_place(ctx.GetPlace())) { +#ifdef PADDLE_WITH_CUDA // GPU data is copied to CPU buffer when sending, // free the buffer when possible. destroy_callback = [](void* backing) { platform::CUDAPinnedPlace cuda_pinned; memory::Free(cuda_pinned, backing); }; +#endif } std::string header; diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index a5bb58c2f4047a3bf2f8592b605772b4fa166c57..20d960f9fee1eae42b2241fb96c163e15db5e24d 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -24,6 +24,8 @@ detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu) detection_library(target_assign_op SRCS target_assign_op.cc target_assign_op.cu) +detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc + polygon_box_transform_op.cu) # Export local libraries to parent set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE) diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..335e8dd470f851d8c5f6bdbc94cfc343da269034 --- /dev/null +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -0,0 +1,105 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class PolygonBoxTransformCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), + "It must use CUDAPlace."); + auto* in = ctx.Input("Input"); + auto in_dims = in->dims(); + const T* in_data = in->data(); + auto* out = ctx.Output("Output"); + T* out_data = out->mutable_data(ctx.GetPlace()); + + int batch_size = in_dims[0]; + int geo_channel = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + int id = 0; + for (int id_n = 0; id_n < batch_size * geo_channel; ++id_n) { + for (int id_h = 0; id_h < height; ++id_h) { + for (int id_w = 0; id_w < width; ++id_w) { + id = id_n * height * width + width * id_h + id_w; + if (id_n % 2 == 0) { + out_data[id] = id_w - in_data[id]; + } else { + out_data[id] = id_h - in_data[id]; + } + } + } + } + } +}; + +class PolygonBoxTransformOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE( + ctx->HasInput("Input"), + "Input (Input) of polygon_box transform op should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("Output"), + "Output (Output) of polygon_box transform op should not be null."); + + auto in_dim = ctx->GetInputDim("Input"); + + PADDLE_ENFORCE_EQ(in_dim.size(), 4, "input's rank must be 4."); + PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0, + "input's second dimension must be even."); + + ctx->SetOutputDim("Output", in_dim); + } +}; + +class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput( + "Input", + "The input with shape [batch_size, geometry_channels, height, width]"); + AddOutput("Output", "The output with the same shape as input"); + + AddComment(R"DOC( +PolygonBoxTransform Operator. +The input is the final geometry output in detection network. +We use 2*n numbers to denote the coordinate shift from n corner vertices of +the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi), +the geometry output contains 2*n channels. +PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(polygon_box_transform, ops::PolygonBoxTransformOp, + ops::PolygonBoxTransformOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + polygon_box_transform, + ops::PolygonBoxTransformCPUKernel, + ops::PolygonBoxTransformCPUKernel); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..6187ac6622c65d2bbc525c3fe2cb397cf74ac612 --- /dev/null +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -0,0 +1,76 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_info.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using platform::PADDLE_CUDA_NUM_THREADS; +#define CUDA_BLOCK_SIZE 16 + +template +__global__ void PolygonBoxTransformKernel(const int n, const int h, const int w, + const T* input, T* output) { + int id_n = threadIdx.x + blockDim.x * blockIdx.x; + int id_h = threadIdx.y + blockDim.y * blockIdx.y; + int id_w = threadIdx.z + blockDim.z * blockIdx.z; + if (id_n < n && id_h < h && id_w < w) { + int id = id_n * h * w + w * id_h + id_w; + if (id_n % 2 == 0) { + output[id] = id_w - input[id]; + } else { + output[id] = id_h - input[id]; + } + } +} + +template +class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "It must use CUDAPlace."); + auto* in = ctx.Input("Input"); + auto in_dims = in->dims(); + const T* in_data = in->data(); + auto* out = ctx.Output("Output"); + T* out_data = out->mutable_data(ctx.GetPlace()); + + int batch_size = in_dims[0]; + int geo_channels = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + dim3 threadsPerBlock( + PADDLE_CUDA_NUM_THREADS / (CUDA_BLOCK_SIZE * CUDA_BLOCK_SIZE), + CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE); + dim3 numBlocks((batch_size * geo_channels) / threadsPerBlock.x, + (height + threadsPerBlock.y - 1) / threadsPerBlock.y, + (width + threadsPerBlock.z - 1) / threadsPerBlock.z); + auto stream = ctx.cuda_device_context().stream(); + PolygonBoxTransformKernel<<>>( + batch_size * geo_channels, height, width, in_data, out_data); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_CUDA_KERNEL( + polygon_box_transform, + paddle::operators::PolygonBoxTransformOpCUDAKernel, + paddle::operators::PolygonBoxTransformOpCUDAKernel); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h index 2a7df149a9f4b03676f172da980c927d7fa5e8a4..63ea60678f80708f5a8340edd22588553b9ec139 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h @@ -24,6 +24,14 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* out = ctx.Output("Out"); + auto* in = ctx.Input("Input"); + if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { + // set the correct batch size for the LoDTensor. + auto odims = out->dims(); + int output_dim_idx = ctx.Attr("output_dim_idx"); + odims[output_dim_idx] = static_cast(in->lod().back().size()) - 1; + out->mutable_data(odims, ctx.GetPlace()); + } out->mutable_data(ctx.GetPlace()); auto value = ctx.Attr("value"); diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index fc0fca5ad3370633b2f60db65fdb7c01c417dc50..caff35e03ae3a144f799d982c859ded62cb3e93d 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -46,7 +46,10 @@ class CrossEntropyFunctor { const int64_t* label_data = labels->data(); for (int i = 0; i < batch_size; ++i) { - int index = i * class_num + label_data[i]; + int lbl = label_data[i]; + PADDLE_ENFORCE_GE(lbl, 0); + PADDLE_ENFORCE_LT(lbl, class_num); + int index = i * class_num + lbl; loss_data[i] = -math::TolerableValue()(std::log(prob_data[index])); } } diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index bac4659e62b107dd80ef95dd0907b3da4becffbc..8e9dbbd7a154095a7298bb2f59a82d13a60f9bd3 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/function/EigenThreadDevice.h" namespace paddle { @@ -70,25 +70,26 @@ struct EigenBlasGemm { dims[0].first = transA ? 0 : 1; dims[0].second = transB ? 1 : 0; - Eigen::DefaultDevice device; + auto* device = EigenDeviceWarpper::device(); if (N == ldc) { if (alpha == T(1) && beta == T(0)) { - c.device(device) = a.contract(b, dims); + c.device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.device(device) += a.contract(b, dims); + c.device(*device) += a.contract(b, dims); } else { - c.device(device) = alpha * a.contract(b, dims) + beta * c; + c.device(*device) = alpha * a.contract(b, dims) + beta * c; } } else { if (alpha == T(1) && beta == T(0)) { - c.slice(offsetC, extentC).device(device) = a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.slice(offsetC, extentC).device(device) += a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) += a.contract(b, dims); } else { - c.slice(offsetC, extentC).device(device) = + c.slice(offsetC, extentC).device(*device) = alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC); } } + EigenDeviceWarpper::free_device(device); } }; diff --git a/paddle/function/EigenThreadDevice.h b/paddle/function/EigenThreadDevice.h new file mode 100644 index 0000000000000000000000000000000000000000..74269aa664a711c905e12a61958c9ab01e2340c0 --- /dev/null +++ b/paddle/function/EigenThreadDevice.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#if defined(__OSX__) || defined(__APPLE__) +#include +#include +#endif +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { + +#if defined(__ANDROID__) +int GetCpuCount() { + FILE* fp = fopen("/sys/devices/system/cpu/possible", "r"); + if (!fp) { + return 1; + } + int rank0, rank1; + int num = fscanf(fp, "%d-%d", &rank0, &rank1); + fclose(fp); + if (num < 2) return 1; + return rank1 + 1; +} +#elif defined(__OSX__) || defined(__APPLE__) +int GetCpuCount() { + int count = 0; + size_t len = sizeof(int); + sysctlbyname("hw.ncpu", &count, &len, NULL, 0); + return count > 0 ? count : 1; +} +#else +int GetCpuCount() { return 1; } +#endif + +class EigenDeviceWarpper { +public: // NOLINT +#if EIGEN_USE_THREADS + static Eigen::ThreadPoolDevice* device() { + const int num_cpus = GetCpuCount(); + const int num_threads = (num_cpus > 2) ? 2 : num_cpus; + static Eigen::ThreadPool tp(num_threads); + static Eigen::ThreadPoolDevice* device = + new Eigen::ThreadPoolDevice(&tp, num_threads); + return device; + } + + static void free_device(Eigen::ThreadPoolDevice* device) { + // do nothing + } +#else + static Eigen::DefaultDevice* device() { + Eigen::DefaultDevice* device = new Eigen::DefaultDevice; + return device; + } + + static void free_device(Eigen::DefaultDevice* device) { delete device; } +#endif +}; + +} // namespace paddle diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index 25fc35311fc63988c64a445d72fc6255e49e8d4b..7c80faa48ce960a3a7eb7d88eda4f2b09756410e 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -7,6 +7,10 @@ set(OPITMIZER_SRCS sgd_optimizer.cc ) -cc_library(paddle_optimizer STATIC SRCS ${OPITMIZER_SRCS} DEPS paddle_proto glog) -cc_test(serialization_test SRCS serialization_test.cc DEPS paddle_proto) -cc_test(parameter_optimizer_test SRCS parameter_optimizer_test.cc DEPS paddle_optimizer) +add_library(paddle_optimizer ${OPITMIZER_SRCS}) +target_link_libraries(paddle_optimizer paddle_proto glog) + +if (WITH_TESTING) + add_unittest(serialization_test serialization_test.cc) + add_unittest(parameter_optimizer_test parameter_optimizer_test.cc) +endif() diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh deleted file mode 100755 index baff7628ea01caa0248af82c6eed2c3b546cdb35..0000000000000000000000000000000000000000 --- a/paddle/scripts/docker/build.sh +++ /dev/null @@ -1,256 +0,0 @@ -#!/bin/bash - -function cmake_gen() { - mkdir -p /paddle/build - cd /paddle/build - - # build script will not fail if *.deb does not exist - rm *.deb 2>/dev/null || true - # delete previous built whl packages - rm -rf /paddle/paddle/dist 2>/dev/null || true - - # Support build for all python versions, currently - # including cp27-cp27m and cp27-cp27mu. - PYTHON_FLAGS="" - if [ "$1" != "" ]; then - echo "using python abi: $1" - if [ "$1" == "cp27-cp27m" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} - export PATH=/opt/python/cp27-cp27m/bin/:${PATH} - PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python - -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" - elif [ "$1" == "cp27-cp27mu" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} - export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} - PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python - -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" - fi - fi - - cat < /paddle/build/Dockerfile < - ENV HOME /root -EOF - - if [[ ${WITH_GPU} == "ON" ]]; then - NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&" - else - NCCL_DEPS="" - fi - - if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then - PADDLE_VERSION="paddle version" - CMD='"paddle", "version"' - else - PADDLE_VERSION="true" - CMD='"true"' - fi - - cat >> /paddle/build/Dockerfile <> /paddle/build/Dockerfile <> /paddle/build/Dockerfile <= 21." - ANDROID_API=21 - fi -else # armeabi, armeabi-v7a - ANDROID_ARCH=arm -fi - -ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API - -cat <&2 - echo "Please use pre-commit to check what is wrong." 1>&2 - exit 1 -} - -trap 'abort' 0 -set -e - -# install glide -curl https://glide.sh/get | bash -eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - -# set up go environment for running gometalinter -mkdir -p $GOPATH/src/github.com/PaddlePaddle/ -ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle -cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd - - -go get github.com/alecthomas/gometalinter -gometalinter --install - -cd $TRAVIS_BUILD_DIR -export PATH=/usr/bin:$PATH -pre-commit install -clang-format --version - - - -if ! pre-commit run -a ; then - git diff - exit 1 -fi - -trap : 0 diff --git a/paddle/scripts/travis/deploy_key.enc b/paddle/scripts/travis/deploy_key.enc deleted file mode 100644 index b0aa45c5ac626c735735fd8541a43bf8b099d0a0..0000000000000000000000000000000000000000 Binary files a/paddle/scripts/travis/deploy_key.enc and /dev/null differ diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index faa2599f62d0e91767ea2dbc80bb25a58bc5c88d..03d4602f7a99dc335260cffdcdc30a839f3988cd 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -23,7 +23,7 @@ from ..executor import global_scope __all__ = [ 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', - 'Preprocessor' + 'random_data_generator', 'Preprocessor' ] diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 04ee8ac9aee92a0e161e83bf1bb34d3ce727a0fb..b6c47aa9a65b9145983513715233784d77e3d904 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -81,7 +81,7 @@ __all__ = [ 'label_smooth', 'roi_pool', 'dice_loss', - 'bilinear_interp', + 'upsampling_bilinear2d', ] @@ -3917,8 +3917,10 @@ def dice_loss(input, label, epsilon=0.00001): return reduce_mean(dice_score) -def bilinear_interp(input, out_h, out_w, name=None): +def upsampling_bilinear2d(input, out_shape=None, scale=None, name=None): """ + The mathematical meaning of upsampling_bilinear2d is also called + Bilinear interpolation. Bilinear interpolation is an extension of linear interpolation for interpolating functions of two variables (e.g. H-direction and W-direction in this layer) on a rectilinear 2D grid. @@ -3930,8 +3932,13 @@ def bilinear_interp(input, out_h, out_w, name=None): input (Variable): The input tensor of bilinear interpolation, This is a 4-D tensor of the shape (num_batches, channels, in_h, in_w). - out_h (int): output height of bilinear interpolation layer. - out_w (int): output width of bilinear interpolation layer. + out_shape(list|tuple|None): Output shape of bilinear interpolation + layer, the shape is (out_h, out_w). + Default: None + scale(int|None): The multiplier for the input height or width. + At least one of out_shape or scale must be set. + And out_shape has a higher priority than scale. + Default: None name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. @@ -3942,10 +3949,27 @@ def bilinear_interp(input, out_h, out_w, name=None): Examples: .. code-block:: python - out = fluid.layers.bilinear_interp(input, out_h=12, out_w=12) + out = fluid.layers.bilinear_interp(input, out_shape=[12, 12]) """ + if out_shape is None and scale is None: + raise ValueError("One of out_shape and scale must not be None") helper = LayerHelper('bilinear_interp', **locals()) dtype = helper.input_dtype() + + def _is_list_or_turple_(data): + return (isinstance(data, list) or isinstance(data, tuple)) + + if out_shape is not None: + if not (_is_list_or_turple_(out_shape) and len(out_shape) == 2): + raise ValueError('out_shape should be a list or tuple ', + 'with length 2, (out_h, out_w).') + out_shape = list(map(int, out_shape)) + out_h = out_shape[0] + out_w = out_shape[1] + else: + out_h = int(input.shape[2] * scale) + out_w = int(input.shape[3] * scale) + out = helper.create_tmp_variable(dtype) helper.append_op( type="bilinear_interp", diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index 555e371952d0f902063133c2a227eb78f082726c..9946d0a4ff33b2f5040f6d2e31aa20fcf9c609a7 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -93,12 +93,12 @@ def _convert_lod(lod): def create_lod_tensor(data, lod, place): - """Create a lod tensor from a numpy array or an existing lod tensor. + """Create a lod tensor from a numpy array, a list, or an existing lod tensor. Create a lod tensor by doing the following: 1. Check that the length-based input lod is valid. 2. Convert the length-based lod to a offset-based LoD. - 3. Copy the data from a numpy array or a existing lod tensor to + 3. Copy the data from a numpy array, a list or a existing lod tensor to CPU or GPU device (based on input place). 4. Set the level of detail (LoD) using the offset-based LoD. @@ -117,7 +117,7 @@ def create_lod_tensor(data, lod, place): for more details regarding LoD. Args: - data: a numpy array or a LoDTensor holding the data to be copied. + data: a numpy array or a LoDTensor or a list holding the data to be copied. lod: a list of lists indicating the length-based LoD info specified by the user. place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. @@ -126,6 +126,18 @@ def create_lod_tensor(data, lod, place): """ if isinstance(data, core.LoDTensor): return create_lod_tensor(np.array(data), lod, place) + elif isinstance(data, list): + # When input data is a list, it only deal with the case where the base element + # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated + # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number + # of words or other indexes in the sequence. + new_lod = [] + for seq in data: + new_lod.append(len(seq)) + assert [new_lod] == lod, "data and lod do not match" + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + return create_lod_tensor(flattened_data, lod, place) elif isinstance(data, np.ndarray): assert _validate_lod(lod, data.shape[0]), "the provided lod info is invalid" @@ -134,9 +146,8 @@ def create_lod_tensor(data, lod, place): tensor.set_lod(_convert_lod(lod)) return tensor else: - raise Exception( - "data should be either a LoDTensor or a Numpy array, but you pass type %s instead" - % (type(data))) + raise TypeError( + "data should be either a LoDTensor, a Numpy array or a list") def create_random_int_lodtensor(lod, base_shape, place, low, high): diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index 5fba561e024b0690f10939267146f2622c567fa5..de3906fc6a005181b0ab04a846eb2e7ce14004c2 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -48,7 +48,7 @@ def linear(): return avg_loss -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() trainer = fluid.Trainer( @@ -68,8 +68,8 @@ def train(use_cuda, train_program, save_dirname): ['15.343549569447836'] ... ''' - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) trainer.stop() trainer.train( @@ -80,13 +80,13 @@ def train(use_cuda, train_program, save_dirname): # infer -def infer(use_cuda, inference_program, save_dirname=None): - if save_dirname is None: +def infer(use_cuda, inference_program, params_dirname=None): + if params_dirname is None: return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 10 tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") @@ -100,10 +100,10 @@ def main(use_cuda): return # Directory for saving the trained model - save_dirname = "fit_a_line.inference.model" + params_dirname = "fit_a_line.inference.model" - train(use_cuda, linear, save_dirname) - infer(use_cuda, inference_program, save_dirname) + train(use_cuda, linear, params_dirname) + infer(use_cuda, inference_program, params_dirname) class TestFitALine(unittest.TestCase): diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index 1160e500dbd6db784eeb81b72968386347fec59a..63dc1b6ce30974ede22a3f7772b76bf207bbae39 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -85,7 +85,7 @@ def train_network(): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): BATCH_SIZE = 128 EPOCH_NUM = 1 @@ -105,8 +105,8 @@ def train(use_cuda, train_program, save_dirname): print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) if accuracy > 0.01: # Low threshold for speeding up CI - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -122,10 +122,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['pixel', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -142,12 +142,14 @@ def main(use_cuda): save_path = "image_classification_resnet.inference.model" train( - use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + use_cuda=use_cuda, + train_program=train_network, + params_dirname=save_path) infer( use_cuda=use_cuda, inference_program=inference_network, - save_dirname=save_path) + params_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index 1e3e955ba0299f2cc0fcc02d79ae6fd8ff4c1171..0bf8f265a1c1b11364ecfa11061af183ce20d51e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -64,7 +64,7 @@ def train_network(): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): BATCH_SIZE = 128 train_reader = paddle.batch( paddle.reader.shuffle( @@ -82,8 +82,8 @@ def train(use_cuda, train_program, save_dirname): print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) if accuracy > 0.01: # Low threshold for speeding up CI - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -99,10 +99,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['pixel', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -119,12 +119,14 @@ def main(use_cuda): save_path = "image_classification_vgg.inference.model" train( - use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + use_cuda=use_cuda, + train_program=train_network, + params_dirname=save_path) infer( use_cuda=use_cuda, inference_program=inference_network, - save_dirname=save_path) + params_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py index f4344988141af44af83fda24d73da25f597796ef..9464df59797c0b8c35611ee56de6bf362ac7a4a5 100755 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -141,7 +141,7 @@ def train_program(): return [avg_cost] -def train(use_cuda, train_program, save_path): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.SGD(learning_rate=0.01) @@ -172,7 +172,7 @@ def train(use_cuda, train_program, save_path): print("avg_cost: %s" % avg_cost) if float(avg_cost) < 100.0: # Large value to increase CI speed - trainer.save_params(save_path) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1, float(avg_cost))) @@ -183,7 +183,7 @@ def train(use_cuda, train_program, save_path): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_path) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -197,10 +197,10 @@ def train(use_cuda, train_program, save_path): feed_order=feed_order) -def infer(use_cuda, inference_program, save_path): +def infer(use_cuda, inference_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - inference_program, param_path=save_path, place=place) + inference_program, param_path=params_dirname, place=place) # Setup inputs by creating LoDTensors to represent sequences of words. # Here each word is the basic element of these LoDTensors and the shape of @@ -251,9 +251,9 @@ def infer(use_cuda, inference_program, save_path): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "label_semantic_roles.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "label_semantic_roles.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index 2aac70463c64019ec97b0c3893b4b52f77967797..03439cbd37671b4727879bf3d0793f016f55247a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -57,7 +57,7 @@ def train_program(): return [avg_cost, acc] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adam(learning_rate=0.001) @@ -78,7 +78,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( event.epoch + 1, avg_cost, acc)) @@ -100,11 +100,11 @@ def train(use_cuda, train_program, save_dirname): feed_order=['img', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 1 tensor_img = numpy.random.uniform(-1.0, 1.0, @@ -116,17 +116,17 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): - save_dirname = "recognize_digits_conv.inference.model" + params_dirname = "recognize_digits_conv.inference.model" # call train() with is_local argument to run distributed train train( use_cuda=use_cuda, train_program=train_program, - save_dirname=save_dirname) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_dirname=save_dirname) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index 32653157994f81c46f420c1b55ceddbbbf06f2fe..89bbd21bea7d64a8dd6fc32829b6addb680da62e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -44,7 +44,7 @@ def train_program(): return [avg_cost, acc] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adam(learning_rate=0.001) @@ -62,7 +62,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( event.epoch + 1, avg_cost, acc)) @@ -81,11 +81,11 @@ def train(use_cuda, train_program, save_dirname): feed_order=['img', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 1 tensor_img = numpy.random.uniform(-1.0, 1.0, @@ -97,17 +97,17 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): - save_dirname = "recognize_digits_mlp.inference.model" + params_dirname = "recognize_digits_mlp.inference.model" # call train() with is_local argument to run distributed train train( use_cuda=use_cuda, train_program=train_program, - save_dirname=save_dirname) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_dirname=save_dirname) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py index 259680cb097a12a4fc92107f6fd8595393f88bd5..dfc7325acf23176c05fe42761b9997b98d23372a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -155,7 +155,7 @@ def train_program(): return [avg_cost, scale_infer] -def train(use_cuda, train_program, save_path): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.SGD(learning_rate=0.2) @@ -180,7 +180,7 @@ def train(use_cuda, train_program, save_path): print("avg_cost: %s" % avg_cost) if float(avg_cost) < 4: # Smaller value to increase CI speed - trainer.save_params(save_path) + trainer.save_params(params_dirname) trainer.stop() else: print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1, @@ -197,43 +197,30 @@ def train(use_cuda, train_program, save_path): num_epochs=1, event_handler=event_handler, reader=train_reader, - feed_order=[ - 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', - 'category_id', 'movie_title', 'score' - ]) + feed_order=feed_order) -def infer(use_cuda, inference_program, save_path): +def infer(use_cuda, inference_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - inference_program, param_path=save_path, place=place) - - def create_lod_tensor(data, lod=None): - tensor = fluid.LoDTensor() - if lod is None: - # Tensor, the shape is [batch_size, 1] - index = 0 - lod_0 = [index] - for l in range(len(data)): - index += 1 - lod_0.append(index) - lod = [lod_0] - tensor.set_lod(lod) - - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - tensor.set(flattened_data, place) - return tensor - - # Generate a random input for inference - user_id = create_lod_tensor([[1]]) - gender_id = create_lod_tensor([[1]]) - age_id = create_lod_tensor([[0]]) - job_id = create_lod_tensor([[10]]) - movie_id = create_lod_tensor([[783]]) - category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]]) - movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]], - [[0, 5]]) + inference_program, param_path=params_dirname, place=place) + + # Use the first data from paddle.dataset.movielens.test() as input. + # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor, + # where `data` is a list of sequences of index numbers, `lod` is + # the level of detail (lod) info associated with `data`. + # For example, data = [[10, 2, 3], [2, 3]] means that it contains + # two sequences of indexes, of length 3 and 2, respectively. + # Correspondingly, lod = [[3, 2]] contains one level of detail info, + # indicating that `data` consists of two sequences of length 3 and 2. + user_id = fluid.create_lod_tensor([[1]], [[1]], place) + gender_id = fluid.create_lod_tensor([[1]], [[1]], place) + age_id = fluid.create_lod_tensor([[0]], [[1]], place) + job_id = fluid.create_lod_tensor([[10]], [[1]], place) + movie_id = fluid.create_lod_tensor([[783]], [[1]], place) + category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) + movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]], + place) results = inferencer.infer( { @@ -253,12 +240,15 @@ def infer(use_cuda, inference_program, save_path): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "recommender_system.inference.model" - train(use_cuda=use_cuda, train_program=train_program, save_path=save_path) + params_dirname = "recommender_system.inference.model" + train( + use_cuda=use_cuda, + train_program=train_program, + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_path=save_path) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt index 673c965b662a022739f8d489c331f4de9455a926..d71147a85e77ea6dc5b6391aa169abd9b02a0aa1 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt @@ -1,6 +1,11 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +# This test is buggy +# py_test(test_understand_sentiment_dynamic_rnn SRCS +# test_understand_sentiment_dynamic_rnn.py SERIAL) +LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn) + # default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 7e32696f9909a0a440f6bdc401ac9f9594c4dec7..11e9fd1bec1450f6753dbe38c7014090d6e136b6 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -64,7 +64,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -85,7 +85,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -112,13 +112,13 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. @@ -143,9 +143,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_conv.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index e50b7920b17f86eada3abc700c5403053fca8771..90757d54f99715163518ce5a094e6ba3a67efed3 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -79,7 +79,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -100,7 +100,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -112,7 +112,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -127,13 +127,13 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. @@ -158,9 +158,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_conv.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index d4fb80168814359827708ad921bd3f53b14bb2ee..52b7d4a83779d01936afb3d9d1e4864b05d55b5a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -71,7 +71,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -92,7 +92,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -104,7 +104,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -119,13 +119,13 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. @@ -150,9 +150,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_stacked_lstm.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_stacked_lstm.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index 16d73d4aff4ba31327e6d8f5ac04a36387f59daa..eeb8e67087334ea96aab9cdb6272e34e2eb99939 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -80,7 +80,7 @@ def train_program(is_sparse): return avg_cost -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): train_reader = paddle.batch( paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) test_reader = paddle.batch( @@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname): print("loss= ", avg_cost) if avg_cost < 10.0: - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() if math.isnan(avg_cost): @@ -115,10 +115,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word # is simply an index to look up for the corresponding word vector and hence @@ -153,17 +153,17 @@ def main(use_cuda, is_sparse): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "word2vec.inference.model" + params_dirname = "word2vec.inference.model" train( use_cuda=use_cuda, train_program=partial(train_program, is_sparse), - save_dirname=save_path) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=partial(inference_program, is_sparse), - save_dirname=save_path) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 7be924f762ddeb045dda890dbfdcd96a65449553..65d6552acc9b3d31a97a45290e4613a633fffa3c 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True): test_reader = paddle.batch( paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) - feeding = { - 'user_id': 0, - 'gender_id': 1, - 'age_id': 2, - 'job_id': 3, - 'movie_id': 4, - 'category_id': 5, - 'movie_title': 6, - 'score': 7 - } - - def func_feed(feeding, data): - feed_tensors = {} - for (key, idx) in feeding.iteritems(): - tensor = fluid.LoDTensor() - if key != "category_id" and key != "movie_title": - if key == "score": - numpy_data = np.array(map(lambda x: x[idx], data)).astype( - "float32") - else: - numpy_data = np.array(map(lambda x: x[idx], data)).astype( - "int64") - else: - numpy_data = map(lambda x: np.array(x[idx]).astype("int64"), - data) - lod_info = [len(item) for item in numpy_data] - offset = 0 - lod = [offset] - for item in lod_info: - offset += item - lod.append(offset) - numpy_data = np.concatenate(numpy_data, axis=0) - tensor.set_lod([lod]) - - numpy_data = numpy_data.reshape([numpy_data.shape[0], 1]) - tensor.set(numpy_data, place) - feed_tensors[key] = tensor - return feed_tensors + feed_order = [ + 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id', + 'movie_title', 'score' + ] def train_loop(main_program): exe.run(framework.default_startup_program()) + feed_list = [ + main_program.global_block().var(var_name) for var_name in feed_order + ] + feeder = fluid.DataFeeder(feed_list, place) + PASS_NUM = 100 for pass_id in range(PASS_NUM): for batch_id, data in enumerate(train_reader()): # train a mini-batch outs = exe.run(program=main_program, - feed=func_feed(feeding, data), + feed=feeder.feed(data), fetch_list=[avg_cost]) out = np.array(outs[0]) if (batch_id + 1) % 10 == 0: avg_cost_set = [] for test_data in test_reader(): - avg_cost_np = exe.run( - program=test_program, - feed=func_feed(feeding, test_data), - fetch_list=[avg_cost]) + avg_cost_np = exe.run(program=test_program, + feed=feeder.feed(test_data), + fetch_list=[avg_cost]) avg_cost_set.append(avg_cost_np[0]) break # test only 1 segment for speeding up CI @@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) - def create_lod_tensor(data, lod=None): - tensor = fluid.LoDTensor() - if lod is None: - # Tensor, the shape is [batch_size, 1] - index = 0 - lod_0 = [index] - for l in range(len(data)): - index += 1 - lod_0.append(index) - lod = [lod_0] - tensor.set_lod(lod) - - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - tensor.set(flattened_data, place) - return tensor - inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, @@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None): # Use the first data from paddle.dataset.movielens.test() as input assert feed_target_names[0] == "user_id" - user_id = create_lod_tensor([[1]]) + # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor + # where `data` is a list of sequences of index numbers, `lod` is + # the level of detail (lod) info associated with `data`. + # For example, data = [[10, 2, 3], [2, 3]] means that it contains + # two sequences of indexes, of length 3 and 2, respectively. + # Correspondingly, lod = [[3, 2]] contains one level of detail info, + # indicating that `data` consists of two sequences of length 3 and 2. + user_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[1] == "gender_id" - gender_id = create_lod_tensor([[1]]) + gender_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[2] == "age_id" - age_id = create_lod_tensor([[0]]) + age_id = fluid.create_lod_tensor([[0]], [[1]], place) assert feed_target_names[3] == "job_id" - job_id = create_lod_tensor([[10]]) + job_id = fluid.create_lod_tensor([[10]], [[1]], place) assert feed_target_names[4] == "movie_id" - movie_id = create_lod_tensor([[783]]) + movie_id = fluid.create_lod_tensor([[783]], [[1]], place) assert feed_target_names[5] == "category_id" - category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]]) + category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) assert feed_target_names[6] == "movie_title" - movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]], - [[0, 5]]) + movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], + [[5]], place) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index b11131456a1f87419407c4d8626ebcde26dd7640..013d72f418cf7ac11eb31fd221052039e896e203 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -53,11 +53,14 @@ class TestLoDTensor(unittest.TestCase): self.assertEqual(_convert_lod(lod), converted_lod) def test_create_lod_tensor(self): - # Only numpy array or a fluid LoDTensor is valid input to - # create_lod_tensor function, currently a list of lists is not. - data = [[1, 2], [3, 4]] - self.assertRaises(Exception, create_lod_tensor, data, [], + # Create LoDTensor from a list + data = [[1, 2, 3], [3, 4]] + wrong_lod = [[2, 2]] + correct_lod = [[3, 2]] + self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, fluid.CPUPlace()) + tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) + self.assertEqual(tensor.lod(), [[0, 3, 5]]) # Create LoDTensor from numpy array data = numpy.random.random([10, 1]) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 709b4bf2fcfb180c747ba3539711a58a57e3b77f..b611470fa1ff326df960c349b71006f52d586d8e 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -479,9 +479,9 @@ class OpTest(unittest.TestCase): def np_dtype_to_fluid_dtype(input): """Change the dtype of float16 numpy array - numpy float16 is binded to paddle::platform::float16 + numpy float16 is binded to paddle::platform::float16 in tensor_py.h via the help of uint16 data type since - the internal memory representation of float16 is + the internal memory representation of float16 is uint16_t in paddle and np.uint16 in numpy, which are themselves binded together by pybind. @@ -489,9 +489,9 @@ class OpTest(unittest.TestCase): input: input numpy array Returns: - input: The dtype of input will be changed to np.uint16 if + input: The dtype of input will be changed to np.uint16 if it is originally np.float16, such that the internal memory - of input will be reinterpreted as of dtype np.uint16. + of input will be reinterpreted as of dtype np.uint16. """ if input.dtype == np.float16: input.dtype = np.uint16 diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py index 66e3e2d51d118756d4881955b4df8eb4d2bbc094..533d8ccfac82a2e298af16181ab16bf7aa3db282 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py @@ -50,5 +50,27 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest): self.check_output() +class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest): + def setUp(self): + self.op_type = "fill_constant_batch_size_like" + self.inputs = { + 'Input': (np.random.random((31, 28)).astype("float32"), + [[0, 9, 23, 31]]) + } + self.attrs = { + 'value': 3.5, + 'shape': [-1, 16], + 'input_dim_idx': 0, + 'output_dim_idx': 0 + } + + out = np.random.random((3, 16)).astype("float32") + out.fill(3.5) + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index c44ac59ccdb7fa212ab2a8ab83ee0c70fc498f9f..60dc1f83fc32e2551eb2a04ef35f1c8a0ffec769 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -369,11 +369,13 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) - def test_bilinear_interp(self): + def test_upsampling_bilinear2d(self): program = Program() with program_guard(program): x = layers.data(name='x', shape=[3, 9, 6], dtype="float32") - output = layers.bilinear_interp(x, 12, 12) + output = layers.upsampling_bilinear2d(x, out_shape=[12, 12]) + self.assertIsNotNone(output) + output = layers.upsampling_bilinear2d(x, scale=3) self.assertIsNotNone(output) print(str(program)) diff --git a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..2105d320665367e3ec1bfd7b3a353a144c91244f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py @@ -0,0 +1,68 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +def PolygonBoxRestore(input): + shape = input.shape + batch_size = shape[0] + geo_channels = shape[1] + h = shape[2] + w = shape[3] + h_indexes = np.array(range(h) * w).reshape( + [w, h]).transpose()[np.newaxis, :] # [1, h, w] + w_indexes = np.array(range(w) * h).reshape( + [h, w])[np.newaxis, :] # [1, h, w] + indexes = np.concatenate( + (w_indexes, h_indexes))[np.newaxis, :] # [1, 2, h, w] + indexes = indexes.repeat( + [geo_channels / 2], + axis=0)[np.newaxis, :] # [1, geo_channels/2, 2, h, w] + indexes = indexes.repeat( + [batch_size], axis=0) # [batch_size, geo_channels/2, 2, h, w] + return indexes.reshape( + input.shape) - input # [batch_size, geo_channels, h, w] + + +class TestPolygonBoxRestoreOp(OpTest): + def config(self): + self.input_shape = (1, 8, 2, 2) + + def setUp(self): + self.config() + self.op_type = "polygon_box_transform" + input = np.random.random(self.input_shape).astype("float32") + self.inputs = {'Input': input} + output = PolygonBoxRestore(input) + self.outputs = {'Output': output} + + def test_check_output(self): + self.check_output() + + +class TestCase1(TestPolygonBoxRestoreOp): + def config(self): + self.input_shape = (2, 10, 3, 2) + + +class TestCase2(TestPolygonBoxRestoreOp): + def config(self): + self.input_shape = (3, 12, 4, 5) + + +if __name__ == '__main__': + unittest.main()