diff --git a/CMakeLists.txt b/CMakeLists.txt
index b309ff37e52b4fd28b14925bdd7e3740e1e2fa47..5df83499d5dde29b205ee17fba81a63c9a643235 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,8 +16,6 @@ cmake_minimum_required(VERSION 3.0)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
-SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
-SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
include(system)
@@ -201,6 +199,10 @@ if(WITH_GOLANG)
endif(WITH_GOLANG)
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
+
+SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
+SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
+
add_subdirectory(paddle)
if(WITH_PYTHON)
add_subdirectory(python)
diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md
index 8ee7fd28c58f2a2bcb82040eb824a37062bd4e9c..6cc9598947acbdacfbf4c4379987bab8ed7611b0 100644
--- a/benchmark/IntelOptimizedPaddle.md
+++ b/benchmark/IntelOptimizedPaddle.md
@@ -22,6 +22,7 @@ On each machine, we will test and compare the performance of training on single
#### Training
Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
+Pay attetion that the speed below includes forward, backward and parameter update time. So we can not directly compare the data with the benchmark of caffe `time` [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which only contain forward and backward. The updating time of parameter would become very heavy when the weight size are large, especially on alexnet.
Input image size - 3 * 224 * 224, Time: images/second
@@ -55,6 +56,16 @@ Input image size - 3 * 224 * 224, Time: images/second
+- Alexnet
+
+| BatchSize | 64 | 128 | 256 |
+|--------------|--------| ------ | -------|
+| OpenBLAS | 2.13 | 2.45 | 2.68 |
+| MKLML | 66.37 | 105.60 | 144.04 |
+| MKL-DNN | 399.00 | 498.94 | 626.53 |
+
+chart TBD
+
#### Inference
Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
- VGG-19
diff --git a/doc/design/mkl/mkldnn_fluid.md b/doc/design/mkl/mkldnn_fluid.md
new file mode 100644
index 0000000000000000000000000000000000000000..bef126f3f0577b69f646dfe5d10539b372c6a8a5
--- /dev/null
+++ b/doc/design/mkl/mkldnn_fluid.md
@@ -0,0 +1,149 @@
+# Design Doc: Add MKLDNN Kernel in Fluid Operator
+
+## Principles
+
+First of all, we should follow some basical principles like:
+1. [How to write a new operator](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md). We are trying to add a new kind of kernel into operators, so basically we should follow this doc.
+2. [Supporting new Device/Library](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/support_new_device.md). Since MKLDNN is a new library to fluid, we should add `MKLDNNDeviceContext` and maybe `mkldnn_helper.h`, just like [cudnn_helper.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/cudnn_helper.h).
+3. [Switch Kernel](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md). Another important point is that we should ensure the data synchronization between different kernel types, which is this [topic](https://github.com/PaddlePaddle/Paddle/issues/6549). So basically we should override `GetExpectedKernelType` and `trans` functions to support switching kernels.
+4. [The Keys of Operator Kernel Type](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md). Kernel Type is a pivotal conception which can record the `Place`, `Library`, `DataType` and `Layout`.
+
+## Sulution
+
+In general, there are four parts we should follow to run a MKL-DNN primitive.
+- Create a primitive descriptor that describe this operator
+- Create a primitive itself by primitive descriptor and the engine
+- Create all memory buffers that primitive needed
+- Launch a stream to execute the primitive created
+More details can refer to [here](http://01org.github.io/mkl-dnn).
+
+It's better to avoid reinitialization of primitives and memory handles in the first three stages in every iteration. \
+So we plan to create a map to record all the `primitive` and `memory`, which should not take too much memories as discussed [here](https://github.com/PaddlePaddle/Paddle/issues/6822).
+
+It's assumed that following three conditions should be satisfied.
+1. there is a unique key for each operator instance. May be the actual name of `Output Tensor`.
+2. the `Input Tensor` inside `Compute` function is the one after converted.
+3. we can get the phase(eg. `is_test`) inside `Compute` function, otherwise we need to expose this attribue to user.
+
+### Compute
+The algorithm of `Compute` would be described as follow, let's take conv like an example.
+
+```c++
+
+ PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace.");
+ PADDLE_ENFORCE(platform::is_mkldnn_library(ctx.GetLibrary()), "It must use MKLDNN Library.");
+
+ auto& dev_ctx = ctx.template device_context();
+
+ // find primitive by unique key from mkldnn context
+ // the op_key should be a unique name of this op instance
+ auto& p = dev_ctx.findPrimitive(op_key + "_fwd");
+
+ // assuming the input tensor inside this compute function is the one after converted
+ // this point should be guarantee by another mechanism
+ auto& i = dev_ctx.findMemory(op_key + "_input");
+
+ if (p == nullptr || i == nullptr || inputSizeChanged(p, i)) {
+ auto fwd_primitive_desc = createPrimitiveDesc(ctx);
+ auto* input = ctx.Input("Input");
+ auto* filter = ctx.Input("Filter");
+ auto* output = ctx.Output("Output");
+ shared_ptr in(new mkldnn::memory(fwd_primitive_desc->src_primitive_desc(), input->data()));
+ shared_ptr wgt(new mkldnn::memory(fwd_primitive_desc->weights_primitive_desc(), filter->data()));
+ shared_ptr out(new mkldnn::memory(fwd_primitive_desc->dst_primitive_desc(), output->mutable_data(ctx.GetPlace())));
+ shared_ptr fwd_primitive(new mkldnn::conv_fwd(*fwd_primitive_desc, *in, *wgt, *out));
+
+ dev_ctx.addMemory(op_key+"_input", in);
+ dev_ctx.addMemory(op_key+"_output", out);
+ dev_ctx.addMemory(op_key+"_filer", wgt);
+ dev_ctx.addPrimitive(op_key+"_fwd", fwd_primitive);
+ dev_ctx.addPrimitiveDesc(op_key+"_fwd_PD", fwd_primitive_desc);
+ }
+
+ p = dev_ctx.findPrimitive(op_key + "_fwd");
+
+ PADDLE_ENFORCE(p, "Should have forward Primitive");
+ PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_input"), "Should have input memory");
+ PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_output"), "Should have output memory");
+ PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_filter"), "Should have filter memory");
+ PADDLE_ENFORCE(dev_ctx.findPrimitiveDesc(op_unique_key+"_fwd_PD"), "Should have forward PrimitiveDesc");
+ dev_ctx.submit(p);
+ dev_ctx.execute(); // the convert primitive should have already contained.
+
+```
+
+The `createPrimitiveDesc` returns the primitive descripotor of this operator, would be like this:
+```c++
+ auto* input = ctx.Input("Input");
+ auto* filter = ctx.Input("Filter");
+ auto* output = ctx.Output("Output");
+ std::vector strides = ctx.Attr>("strides");
+ std::vector paddings = ctx.Attr>("paddings");
+ std::vector dilations = ctx.Attr>("dilations");
+ int groups = ctx.Attr("groups");
+ algorithm algo = static_cast(ctx.Attr("convolution_algorithm_option"));
+ prop_kind pk = ctx.Attr("is_test") ? prop_kind::forward_inference : prop_kind::forward_training;
+
+ auto fwd_desc = mkldnn::conv_fwd::desc(/* all the setting above*/);
+ shared_ptr fwd_primitive_desc(new mkldnn::conv_fwd::primitive_desc(fwd_desc, ctx.getEngine()));
+
+ return fwd_primitive_desc;
+ }
+```
+
+### MKLDNNDeviceContext
+`MKLDNNDeviceContext`, which is very straightforward, should contain some base information like: `stream`, `engine` and the map needed.
+
+
+### mkldnn_helper
+Some functions would be put in `paddle/platform/mkldnn_helper.h`.
+- create MKLDNN memories
+- create MKLDNN primitives
+- error check function
+- etc
+
+
+### Kernel Switch
+We should `reorder` the different Layout from other device or to other device. `GetExpectedKernelType` and `trans` functions can help us to implement it.
+
+`GetExpectedKernelType` should get the context, and this operator can return the best `KernelType`.
+`trans` would be like this:
+
+```c++
+void trans(inputs, ctx) override {
+ if (NoNeedTrans()) {
+ return;
+ }
+ // find reorder primitive by op_key from context
+ auto& dev_ctx = ctx.template device_context();
+ auto& p = dev_ctx.findPrimitive(op_key + "_reorder_input");
+ auto& i = dev_ctx.findMemory(op_key + "_src_input");
+
+ if (p == nullptr || i == nullptr || changeSized(i, input)) {
+ auto prim = createPrimitiveDesc(ctx);
+ auto src = createMemory(memoryDesc(input->dims(), actual_layout), input->data);
+ auto newbuffer = paddle::memory::Alloc(ctx.GetPlace(), input->size_in_bytes());
+ auto dst = createMemory(p->expected_desc(), newbuffer->data);
+ auto reorder_primitive(new mkldnn::reorder(src, dst));
+
+ dev_ctx.addMemory(op_key+"_src_input", src);
+ dev_ctx.addMemory(op_key+"_input", dst);
+ dev_ctx.addPrimitive(op_key+"_reorder_input", reorder_primitive);
+ }
+
+ p = dev_ctx.findPrimitive(op_key + "_reorder_input");
+ PADDLE_ENFORCE(p, "Should have Reorder Primitive");
+ dev_ctx.submit(p);
+ if (! this->isMKLDNNKernel()) {
+ // execute immediately only if this is not mkldnn kernel function.
+ // otherwise, it can be executed with the operator primitive in Compute
+ dev_ctx.stream();
+ }
+ // after submit, the input tensor in ExecutionContext should be changed as the converted one
+ // there should be another mechanism to ensure this
+}
+```
+
+### Unit Test
+All the functions should be tested corresponding.
+TBD
diff --git a/doc/design/support_new_device.md b/doc/design/support_new_device.md
index fd23dc211a35fdc9d87bc9233fcf4e90254da748..f54b2b3694cc2a8f1d892792fd4d39a0484dc750 100644
--- a/doc/design/support_new_device.md
+++ b/doc/design/support_new_device.md
@@ -25,13 +25,14 @@ There are mainly three parts that we have to consider while integrating a new de
### Place and DeviceContext
+Please remind that device and computing library are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices.
#### Place
-Fluid uses class [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L55) to represent different devices and computing libraries. There are inheritance relationships between different kinds of `Place`.
+Fluid uses class [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L55) to represent the device memory where data is located. If we add another device, we have to add corresponding `DevicePlace`.
```
- | CPUPlace --> MKLDNNPlace
-Place --| CUDAPlace --> CUDNNPlace
+ | CPUPlace
+Place --| CUDAPlace
| FPGAPlace
```
@@ -43,7 +44,7 @@ typedef boost::variant Place;
#### DeviceContext
-Fluid uses class [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L30) to manage the resources in different hardwares, such as CUDA stream in `CDUADeviceContext`. There are also inheritance relationships between different kinds of `DeviceContext`.
+Fluid uses class [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L30) to manage the resources in different libraries, such as CUDA stream in `CDUADeviceContext`. There are also inheritance relationships between different kinds of `DeviceContext`.
```
@@ -106,7 +107,7 @@ template
size_t Used(Place place);
```
-To implementing these interfaces, we have to implement MemoryAllocator for different Devices
+To implement these interfaces, we have to implement MemoryAllocator for different Devices.
#### Tensor
@@ -243,6 +244,7 @@ REGISTER_OP_CUDA_KERNEL(
Generally, we will impelement OpKernel for all Device/Library of an Operator. We can easily train a Convolutional Neural Network in GPU. However, some OpKernel is not sutibale on a specific Device. For example, crf operator can only run on CPU, whereas most other operators can run at GPU. To achieve high performance in such circumstance, we have to switch between different Device/Library.
-We will discuss how to implement an efficient OpKernel switch policy.
+For more details, please refer to following docs:
-- TBD
+- operator kernel type [doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md)
+- switch kernel [doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md)
diff --git a/doc/faq/build_and_install/index_cn.rst b/doc/faq/build_and_install/index_cn.rst
index a2bdeead7841393fdfe90c78e5b91d9e61678a24..ed8a0c7e87da133138ecfc7ba6a8217d58b8f71d 100644
--- a/doc/faq/build_and_install/index_cn.rst
+++ b/doc/faq/build_and_install/index_cn.rst
@@ -109,3 +109,31 @@ PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此错误的使用二
解决办法是:
* 卸载PaddlePaddle包 :code:`pip uninstall paddle`, 清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径是优先已经安装的python包。
+
+8. 下载MKLML库失败
+------------------
+
+.. code-block:: bash
+
+ make[2]: *** [third_party/mklml/src/extern_mklml-stamp/extern_mklml-download] 错误 4
+ make[1]: *** [CMakeFiles/extern_mklml.dir/all] 错误 2
+ make[1]: *** 正在等待未完成的任务....
+
+原因:网速或SSL链接原因,导致MKLML库下载不成功。
+
+解决办法是:手动下载并安装,具体步骤如下。
+
+.. code-block:: bash
+
+ // 1. 进入对应的目录
+ cd build/third_party/mklml/src/extern_mklml
+
+ // 2. 查看包的大小, 正常情况下是75M,如果小于75M,即下载失败:
+ du -sh mklml_lnx_2018.0.1.20171007.tgz
+
+ // 3. 手动下载且解压缩,并手动生成download成功标签:
+ wget --no-check-certificate https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz -c -O mklml_lnx_2018.0.1.20171007.tgz
+ tar zxf mklml_lnx_2018.0.1.20171007.tgz
+ touch ../extern_mklml-stamp/extern_mklml-download
+
+ // 4. 接着编译即可
diff --git a/go/pserver/client/c/test/test_cclient.c b/go/pserver/client/c/test/test_cclient.c
index 89c4d7f00aae2a92ae30ba7b4305550d150dd985..05ec421fff6e1c57b0bace080668d3793f85480f 100644
--- a/go/pserver/client/c/test/test_cclient.c
+++ b/go/pserver/client/c/test/test_cclient.c
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include
#include
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 25a0db27688a9984ad401cbd462fe70048b204da..c2a57a95ee6aa1b03a687f07de74810e8e753f29 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -59,7 +59,8 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
-cc_test(threadpool_test SRCS threadpool_test.cc)
+cc_library(threadpool SRCS threadpool.cc)
+cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece)
cc_test(init_test SRCS init_test.cc DEPS init)
diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 222aee5974ca82c12a9c3b18f7fac4f96ee251bd..eaf13ddcefcd8dc5a6b0438f765d8d325925aa30 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/backward.h"
#include "paddle/operators/net_op.h"
diff --git a/paddle/framework/backward.h b/paddle/framework/backward.h
index 2d3b75fe6966cb5dad32dc185a3973e92b23e26e..69ee3802369c16a8b21c0710d2008ef3c085cc5c 100644
--- a/paddle/framework/backward.h
+++ b/paddle/framework/backward.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index 0957646b5642cd9afce5d88b2c638679cb01f198..692406b1c37d0c02714eafb5cf9a28329ed873bc 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/backward.h"
diff --git a/paddle/framework/data_layout.h b/paddle/framework/data_layout.h
index 7d7a444cf02ba0da88178a34c98f5ef7a95c3852..4a8669c3a41fceaad26878a79eabfd0affce86fd 100644
--- a/paddle/framework/data_layout.h
+++ b/paddle/framework/data_layout.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+#include "paddle/platform/enforce.h"
#include
#include "paddle/platform/enforce.h"
@@ -20,7 +21,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
-enum DataLayout {
+enum class DataLayout {
kNHWC = 0,
kNCHW = 1,
kAnyLayout = 2,
@@ -38,11 +39,11 @@ inline DataLayout StringToDataLayout(const std::string& str) {
inline std::string DataLayoutToString(const DataLayout& data_layout) {
switch (data_layout) {
- case kNHWC:
+ case DataLayout::kNHWC:
return "NHWC";
- case kNCHW:
+ case DataLayout::kNCHW:
return "NCHW";
- case kAnyLayout:
+ case DataLayout::kAnyLayout:
return "ANY_LAYOUT";
default:
PADDLE_THROW("unknown DataLayou %d", data_layout);
diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h
index e94ee2ed52bc40f52caf783f971dd0b560534e08..6a372ac32e48131eed28e2d42125feb5b92a11c7 100644
--- a/paddle/framework/data_type.h
+++ b/paddle/framework/data_type.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc
index bd5ea09d7da700479aa387283d7bde77c64c1293..bc259d1f603fb34ac8546c388669d8c5c1250bd1 100644
--- a/paddle/framework/ddim_test.cc
+++ b/paddle/framework/ddim_test.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include
#include
diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h
index 7f5151c41d6046f21f7a9707e45de85ec50219ad..6d50e820b2b625f932768d2ca671d999071f1ca6 100644
--- a/paddle/framework/details/op_registry.h
+++ b/paddle/framework/details/op_registry.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
diff --git a/paddle/framework/feed_fetch_type.h b/paddle/framework/feed_fetch_type.h
index bc4ae440fc708f696c18bb9d5ab3ba7dd59e21ab..9bc4a90c44828ecb7458d524f59609f01848cc5c 100644
--- a/paddle/framework/feed_fetch_type.h
+++ b/paddle/framework/feed_fetch_type.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h
index cf411fa710103350713342b43946697a8dd2aa46..2de5242831835b47893a5825e5532500ad5ec3f9 100644
--- a/paddle/framework/grad_op_desc_maker.h
+++ b/paddle/framework/grad_op_desc_maker.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc
index 4deb4fa903dec04e9b76c5a620f1eb76c9f1db07..d6601090d5b6150a5aa467210038d3693c3e67a8 100644
--- a/paddle/framework/init.cc
+++ b/paddle/framework/init.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include
#include
@@ -54,7 +54,7 @@ bool InitDevices(const std::vector &devices) {
#ifdef PADDLE_WITH_CUDA
auto pos = string::RFind(p, ':', string::Piece::npos);
auto number = device.substr(pos + 1);
- places.emplace_back(platform::GPUPlace(std::stoi(number)));
+ places.emplace_back(platform::CUDAPlace(std::stoi(number)));
#else
LOG(WARNING)
<< "'GPU' is not supported, Please re-compile with WITH_GPU option";
diff --git a/paddle/framework/init.h b/paddle/framework/init.h
index 1715cd81e6647158e269e39d4d91fbe065cd0008..33907f9eb00fb3469b53dcf8151557cc7a2d3791 100644
--- a/paddle/framework/init.h
+++ b/paddle/framework/init.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/init_test.cc b/paddle/framework/init_test.cc
index cb1ba7ce8fdbf740846689356c94c4f2fabb95cb..f0788051d4855a175d2d7ea1f1a0805c776c462b 100644
--- a/paddle/framework/init_test.cc
+++ b/paddle/framework/init_test.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/framework/init.h"
diff --git a/paddle/framework/library_type.h b/paddle/framework/library_type.h
index aa66cf00f3be14c4ac9496326190428688fc3496..6baae6c2bb80a4f631cad89231ce4fcb8d94ed86 100644
--- a/paddle/framework/library_type.h
+++ b/paddle/framework/library_type.h
@@ -20,15 +20,15 @@ namespace framework {
// For more details about the design of LibraryType, Please refer to
// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library
-enum LibraryType { kPlain = 0, kMKLDNN = 1, kCUDNN = 2 };
+enum class LibraryType { kPlain = 0, kMKLDNN = 1, kCUDNN = 2 };
inline std::string LibraryTypeToString(const LibraryType& library_type) {
switch (library_type) {
- case kPlain:
+ case LibraryType::kPlain:
return "PLAIN";
- case kMKLDNN:
+ case LibraryType::kMKLDNN:
return "MKLDNN";
- case kCUDNN:
+ case LibraryType::kCUDNN:
return "CUDNN";
default:
PADDLE_THROW("unknown LibraryType %d", library_type);
diff --git a/paddle/framework/lod_rank_table.cc b/paddle/framework/lod_rank_table.cc
index 17d524c09276fc0eb166925bd79bc0bdfcead195..704bce2a0eb60b974efd41a4edda0af2933da825 100644
--- a/paddle/framework/lod_rank_table.cc
+++ b/paddle/framework/lod_rank_table.cc
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
diff --git a/paddle/framework/lod_rank_table.h b/paddle/framework/lod_rank_table.h
index d3007d3d7379a59b32465cbd55780c6268e0e4a8..df188709e91871ded0258fa5703ee16a5664f057 100644
--- a/paddle/framework/lod_rank_table.h
+++ b/paddle/framework/lod_rank_table.h
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 465f8c62b5fe2efd549f68bb3a9823d299ba5393..f8a3be9a82bdbaf82550634d36122eb7bbe85e54 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/data_type.h"
@@ -224,7 +224,7 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
while (size != 0) {
size_t size_to_write = std::min(kBufSize, static_cast(size));
memory::Copy(cpu, buf.get(),
- boost::get(tensor.place()),
+ boost::get(tensor.place()),
reinterpret_cast(data), size_to_write,
gpu_dev_ctx.stream());
gpu_dev_ctx.Wait();
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 0923c52a0ad2fe10cea760df20c99021984ad39d..147db3ab0877662d9e47ae7ee6df05638b5fcbd1 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
diff --git a/paddle/framework/lod_tensor_array.h b/paddle/framework/lod_tensor_array.h
index 13f0608d24be97d8bba149b74f1a4deb57deeb48..4a8e7f4fa540b1c2f19a6e3ec236a0dd5c0daf0b 100644
--- a/paddle/framework/lod_tensor_array.h
+++ b/paddle/framework/lod_tensor_array.h
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu
index 5b90fbfca7f6bec4f2c862d0ff18dfd7cf39e181..e8508ad2658ae850e4c98aa798b5db6d007e67d0 100644
--- a/paddle/framework/lod_tensor_test.cu
+++ b/paddle/framework/lod_tensor_test.cu
@@ -27,7 +27,7 @@ __global__ void test(size_t* a, int size) {
TEST(LoDTensor, LoDInGPU) {
paddle::framework::LoDTensor lod_tensor;
- paddle::platform::GPUPlace place(0);
+ paddle::platform::CUDAPlace place(0);
paddle::framework::LoD src_lod;
src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14});
diff --git a/paddle/framework/op_info.cc b/paddle/framework/op_info.cc
index 81ba29797c5f478e5d6a91236f3e8de1e6b43e49..b520108109bb2f72b80f83559fa065a5ca58e9e1 100644
--- a/paddle/framework/op_info.cc
+++ b/paddle/framework/op_info.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/op_info.h"
diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h
index 7772d6e745c2207024863d3dd5cbef052358272e..d9b89f9cac9611fcecb18bef87940632df1e2234 100644
--- a/paddle/framework/op_info.h
+++ b/paddle/framework/op_info.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include
diff --git a/paddle/framework/op_kernel_type.h b/paddle/framework/op_kernel_type.h
index e9c45b958cd0a65bca62099324e951f298d9ecb1..97b542e345feab0bab701dd967558ce23375dc7f 100644
--- a/paddle/framework/op_kernel_type.h
+++ b/paddle/framework/op_kernel_type.h
@@ -40,6 +40,7 @@ struct OpKernelType {
// place, data_type, library_type kinds less than 2^8
constexpr static int LEFT_SHIFT = 8;
+
proto::DataType data_type_;
DataLayout data_layout_;
platform::Place place_;
diff --git a/paddle/framework/op_kernel_type_test.cc b/paddle/framework/op_kernel_type_test.cc
index 899676b5c1a3799427d6ef03bae47284550c781b..8753d7cc378662ce116e447dc6a340a07e5dd2ca 100644
--- a/paddle/framework/op_kernel_type_test.cc
+++ b/paddle/framework/op_kernel_type_test.cc
@@ -37,13 +37,13 @@ TEST(OpKernelType, Hash) {
using OpKernelType = paddle::framework::OpKernelType;
using DataType = paddle::framework::proto::DataType;
using CPUPlace = paddle::platform::CPUPlace;
- using GPUPlace = paddle::platform::GPUPlace;
+ using CUDAPlace = paddle::platform::CUDAPlace;
using DataLayout = paddle::framework::DataLayout;
using LibraryType = paddle::framework::LibraryType;
OpKernelType op_kernel_type_1(DataType::FP32, CPUPlace(), DataLayout::kNCHW,
LibraryType::kCUDNN);
- OpKernelType op_kernel_type_2(DataType::FP32, GPUPlace(0), DataLayout::kNCHW,
+ OpKernelType op_kernel_type_2(DataType::FP32, CUDAPlace(0), DataLayout::kNCHW,
LibraryType::kCUDNN);
OpKernelType::Hash hasher;
diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index 244c1174655f61cad7176a211b07863dbfbba9aa..9bb2a3b5c2931d03152cc3262c0ad8da17b8aacb 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -188,7 +188,7 @@ class OpKernelRegistrar : public Registrar {
}
#define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
- REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::GPUPlace, __VA_ARGS__)
+ REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index f147cc5a6e233ff88208f4f966ab93e8737b6f13..66840a2e037e7ca0fd1eacc64421865b170b47f8 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -402,19 +402,28 @@ void OperatorWithKernel::Run(const Scope& scope,
OpKernelMap& kernels = kernels_iter->second;
ExecutionContext ctx(*this, scope, *dev_ctx);
- auto kernel_key = GetKernelType(ctx);
- auto kernel_iter = kernels.find(kernel_key);
+ auto actual_kernel_key = GetActualKernelType(ctx);
+ auto expected_kernel_key = GetExpectedKernelType(actual_kernel_key);
+ auto kernel_iter = kernels.find(expected_kernel_key);
if (kernel_iter == kernels.end()) {
- PADDLE_THROW("The operator %s does not support %s", type_, kernel_key);
+ PADDLE_THROW("The operator %s does not support %s", type_,
+ expected_kernel_key);
}
kernel_iter->second->Compute(ctx);
}
-OpKernelType OperatorWithKernel::GetKernelType(
+
+OpKernelType OperatorWithKernel::GetActualKernelType(
const ExecutionContext& ctx) const {
return OpKernelType(IndicateDataType(ctx), ctx.GetPlace());
}
+
+OpKernelType OperatorWithKernel::GetExpectedKernelType(
+ const OpKernelType& actual_kernel_type) const {
+ return actual_kernel_type;
+}
+
proto::DataType OperatorWithKernel::IndicateDataType(
const ExecutionContext& ctx) const {
auto& scope = ctx.scope();
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index 03cd578e9b98d54aa6c5ed9936999ab0b6edd8da..d0a9b643d565d6651fd7ec0b515f088362852ba3 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -52,6 +52,11 @@ constexpr char kGradVarSuffix[] = "@GRAD";
/// Variables with this suffix are supposed to be filled up with zeros.
constexpr char kZeroVarSuffix[] = "@ZERO";
+// define some kernel hint
+const std::string kUseCPU = "use_cpu";
+const std::string kUseCUDNN = "use_cudnn";
+const std::string kUseMKLDNN = "use_mkldnn";
+
inline std::string GradVarName(const std::string& var_name) {
return var_name + kGradVarSuffix;
}
@@ -376,7 +381,9 @@ class OperatorWithKernel : public OperatorBase {
}
protected:
- virtual OpKernelType GetKernelType(const ExecutionContext& ctx) const;
+ virtual OpKernelType GetActualKernelType(const ExecutionContext& ctx) const;
+ virtual OpKernelType GetExpectedKernelType(
+ const OpKernelType& actual_kernel_type) const;
private:
// indicate kernel DataType by input data. Defaultly all input data must be
diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc
index fbca45b59dc5446e93e79599f471d80a06ea3661..4d38a7ada91af834aa1a19b49e36d606ebe786ba 100644
--- a/paddle/framework/operator_test.cc
+++ b/paddle/framework/operator_test.cc
@@ -114,7 +114,7 @@ class OpWithKernelTest : public OperatorWithKernel {
protected:
void InferShape(framework::InferShapeContext* ctx) const override {}
- OpKernelType GetKernelType(const ExecutionContext& ctx) const override {
+ OpKernelType GetActualKernelType(const ExecutionContext& ctx) const override {
return OpKernelType(proto::DataType::FP32, ctx.GetPlace());
}
};
diff --git a/paddle/framework/program_desc_test.cc b/paddle/framework/program_desc_test.cc
index a49886f7ea56bc57459202dba65e3f76a902cd70..59947c9f2189348226b7ff6c2b9315196bbf55fa 100644
--- a/paddle/framework/program_desc_test.cc
+++ b/paddle/framework/program_desc_test.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/program_desc.h"
#include "gtest/gtest.h"
diff --git a/paddle/framework/prune_test.cc b/paddle/framework/prune_test.cc
index bdd57659432ea4f9bdd05425a802110b0c202fb8..d76c5abca94cb87220ce73537a8657c3ec695f4d 100644
--- a/paddle/framework/prune_test.cc
+++ b/paddle/framework/prune_test.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/prune.h"
diff --git a/paddle/framework/shape_inference.cc b/paddle/framework/shape_inference.cc
index 86dc01665bda5e7f988e60780c0600b049d737ef..e53cc0cdabc623ae358f1a3e21823a2f38ec3c62 100644
--- a/paddle/framework/shape_inference.cc
+++ b/paddle/framework/shape_inference.cc
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/shape_inference.h"
#include "grad_op_desc_maker.h"
#include "paddle/framework/operator.h"
diff --git a/paddle/framework/tensor.cc b/paddle/framework/tensor.cc
index ea7b2a1f7b17d9abc2c2e14de5ecd1cf4a1a5027..f922e606249849e621e679f71d6dbe0f007c8464 100644
--- a/paddle/framework/tensor.cc
+++ b/paddle/framework/tensor.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#include "paddle/framework/tensor.h"
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 6a0c5133c9a6bb326ca51755242e75b6eb9e5474..b9f6884f7c4b6eeacda722762d485dea97fdcb63 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -20,12 +20,12 @@ limitations under the License. */
#include
#include
+#include "paddle/framework/data_layout.h"
#include "paddle/framework/ddim.h"
#include "paddle/memory/memory.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
-#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
@@ -115,6 +115,10 @@ class Tensor {
inline void check_memory_size() const;
+ inline DataLayout layout() const { return layout_; }
+
+ inline void set_layout(const DataLayout layout) { layout_ = layout; }
+
private:
friend class LoDTensor;
@@ -173,6 +177,19 @@ class Tensor {
DDim dims_;
+ /**
+ * @brief the layout of memory block, default is NCHW.
+ *
+ * @note the memory allocation order, describe how weight/data is stored
+ * For example, in 4-D Tensor(rank=4), there are three commonly
+ * used layout. They are
+ * NCHW, NHWC, CHWN.
+ * N,C,H,W for respectively the batch size, the number of
+ * feature maps, the height.
+ */
+
+ DataLayout layout_ = DataLayout::kNHWC;
+
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
diff --git a/paddle/framework/tensor.md b/paddle/framework/tensor.md
index 7a80816d8e4ffa3a9462f3d9b87eff0f048466aa..0a27ac9bb6b03649d42e12100fda9e80a56e7f56 100644
--- a/paddle/framework/tensor.md
+++ b/paddle/framework/tensor.md
@@ -71,7 +71,7 @@ private:
```
```c++
-typedef boost::variant Place;
+typedef boost::variant Place;
typedef boost::variant, Dim<2>, Dim<3>, Dim<4>, Dim<5>,
Dim<6>, Dim<7>, Dim<8>, Dim<9>> DDimVar;
typedef boost::variant<
diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h
index aba1f9f09329f890ef190f8820b958c56f017e89..6c6f298edc187a87677089e54c4c9046821282df 100644
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@@ -125,11 +125,11 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
boost::get(place), size, type));
} else if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
- PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+ PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
}
#else
- holder_.reset(new PlaceholderImpl(
- boost::get(place), size, type));
+ holder_.reset(new PlaceholderImpl(
+ boost::get(place), size, type));
}
#endif
offset_ = 0;
@@ -165,6 +165,7 @@ inline Tensor Tensor::Slice(int begin_idx, int end_idx) const {
size_t base = numel() / dims_[0];
Tensor dst;
dst.holder_ = holder_;
+ dst.set_layout(layout_);
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims);
diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc
index ceca64365a1a628642eb374a3e3bbdff490c955a..ca76a9fcb9079bab22f7b192c45903852c91797f 100644
--- a/paddle/framework/tensor_test.cc
+++ b/paddle/framework/tensor_test.cc
@@ -80,20 +80,20 @@ TEST(Tensor, MutableData) {
float* p1 = nullptr;
float* p2 = nullptr;
// initialization
- p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace());
+ p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), CUDAPlace());
EXPECT_NE(p1, nullptr);
// set src_tensor a new dim with large size
// momery is supposed to be re-allocated
- p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace());
+ p2 = src_tensor.mutable_data(make_ddim({3, 4}), CUDAPlace());
EXPECT_NE(p2, nullptr);
EXPECT_NE(p1, p2);
// set src_tensor a new dim with same size
// momery block is supposed to be unchanged
- p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace());
+ p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), CUDAPlace());
EXPECT_EQ(p1, p2);
// set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged
- p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace());
+ p2 = src_tensor.mutable_data(make_ddim({2, 2}), CUDAPlace());
EXPECT_EQ(p1, p2);
}
#endif
@@ -130,7 +130,7 @@ TEST(Tensor, ShareDataWith) {
{
Tensor src_tensor;
Tensor dst_tensor;
- src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace());
+ src_tensor.mutable_data(make_ddim({2, 3, 4}), CUDAPlace());
dst_tensor.ShareDataWith(src_tensor);
ASSERT_EQ(src_tensor.data(), dst_tensor.data());
}
@@ -166,7 +166,7 @@ TEST(Tensor, Slice) {
#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
- src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace());
+ src_tensor.mutable_data(make_ddim({6, 9}), CUDAPlace());
Tensor slice_tensor = src_tensor.Slice(2, 6);
DDim slice_dims = slice_tensor.dims();
ASSERT_EQ(arity(slice_dims), 2);
@@ -176,11 +176,11 @@ TEST(Tensor, Slice) {
uintptr_t src_data_address =
reinterpret_cast(src_tensor.data());
uintptr_t src_mutable_data_address = reinterpret_cast(
- src_tensor.mutable_data(src_tensor.dims(), GPUPlace()));
+ src_tensor.mutable_data(src_tensor.dims(), CUDAPlace()));
uintptr_t slice_data_address =
reinterpret_cast(slice_tensor.data());
uintptr_t slice_mutable_data_address = reinterpret_cast(
- slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace()));
+ slice_tensor.mutable_data(slice_tensor.dims(), CUDAPlace()));
EXPECT_EQ(src_data_address, src_mutable_data_address);
EXPECT_EQ(slice_data_address, slice_mutable_data_address);
EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address);
@@ -200,3 +200,12 @@ TEST(Tensor, ReshapeToMatrix) {
ASSERT_EQ(res.dims()[0], 2 * 3);
ASSERT_EQ(res.dims()[1], 4 * 9);
}
+
+TEST(Tensor, Layout) {
+ using namespace paddle::framework;
+ using namespace paddle::platform;
+ Tensor src;
+ ASSERT_EQ(src.layout(), DataLayout::kNHWC);
+ src.set_layout(DataLayout::kAnyLayout);
+ ASSERT_EQ(src.layout(), DataLayout::kAnyLayout);
+}
diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h
index 4e34b90d57eed8fea84b83045df61a98483c8849..ea4e4f22ea82ccc9f8b683d2fd773a7bc37f78a3 100644
--- a/paddle/framework/tensor_util.h
+++ b/paddle/framework/tensor_util.h
@@ -1,16 +1,16 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
@@ -33,6 +33,7 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
src.check_memory_size();
dst->Resize(src.dims());
+ dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data();
@@ -47,11 +48,11 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
- auto src_gpu_place = boost::get(src_place);
+ auto src_gpu_place = boost::get(src_place);
auto dst_cpu_place = boost::get(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
- auto ctx_gpu_place = boost::get(ctx_place);
+ auto ctx_gpu_place = boost::get(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
memory::Copy(
dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size,
@@ -59,21 +60,21 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_cpu_place = boost::get(src_place);
- auto dst_gpu_place = boost::get(dst_place);
+ auto dst_gpu_place = boost::get(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
- auto ctx_gpu_place = boost::get(ctx_place);
+ auto ctx_gpu_place = boost::get(ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
memory::Copy(
dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size,
reinterpret_cast(ctx).stream());
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
- auto src_gpu_place = boost::get(src_place);
- auto dst_gpu_place = boost::get(dst_place);
+ auto src_gpu_place = boost::get(src_place);
+ auto dst_gpu_place = boost::get(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
- auto ctx_gpu_place = boost::get(ctx_place);
+ auto ctx_gpu_place = boost::get(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
@@ -82,6 +83,29 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
#endif
}
+/**
+ * @brief CopyFrom support CPU <-> CPU
+ */
+inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
+ Tensor* dst) {
+ src.check_memory_size();
+ dst->Resize(src.dims());
+ dst->set_layout(src.layout());
+
+ auto src_place = src.place();
+ auto src_ptr = src.data();
+
+ auto dst_ptr = dst->mutable_data(dst_place, src.type());
+
+ auto size = src.numel() * SizeOfType(src.type());
+
+ PADDLE_ENFORCE(platform::is_cpu_place(src_place) &&
+ platform::is_cpu_place(dst_place));
+
+ memory::Copy(boost::get(dst_place), dst_ptr,
+ boost::get(src_place), src_ptr, size);
+}
+
/**
* @brief Copy the content of an external vector to a tensor.
*
@@ -108,13 +132,28 @@ inline void CopyFromVector(const std::vector& src,
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(dst_place)) { // NOLINT
memory::Copy(
- boost::get(dst_place), dst_ptr, src_place, src_ptr,
+ boost::get(dst_place), dst_ptr, src_place, src_ptr,
size,
reinterpret_cast(ctx).stream());
}
#endif
}
+/**
+ * @brief CopyFromVector CPU vector -> CPU Tensor
+ */
+template
+inline void CopyFromVector(const std::vector& src, Tensor* dst) {
+ platform::CPUPlace dst_place = platform::CPUPlace();
+ auto src_ptr = static_cast(src.data());
+ platform::CPUPlace src_place;
+ dst->Resize({static_cast(src.size())});
+ auto dst_ptr = static_cast(dst->mutable_data(dst_place));
+ auto size = src.size() * sizeof(T);
+
+ memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
+}
+
/**
* @brief Copy the content of a tensor to a vector
*
@@ -141,12 +180,30 @@ inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx,
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src.place())) { // NOLINT
memory::Copy(
- dst_place, dst_ptr, boost::get(src.place()),
+ dst_place, dst_ptr, boost::get(src.place()),
src_ptr, size,
reinterpret_cast(ctx).stream());
}
#endif
}
+/**
+ * @brief CopyToVector CPUTensor <-> CPU Vector
+ */
+template
+inline void CopyToVector(const Tensor& src, std::vector* dst) {
+ auto src_ptr = static_cast(src.data());
+ auto size = src.numel() * sizeof(T);
+
+ platform::CPUPlace dst_place;
+ dst->resize(src.numel());
+ auto dst_ptr = static_cast(dst->data());
+
+ PADDLE_ENFORCE(platform::is_cpu_place(src.place()));
+
+ memory::Copy(dst_place, dst_ptr, boost::get(src.place()),
+ src_ptr, size);
+}
+
} // namespace framework
} // namespace paddle
diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc
index 03a70de182d0eb499a81413d38229c81c4378b91..f388c19f28ed28335818733f946d8eaf18464627 100644
--- a/paddle/framework/tensor_util_test.cc
+++ b/paddle/framework/tensor_util_test.cc
@@ -17,6 +17,7 @@
namespace paddle {
namespace framework {
+
TEST(CopyFrom, Tensor) {
Tensor src_tensor;
Tensor dst_tensor;
@@ -27,9 +28,10 @@ TEST(CopyFrom, Tensor) {
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));
+ src_tensor.set_layout(DataLayout::kAnyLayout);
auto cpu_place = new platform::CPUPlace();
- CopyFrom(src_tensor, *cpu_place, cpu_ctx, &dst_tensor);
+ CopyFrom(src_tensor, *cpu_place, &dst_tensor);
const int* dst_ptr = dst_tensor.data();
ASSERT_NE(src_ptr, dst_ptr);
@@ -37,14 +39,18 @@ TEST(CopyFrom, Tensor) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}
+ EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
+
Tensor slice_tensor = src_tensor.Slice(1, 2);
- CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor);
+ CopyFrom(slice_tensor, *cpu_place, &dst_tensor);
const int* slice_ptr = slice_tensor.data();
dst_ptr = dst_tensor.data();
ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
+ EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
+
#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
@@ -58,7 +64,7 @@ TEST(CopyFrom, Tensor) {
memcpy(src_ptr, arr, 9 * sizeof(int));
// CPU Tensor to GPU Tensor
- auto gpu_place = new platform::GPUPlace(0);
+ auto gpu_place = new platform::CUDAPlace(0);
platform::CUDADeviceContext gpu_ctx(*gpu_place);
CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
@@ -90,6 +96,8 @@ TEST(CopyFrom, Tensor) {
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
+
+ EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
}
#endif
}
@@ -104,8 +112,7 @@ TEST(CopyFromVector, Tensor) {
// Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace();
- CPUDeviceContext cpu_ctx(*cpu_place);
- CopyFromVector(src_vec, cpu_ctx, &cpu_tensor);
+ CopyFromVector(src_vec, &cpu_tensor);
// Compare Tensors
const int* cpu_ptr = cpu_tensor.data();
@@ -117,7 +124,7 @@ TEST(CopyFromVector, Tensor) {
src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
cpu_tensor.Resize(make_ddim({2, 2}));
- CopyFromVector(src_vec, cpu_ctx, &cpu_tensor);
+ CopyFromVector(src_vec, &cpu_tensor);
cpu_ptr = cpu_tensor.data();
src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr);
@@ -143,7 +150,7 @@ TEST(CopyFromVector, Tensor) {
// Copy to GPUTensor
gpu_tensor.Resize(make_ddim({3, 3}));
- auto gpu_place = new paddle::platform::GPUPlace();
+ auto gpu_place = new paddle::platform::CUDAPlace();
CUDADeviceContext gpu_ctx(*gpu_place);
CopyFromVector(src_vec, gpu_ctx, &gpu_tensor);
// Copy from GPU to CPU tensor for comparison
@@ -198,9 +205,8 @@ TEST(CopyToVector, Tensor) {
}
CPUPlace place;
- CPUDeviceContext cpu_ctx(place);
std::vector dst;
- CopyToVector(src, cpu_ctx, &dst);
+ CopyToVector(src, &dst);
for (int i = 0; i < 3 * 3; ++i) {
EXPECT_EQ(src_ptr[i], dst[i]);
@@ -210,7 +216,7 @@ TEST(CopyToVector, Tensor) {
{
std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
Tensor gpu_tensor;
- GPUPlace place;
+ CUDAPlace place;
CUDADeviceContext gpu_ctx(place);
CopyFromVector(src_vec, gpu_ctx, &gpu_tensor);
diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc
new file mode 100644
index 0000000000000000000000000000000000000000..109a7e7dc440d91e8223f2c0924f489f54a06f64
--- /dev/null
+++ b/paddle/framework/threadpool.cc
@@ -0,0 +1,24 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/framework/threadpool.h"
+
+namespace paddle {
+namespace framework {
+
+std::unique_ptr ThreadPool::threadpool(nullptr);
+std::once_flag ThreadPool::init_flag;
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h
index 9a1ece3ae8452399205e71fbaa26977710fcdcac..5f6b2d458f7ee764c22d203f285b78023b6012f3 100644
--- a/paddle/framework/threadpool.h
+++ b/paddle/framework/threadpool.h
@@ -13,15 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+
#include
-#include
#include
-#include
#include
#include
#include
-#include "paddle/platform/call_once.h"
#include "paddle/platform/enforce.h"
namespace paddle {
@@ -81,10 +79,9 @@ class ThreadPool {
}
private:
- ThreadPool& operator=(const ThreadPool&) = delete;
- ThreadPool(const ThreadPool&) = delete;
+ DISABLE_COPY_AND_ASSIGN(ThreadPool);
- ThreadPool(int num_threads)
+ explicit ThreadPool(int num_threads)
: num_threads_(num_threads), available_(num_threads), running_(true) {
threads_.resize(num_threads);
for (auto& thread : threads_) {
@@ -155,7 +152,5 @@ class ThreadPool {
std::condition_variable completed_;
};
-std::unique_ptr ThreadPool::threadpool(nullptr);
-std::once_flag ThreadPool::init_flag;
} // namespace framework
} // namespace paddle
diff --git a/paddle/framework/threadpool_test.cc b/paddle/framework/threadpool_test.cc
index 78c762608ed920c1a586c99d68952ffcc653c75e..012d92a5edc415f0bb2f8a0ea38ffeb9549d54fa 100644
--- a/paddle/framework/threadpool_test.cc
+++ b/paddle/framework/threadpool_test.cc
@@ -12,12 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "threadpool.h"
#include
#include
-#include
-#include