diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md
index 8ee7fd28c58f2a2bcb82040eb824a37062bd4e9c..6cc9598947acbdacfbf4c4379987bab8ed7611b0 100644
--- a/benchmark/IntelOptimizedPaddle.md
+++ b/benchmark/IntelOptimizedPaddle.md
@@ -22,6 +22,7 @@ On each machine, we will test and compare the performance of training on single
#### Training
Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
+Pay attetion that the speed below includes forward, backward and parameter update time. So we can not directly compare the data with the benchmark of caffe `time` [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which only contain forward and backward. The updating time of parameter would become very heavy when the weight size are large, especially on alexnet.
Input image size - 3 * 224 * 224, Time: images/second
@@ -55,6 +56,16 @@ Input image size - 3 * 224 * 224, Time: images/second
+- Alexnet
+
+| BatchSize | 64 | 128 | 256 |
+|--------------|--------| ------ | -------|
+| OpenBLAS | 2.13 | 2.45 | 2.68 |
+| MKLML | 66.37 | 105.60 | 144.04 |
+| MKL-DNN | 399.00 | 498.94 | 626.53 |
+
+chart TBD
+
#### Inference
Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
- VGG-19
diff --git a/doc/design/mkl/mkldnn_fluid.md b/doc/design/mkl/mkldnn_fluid.md
new file mode 100644
index 0000000000000000000000000000000000000000..bef126f3f0577b69f646dfe5d10539b372c6a8a5
--- /dev/null
+++ b/doc/design/mkl/mkldnn_fluid.md
@@ -0,0 +1,149 @@
+# Design Doc: Add MKLDNN Kernel in Fluid Operator
+
+## Principles
+
+First of all, we should follow some basical principles like:
+1. [How to write a new operator](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md). We are trying to add a new kind of kernel into operators, so basically we should follow this doc.
+2. [Supporting new Device/Library](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/support_new_device.md). Since MKLDNN is a new library to fluid, we should add `MKLDNNDeviceContext` and maybe `mkldnn_helper.h`, just like [cudnn_helper.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/cudnn_helper.h).
+3. [Switch Kernel](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md). Another important point is that we should ensure the data synchronization between different kernel types, which is this [topic](https://github.com/PaddlePaddle/Paddle/issues/6549). So basically we should override `GetExpectedKernelType` and `trans` functions to support switching kernels.
+4. [The Keys of Operator Kernel Type](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md). Kernel Type is a pivotal conception which can record the `Place`, `Library`, `DataType` and `Layout`.
+
+## Sulution
+
+In general, there are four parts we should follow to run a MKL-DNN primitive.
+- Create a primitive descriptor that describe this operator
+- Create a primitive itself by primitive descriptor and the engine
+- Create all memory buffers that primitive needed
+- Launch a stream to execute the primitive created
+More details can refer to [here](http://01org.github.io/mkl-dnn).
+
+It's better to avoid reinitialization of primitives and memory handles in the first three stages in every iteration. \
+So we plan to create a map to record all the `primitive` and `memory`, which should not take too much memories as discussed [here](https://github.com/PaddlePaddle/Paddle/issues/6822).
+
+It's assumed that following three conditions should be satisfied.
+1. there is a unique key for each operator instance. May be the actual name of `Output Tensor`.
+2. the `Input Tensor` inside `Compute` function is the one after converted.
+3. we can get the phase(eg. `is_test`) inside `Compute` function, otherwise we need to expose this attribue to user.
+
+### Compute
+The algorithm of `Compute` would be described as follow, let's take conv like an example.
+
+```c++
+
+ PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace.");
+ PADDLE_ENFORCE(platform::is_mkldnn_library(ctx.GetLibrary()), "It must use MKLDNN Library.");
+
+ auto& dev_ctx = ctx.template device_context();
+
+ // find primitive by unique key from mkldnn context
+ // the op_key should be a unique name of this op instance
+ auto& p = dev_ctx.findPrimitive(op_key + "_fwd");
+
+ // assuming the input tensor inside this compute function is the one after converted
+ // this point should be guarantee by another mechanism
+ auto& i = dev_ctx.findMemory(op_key + "_input");
+
+ if (p == nullptr || i == nullptr || inputSizeChanged(p, i)) {
+ auto fwd_primitive_desc = createPrimitiveDesc(ctx);
+ auto* input = ctx.Input("Input");
+ auto* filter = ctx.Input("Filter");
+ auto* output = ctx.Output("Output");
+ shared_ptr in(new mkldnn::memory(fwd_primitive_desc->src_primitive_desc(), input->data()));
+ shared_ptr wgt(new mkldnn::memory(fwd_primitive_desc->weights_primitive_desc(), filter->data()));
+ shared_ptr out(new mkldnn::memory(fwd_primitive_desc->dst_primitive_desc(), output->mutable_data(ctx.GetPlace())));
+ shared_ptr fwd_primitive(new mkldnn::conv_fwd(*fwd_primitive_desc, *in, *wgt, *out));
+
+ dev_ctx.addMemory(op_key+"_input", in);
+ dev_ctx.addMemory(op_key+"_output", out);
+ dev_ctx.addMemory(op_key+"_filer", wgt);
+ dev_ctx.addPrimitive(op_key+"_fwd", fwd_primitive);
+ dev_ctx.addPrimitiveDesc(op_key+"_fwd_PD", fwd_primitive_desc);
+ }
+
+ p = dev_ctx.findPrimitive(op_key + "_fwd");
+
+ PADDLE_ENFORCE(p, "Should have forward Primitive");
+ PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_input"), "Should have input memory");
+ PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_output"), "Should have output memory");
+ PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_filter"), "Should have filter memory");
+ PADDLE_ENFORCE(dev_ctx.findPrimitiveDesc(op_unique_key+"_fwd_PD"), "Should have forward PrimitiveDesc");
+ dev_ctx.submit(p);
+ dev_ctx.execute(); // the convert primitive should have already contained.
+
+```
+
+The `createPrimitiveDesc` returns the primitive descripotor of this operator, would be like this:
+```c++
+ auto* input = ctx.Input("Input");
+ auto* filter = ctx.Input("Filter");
+ auto* output = ctx.Output("Output");
+ std::vector strides = ctx.Attr>("strides");
+ std::vector paddings = ctx.Attr>("paddings");
+ std::vector dilations = ctx.Attr>("dilations");
+ int groups = ctx.Attr("groups");
+ algorithm algo = static_cast(ctx.Attr("convolution_algorithm_option"));
+ prop_kind pk = ctx.Attr("is_test") ? prop_kind::forward_inference : prop_kind::forward_training;
+
+ auto fwd_desc = mkldnn::conv_fwd::desc(/* all the setting above*/);
+ shared_ptr fwd_primitive_desc(new mkldnn::conv_fwd::primitive_desc(fwd_desc, ctx.getEngine()));
+
+ return fwd_primitive_desc;
+ }
+```
+
+### MKLDNNDeviceContext
+`MKLDNNDeviceContext`, which is very straightforward, should contain some base information like: `stream`, `engine` and the map needed.
+
+
+### mkldnn_helper
+Some functions would be put in `paddle/platform/mkldnn_helper.h`.
+- create MKLDNN memories
+- create MKLDNN primitives
+- error check function
+- etc
+
+
+### Kernel Switch
+We should `reorder` the different Layout from other device or to other device. `GetExpectedKernelType` and `trans` functions can help us to implement it.
+
+`GetExpectedKernelType` should get the context, and this operator can return the best `KernelType`.
+`trans` would be like this:
+
+```c++
+void trans(inputs, ctx) override {
+ if (NoNeedTrans()) {
+ return;
+ }
+ // find reorder primitive by op_key from context
+ auto& dev_ctx = ctx.template device_context();
+ auto& p = dev_ctx.findPrimitive(op_key + "_reorder_input");
+ auto& i = dev_ctx.findMemory(op_key + "_src_input");
+
+ if (p == nullptr || i == nullptr || changeSized(i, input)) {
+ auto prim = createPrimitiveDesc(ctx);
+ auto src = createMemory(memoryDesc(input->dims(), actual_layout), input->data);
+ auto newbuffer = paddle::memory::Alloc(ctx.GetPlace(), input->size_in_bytes());
+ auto dst = createMemory(p->expected_desc(), newbuffer->data);
+ auto reorder_primitive(new mkldnn::reorder(src, dst));
+
+ dev_ctx.addMemory(op_key+"_src_input", src);
+ dev_ctx.addMemory(op_key+"_input", dst);
+ dev_ctx.addPrimitive(op_key+"_reorder_input", reorder_primitive);
+ }
+
+ p = dev_ctx.findPrimitive(op_key + "_reorder_input");
+ PADDLE_ENFORCE(p, "Should have Reorder Primitive");
+ dev_ctx.submit(p);
+ if (! this->isMKLDNNKernel()) {
+ // execute immediately only if this is not mkldnn kernel function.
+ // otherwise, it can be executed with the operator primitive in Compute
+ dev_ctx.stream();
+ }
+ // after submit, the input tensor in ExecutionContext should be changed as the converted one
+ // there should be another mechanism to ensure this
+}
+```
+
+### Unit Test
+All the functions should be tested corresponding.
+TBD
diff --git a/doc/faq/build_and_install/index_cn.rst b/doc/faq/build_and_install/index_cn.rst
index a2bdeead7841393fdfe90c78e5b91d9e61678a24..ed8a0c7e87da133138ecfc7ba6a8217d58b8f71d 100644
--- a/doc/faq/build_and_install/index_cn.rst
+++ b/doc/faq/build_and_install/index_cn.rst
@@ -109,3 +109,31 @@ PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此错误的使用二
解决办法是:
* 卸载PaddlePaddle包 :code:`pip uninstall paddle`, 清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径是优先已经安装的python包。
+
+8. 下载MKLML库失败
+------------------
+
+.. code-block:: bash
+
+ make[2]: *** [third_party/mklml/src/extern_mklml-stamp/extern_mklml-download] 错误 4
+ make[1]: *** [CMakeFiles/extern_mklml.dir/all] 错误 2
+ make[1]: *** 正在等待未完成的任务....
+
+原因:网速或SSL链接原因,导致MKLML库下载不成功。
+
+解决办法是:手动下载并安装,具体步骤如下。
+
+.. code-block:: bash
+
+ // 1. 进入对应的目录
+ cd build/third_party/mklml/src/extern_mklml
+
+ // 2. 查看包的大小, 正常情况下是75M,如果小于75M,即下载失败:
+ du -sh mklml_lnx_2018.0.1.20171007.tgz
+
+ // 3. 手动下载且解压缩,并手动生成download成功标签:
+ wget --no-check-certificate https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz -c -O mklml_lnx_2018.0.1.20171007.tgz
+ tar zxf mklml_lnx_2018.0.1.20171007.tgz
+ touch ../extern_mklml-stamp/extern_mklml-download
+
+ // 4. 接着编译即可
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 25a0db27688a9984ad401cbd462fe70048b204da..c2a57a95ee6aa1b03a687f07de74810e8e753f29 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -59,7 +59,8 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
-cc_test(threadpool_test SRCS threadpool_test.cc)
+cc_library(threadpool SRCS threadpool.cc)
+cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece)
cc_test(init_test SRCS init_test.cc DEPS init)
diff --git a/paddle/framework/data_layout.h b/paddle/framework/data_layout.h
index 7d7a444cf02ba0da88178a34c98f5ef7a95c3852..4a8669c3a41fceaad26878a79eabfd0affce86fd 100644
--- a/paddle/framework/data_layout.h
+++ b/paddle/framework/data_layout.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+#include "paddle/platform/enforce.h"
#include
#include "paddle/platform/enforce.h"
@@ -20,7 +21,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
-enum DataLayout {
+enum class DataLayout {
kNHWC = 0,
kNCHW = 1,
kAnyLayout = 2,
@@ -38,11 +39,11 @@ inline DataLayout StringToDataLayout(const std::string& str) {
inline std::string DataLayoutToString(const DataLayout& data_layout) {
switch (data_layout) {
- case kNHWC:
+ case DataLayout::kNHWC:
return "NHWC";
- case kNCHW:
+ case DataLayout::kNCHW:
return "NCHW";
- case kAnyLayout:
+ case DataLayout::kAnyLayout:
return "ANY_LAYOUT";
default:
PADDLE_THROW("unknown DataLayou %d", data_layout);
diff --git a/paddle/framework/library_type.h b/paddle/framework/library_type.h
index aa66cf00f3be14c4ac9496326190428688fc3496..6baae6c2bb80a4f631cad89231ce4fcb8d94ed86 100644
--- a/paddle/framework/library_type.h
+++ b/paddle/framework/library_type.h
@@ -20,15 +20,15 @@ namespace framework {
// For more details about the design of LibraryType, Please refer to
// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library
-enum LibraryType { kPlain = 0, kMKLDNN = 1, kCUDNN = 2 };
+enum class LibraryType { kPlain = 0, kMKLDNN = 1, kCUDNN = 2 };
inline std::string LibraryTypeToString(const LibraryType& library_type) {
switch (library_type) {
- case kPlain:
+ case LibraryType::kPlain:
return "PLAIN";
- case kMKLDNN:
+ case LibraryType::kMKLDNN:
return "MKLDNN";
- case kCUDNN:
+ case LibraryType::kCUDNN:
return "CUDNN";
default:
PADDLE_THROW("unknown LibraryType %d", library_type);
diff --git a/paddle/framework/op_kernel_type.h b/paddle/framework/op_kernel_type.h
index e9c45b958cd0a65bca62099324e951f298d9ecb1..97b542e345feab0bab701dd967558ce23375dc7f 100644
--- a/paddle/framework/op_kernel_type.h
+++ b/paddle/framework/op_kernel_type.h
@@ -40,6 +40,7 @@ struct OpKernelType {
// place, data_type, library_type kinds less than 2^8
constexpr static int LEFT_SHIFT = 8;
+
proto::DataType data_type_;
DataLayout data_layout_;
platform::Place place_;
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 6a0c5133c9a6bb326ca51755242e75b6eb9e5474..b9f6884f7c4b6eeacda722762d485dea97fdcb63 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -20,12 +20,12 @@ limitations under the License. */
#include
#include
+#include "paddle/framework/data_layout.h"
#include "paddle/framework/ddim.h"
#include "paddle/memory/memory.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
-#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
@@ -115,6 +115,10 @@ class Tensor {
inline void check_memory_size() const;
+ inline DataLayout layout() const { return layout_; }
+
+ inline void set_layout(const DataLayout layout) { layout_ = layout; }
+
private:
friend class LoDTensor;
@@ -173,6 +177,19 @@ class Tensor {
DDim dims_;
+ /**
+ * @brief the layout of memory block, default is NCHW.
+ *
+ * @note the memory allocation order, describe how weight/data is stored
+ * For example, in 4-D Tensor(rank=4), there are three commonly
+ * used layout. They are
+ * NCHW, NHWC, CHWN.
+ * N,C,H,W for respectively the batch size, the number of
+ * feature maps, the height.
+ */
+
+ DataLayout layout_ = DataLayout::kNHWC;
+
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h
index 3d93b7808bc96143b5261e1ec41ad98b15c74975..6c6f298edc187a87677089e54c4c9046821282df 100644
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@@ -165,6 +165,7 @@ inline Tensor Tensor::Slice(int begin_idx, int end_idx) const {
size_t base = numel() / dims_[0];
Tensor dst;
dst.holder_ = holder_;
+ dst.set_layout(layout_);
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims);
diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc
index f347981f2e11fdc4770df8d5e7277cd55744ab3f..ca76a9fcb9079bab22f7b192c45903852c91797f 100644
--- a/paddle/framework/tensor_test.cc
+++ b/paddle/framework/tensor_test.cc
@@ -200,3 +200,12 @@ TEST(Tensor, ReshapeToMatrix) {
ASSERT_EQ(res.dims()[0], 2 * 3);
ASSERT_EQ(res.dims()[1], 4 * 9);
}
+
+TEST(Tensor, Layout) {
+ using namespace paddle::framework;
+ using namespace paddle::platform;
+ Tensor src;
+ ASSERT_EQ(src.layout(), DataLayout::kNHWC);
+ src.set_layout(DataLayout::kAnyLayout);
+ ASSERT_EQ(src.layout(), DataLayout::kAnyLayout);
+}
diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h
index ebfb0e553877b776afe13d8a4e7a7ffa8710405e..692f5f1af7e0225a63d4ff7d47129d572c237f61 100644
--- a/paddle/framework/tensor_util.h
+++ b/paddle/framework/tensor_util.h
@@ -33,6 +33,7 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
src.check_memory_size();
dst->Resize(src.dims());
+ dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data();
@@ -89,6 +90,7 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
src.check_memory_size();
dst->Resize(src.dims());
+ dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data();
diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc
index 6fc243aaf6ea68d716e7bb6a73289197fde56247..f388c19f28ed28335818733f946d8eaf18464627 100644
--- a/paddle/framework/tensor_util_test.cc
+++ b/paddle/framework/tensor_util_test.cc
@@ -28,6 +28,7 @@ TEST(CopyFrom, Tensor) {
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));
+ src_tensor.set_layout(DataLayout::kAnyLayout);
auto cpu_place = new platform::CPUPlace();
CopyFrom(src_tensor, *cpu_place, &dst_tensor);
@@ -38,14 +39,18 @@ TEST(CopyFrom, Tensor) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}
+ EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
+
Tensor slice_tensor = src_tensor.Slice(1, 2);
- CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor);
+ CopyFrom(slice_tensor, *cpu_place, &dst_tensor);
const int* slice_ptr = slice_tensor.data();
dst_ptr = dst_tensor.data();
ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
+ EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
+
#ifdef PADDLE_WITH_CUDA
{
Tensor src_tensor;
@@ -91,6 +96,8 @@ TEST(CopyFrom, Tensor) {
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
+
+ EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
}
#endif
}
diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2b9be0646cffb16188e4a66981698e3891d10d51
--- /dev/null
+++ b/paddle/framework/threadpool.cc
@@ -0,0 +1,24 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include "paddle/framework/threadpool.h"
+
+namespace paddle {
+namespace framework {
+
+std::unique_ptr ThreadPool::threadpool(nullptr);
+std::once_flag ThreadPool::init_flag;
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h
index 9a1ece3ae8452399205e71fbaa26977710fcdcac..5f6b2d458f7ee764c22d203f285b78023b6012f3 100644
--- a/paddle/framework/threadpool.h
+++ b/paddle/framework/threadpool.h
@@ -13,15 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+
#include
-#include
#include
-#include
#include
#include
#include
-#include "paddle/platform/call_once.h"
#include "paddle/platform/enforce.h"
namespace paddle {
@@ -81,10 +79,9 @@ class ThreadPool {
}
private:
- ThreadPool& operator=(const ThreadPool&) = delete;
- ThreadPool(const ThreadPool&) = delete;
+ DISABLE_COPY_AND_ASSIGN(ThreadPool);
- ThreadPool(int num_threads)
+ explicit ThreadPool(int num_threads)
: num_threads_(num_threads), available_(num_threads), running_(true) {
threads_.resize(num_threads);
for (auto& thread : threads_) {
@@ -155,7 +152,5 @@ class ThreadPool {
std::condition_variable completed_;
};
-std::unique_ptr ThreadPool::threadpool(nullptr);
-std::once_flag ThreadPool::init_flag;
} // namespace framework
} // namespace paddle
diff --git a/paddle/framework/threadpool_test.cc b/paddle/framework/threadpool_test.cc
index 78c762608ed920c1a586c99d68952ffcc653c75e..012d92a5edc415f0bb2f8a0ea38ffeb9549d54fa 100644
--- a/paddle/framework/threadpool_test.cc
+++ b/paddle/framework/threadpool_test.cc
@@ -12,12 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "threadpool.h"
#include
#include
-#include
-#include