Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
e84d3a7f
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e84d3a7f
编写于
4月 18, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_parallel_executor_tests
上级
1de9edee
fee5b24c
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
423 addition
and
208 deletion
+423
-208
CMakeLists.txt
CMakeLists.txt
+1
-6
Dockerfile
Dockerfile
+1
-1
cmake/configure.cmake
cmake/configure.cmake
+10
-0
cmake/tensorrt.cmake
cmake/tensorrt.cmake
+33
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+22
-12
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+11
-5
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+1
-1
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+8
-0
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+10
-0
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+83
-23
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+6
-3
python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py
...paddle/fluid/tests/unittests/test_activation_mkldnn_op.py
+99
-0
python/paddle/fluid/tests/unittests/test_activation_op.py
python/paddle/fluid/tests/unittests/test_activation_op.py
+0
-77
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
+36
-0
python/paddle/fluid/tests/unittests/test_conv2d_op.py
python/paddle/fluid/tests/unittests/test_conv2d_op.py
+0
-17
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
+0
-0
python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py
+49
-0
python/paddle/fluid/tests/unittests/test_lrn_op.py
python/paddle/fluid/tests/unittests/test_lrn_op.py
+0
-29
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+2
-2
python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py
+50
-0
python/paddle/fluid/tests/unittests/test_pool2d_op.py
python/paddle/fluid/tests/unittests/test_pool2d_op.py
+0
-31
未找到文件。
CMakeLists.txt
浏览文件 @
e84d3a7f
...
...
@@ -39,7 +39,6 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F
option
(
WITH_AMD_GPU
"Compile PaddlePaddle with AMD GPU"
OFF
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_TENSORRT
"Compile PaddlePaddle with TensorRT support."
OFF
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
OFF
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
...
...
@@ -180,13 +179,9 @@ set(EXTERNAL_LIBS
if
(
WITH_GPU
)
include
(
cuda
)
include
(
tensorrt
)
endif
(
WITH_GPU
)
# TensorRT depends on GPU.
if
(
NOT WITH_GPU
)
set
(
WITH_TENSORRT OFF
)
endif
()
if
(
WITH_AMD_GPU
)
find_package
(
HIP
)
include
(
hip
)
...
...
Dockerfile
浏览文件 @
e84d3a7f
...
...
@@ -46,7 +46,7 @@ ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
RUN
curl
-s
-q
https://glide.sh/get | sh
# Install TensorRT
# The unnecessary files has been removed to make the library small.
# The unnecessary files has been removed to make the library small.
It only contains include and lib now.
RUN
wget
-qO-
http://paddlepaddledeps.bj.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
tar
-xz
-C
/usr/local
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
...
...
cmake/configure.cmake
浏览文件 @
e84d3a7f
...
...
@@ -80,6 +80,16 @@ if(WITH_GPU)
# Include cuda and cudnn
include_directories
(
${
CUDNN_INCLUDE_DIR
}
)
include_directories
(
${
CUDA_TOOLKIT_INCLUDE
}
)
if
(
TENSORRT_FOUND
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 8
)
message
(
FATAL_ERROR
"TensorRT needs CUDA >= 8.0 to compile"
)
endif
()
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
message
(
FATAL_ERROR
"TensorRT needs CUDNN >= 7.0 to compile"
)
endif
()
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
endif
()
elseif
(
WITH_AMD_GPU
)
add_definitions
(
-DPADDLE_WITH_HIP
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-D__HIP_PLATFORM_HCC__"
)
...
...
cmake/tensorrt.cmake
0 → 100644
浏览文件 @
e84d3a7f
if
(
NOT WITH_GPU
)
return
()
endif
()
set
(
TENSORRT_ROOT
"/usr"
CACHE PATH
"TENSORRT ROOT"
)
find_path
(
TENSORRT_INCLUDE_DIR NvInfer.h
PATHS
${
TENSORRT_ROOT
}
${
TENSORRT_ROOT
}
/include
$ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/include
NO_DEFAULT_PATH
)
find_library
(
TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
PATHS
${
TENSORRT_ROOT
}
${
TENSORRT_ROOT
}
/lib
$ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/lib
NO_DEFAULT_PATH
DOC
"Path to TensorRT library."
)
if
(
TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY
)
set
(
TENSORRT_FOUND ON
)
else
()
set
(
TENSORRT_FOUND OFF
)
endif
()
if
(
TENSORRT_FOUND
)
file
(
READ
${
TENSORRT_INCLUDE_DIR
}
/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS
)
string
(
REGEX MATCH
"define NV_TENSORRT_MAJOR +([0-9]+)"
TENSORRT_MAJOR_VERSION
"
${
TENSORRT_VERSION_FILE_CONTENTS
}
"
)
string
(
REGEX REPLACE
"define NV_TENSORRT_MAJOR +([0-9]+)"
"
\\
1"
TENSORRT_MAJOR_VERSION
"
${
TENSORRT_MAJOR_VERSION
}
"
)
message
(
STATUS
"Current TensorRT header is
${
TENSORRT_INCLUDE_DIR
}
/NvInfer.h. "
"Current TensorRT version is v
${
TENSORRT_MAJOR_VERSION
}
. "
)
endif
()
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
e84d3a7f
...
...
@@ -155,13 +155,9 @@ void ParallelExecutor::BCastParamsToGPUs(
#endif
}
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
platform
::
RecordBlock
b
(
0
);
SplitTensorToPlaces
(
feed_tensors
);
// Create local scopes.
for
(
auto
&
scope
:
member_
->
local_scopes_
)
{
Scope
&
local_scope
=
scope
->
NewScope
();
...
...
@@ -195,14 +191,28 @@ void ParallelExecutor::Run(
auto
&
local_scope
=
*
scope
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
();
scope
->
DeleteScope
(
local_scope
);
local_scope
=
nullptr
;
}
}
void
ParallelExecutor
::
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
for
(
auto
it
:
feed_tensors
)
{
auto
lod_tensors
=
it
.
second
.
SplitLoDTensor
(
member_
->
places_
);
void
ParallelExecutor
::
FeedTensorsIntoLocalScopes
(
const
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
LoDTensor
>>
&
tensors
)
{
PADDLE_ENFORCE_EQ
(
member_
->
local_scopes_
.
size
(),
tensors
.
size
());
for
(
size_t
i
=
0
;
i
<
tensors
.
size
();
++
i
)
{
auto
&
map
=
tensors
[
i
];
auto
*
scope
=
member_
->
local_scopes_
[
i
];
for
(
auto
&
pair
:
map
)
{
auto
*
trg
=
scope
->
Var
(
pair
.
first
)
->
GetMutable
<
LoDTensor
>
();
trg
->
ShareDataWith
(
pair
.
second
);
trg
->
set_lod
(
pair
.
second
.
lod
());
}
}
}
void
ParallelExecutor
::
FeedAndSplitTensorIntoLocalScopes
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
tensors
)
{
for
(
auto
pair
:
tensors
)
{
auto
lod_tensors
=
pair
.
second
.
SplitLoDTensor
(
member_
->
places_
);
PADDLE_ENFORCE_EQ
(
member_
->
places_
.
size
(),
lod_tensors
.
size
(),
"The number of samples of current batch is less than the count of "
...
...
@@ -211,7 +221,7 @@ void ParallelExecutor::SplitTensorToPlaces(
for
(
size_t
j
=
0
;
j
<
member_
->
places_
.
size
();
++
j
)
{
// TODO(panxy0718): Do I need to delete this var?
auto
t
=
member_
->
local_scopes_
[
j
]
->
Var
(
it
.
first
)
->
GetMutable
<
LoDTensor
>
();
member_
->
local_scopes_
[
j
]
->
Var
(
pair
.
first
)
->
GetMutable
<
LoDTensor
>
();
t
->
ShareDataWith
(
lod_tensors
[
j
]);
t
->
set_lod
(
lod_tensors
[
j
].
lod
());
}
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
e84d3a7f
...
...
@@ -44,16 +44,22 @@ class ParallelExecutor {
std
::
vector
<
Scope
*>&
GetLocalScopes
();
/**
* Feed tensors to local scopes. The size of tensors should be equal to the
* size of local scopes.
*/
void
FeedTensorsIntoLocalScopes
(
const
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
LoDTensor
>>&
tensors
);
void
FeedAndSplitTensorIntoLocalScopes
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
tensors
);
void
Run
(
const
std
::
vector
<
std
::
string
>&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
const
std
::
string
&
fetched_var_name
);
void
BCastParamsToGPUs
(
const
std
::
unordered_set
<
std
::
string
>&
vars
)
const
;
private:
void
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
ParallelExecutorPrivate
*
member_
;
};
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
e84d3a7f
...
...
@@ -21,7 +21,7 @@ endif()
if
(
WITH_TESTING
)
add_subdirectory
(
tests/book
)
if
(
WITH_TENSORRT
)
if
(
TENSORRT_FOUND
)
add_subdirectory
(
tensorrt
)
endif
()
endif
()
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
e84d3a7f
cc_library
(
dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce
)
list
(
APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc
)
if
(
WITH_TENSORRT
)
if
(
TENSORRT_FOUND
)
list
(
APPEND CUDA_SRCS tensorrt.cc
)
endif
()
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
e84d3a7f
...
...
@@ -505,11 +505,19 @@ All parameter, weight, gradient are variables in Paddle.
scope
,
local_scopes
,
allow_op_delay
);
})
.
def
(
"bcast_params"
,
&
ParallelExecutor
::
BCastParamsToGPUs
)
// NOTE: even we return a vec<Scope*>* to Python use reference policy.
// We still cannot get local_scope from this vector, since the element
// of vec<Scope*> will be freed by Python GC. We can only return Scope*
// one by one and mark them as reference.
.
def
(
"local_scopes"
,
[](
ParallelExecutor
&
self
)
->
std
::
vector
<
Scope
*>
*
{
return
&
self
.
GetLocalScopes
();
},
py
::
return_value_policy
::
reference
)
.
def
(
"feed_tensors_into_local_scopes"
,
&
ParallelExecutor
::
FeedTensorsIntoLocalScopes
)
.
def
(
"feed_and_split_tensor_into_local_scopes"
,
&
ParallelExecutor
::
FeedAndSplitTensorIntoLocalScopes
)
.
def
(
"run"
,
&
ParallelExecutor
::
Run
);
BindRecordIOWriter
(
&
m
);
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
e84d3a7f
...
...
@@ -190,6 +190,11 @@ void PyCUDATensorSetFromArray(
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
place
));
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
// NOTE: For safety, here wait the copy complete.
// It because the CPU array.data() could be destroyed after this method.
// If we make this method async, it could be copied data from a memory buffer
// that has been freed.
dev_ctx
->
Wait
();
}
template
<
>
...
...
@@ -216,6 +221,11 @@ void PyCUDATensorSetFromArray(
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
// NOTE: For safety, here wait the copy complete.
// It because the CPU array.data() could be destroyed after this method.
// If we make this method async, it could be copied data from a memory buffer
// that has been freed.
dev_ctx
->
Wait
();
}
template
<
typename
T
>
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
e84d3a7f
...
...
@@ -17,6 +17,7 @@ import multiprocessing
import
framework
import
executor
import
warnings
import
sys
__all__
=
[
'ParallelExecutor'
]
...
...
@@ -103,8 +104,8 @@ class ParallelExecutor(object):
self
.
persistable_vars
=
[
v
.
name
for
v
in
filter
(
lambda
var
:
\
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
,
for
v
in
filter
(
lambda
var
:
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
,
main
.
list_vars
())
]
...
...
@@ -124,34 +125,93 @@ class ParallelExecutor(object):
allow_op_delay
)
self
.
scope
=
scope
def
run
(
self
,
fetch_list
,
feed
=
{},
feed_dict
=
{}
):
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
):
"""
:param fetch_list: A list of variable names that will be fetched.
:param feed: A dict mapping for feed variable name to LoDTensor
or numpy array.
:return: fetched value list.
Run a parallel executor with fetch_list.
The feed parameter can be a dict or a list. If feed is a dict, the
feed data will be split into multiple devices. If feed is a list, we
assume the data has been splitted into multiple devices, the each
element in the list will be copied to each device directly.
For example, if the feed is a dict:
>>> exe = ParallelExecutor()
>>> # the image will be splitted into devices. If there is two devices
>>> # each device will process an image with shape (24, 1, 28, 28)
>>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))})
For example, if the feed is a list:
>>> exe = ParallelExecutor()
>>> # each device will process each element in the list.
>>> # the 1st device will process an image with shape (48, 1, 28, 28)
>>> # the 2nd device will process an image with shape (32, 1, 28, 28)
>>> #
>>> # you can use exe.device_count to get the device number.
>>> exe.run(feed=[{"image": numpy.random.random(size=(48, 1, 28, 28))},
>>> {"image": numpy.random.random(size=(32, 1, 28, 28))},
>>> ])
Args:
fetch_list(list): The fetched variable names
feed(list|dict|None): The feed variables. If the feed is a dict,
tensors in that dict will be splitted into each devices. If
the feed is a list, each element of the list will be copied
to each device.
feed_dict: Alias for feed parameter, for backward compatibility.
This parameter is deprecated.
Returns: fetched result list.
"""
if
not
feed_dict
==
{}:
warnings
.
warn
(
"The 'feed_dict' of ParallelExecutor.run() is deprecated. Please use 'feed' instead."
)
if
feed
==
{}:
if
feed
is
None
and
feed_dict
is
not
None
:
feed
=
feed_dict
if
not
isinstance
(
feed
,
dict
):
raise
TypeError
(
"feed should be a dict"
)
feed_tensor_dict
=
{}
for
i
,
feed_name
in
enumerate
(
feed
):
feed_tensor
=
feed
[
feed_name
]
if
not
isinstance
(
feed_tensor
,
core
.
LoDTensor
):
feed_tensor
=
core
.
LoDTensor
()
feed_tensor
.
set
(
feed
[
feed_name
],
self
.
_act_places
[
0
])
feed_tensor_dict
[
feed_name
]
=
feed_tensor
print
>>
sys
.
stderr
,
"`feed_dict` is deprecated. Please use `feed=`"
if
isinstance
(
feed
,
dict
):
feed_tensor_dict
=
dict
()
for
feed_name
in
feed
:
feed_tensor
=
feed
[
feed_name
]
if
not
isinstance
(
feed_tensor
,
core
.
LoDTensor
):
feed_tensor
=
core
.
LoDTensor
()
# always set to CPU place, since the tensor need to be splitted
# it is fast in CPU
feed_tensor
.
set
(
feed
[
feed_name
],
core
.
CPUPlace
())
feed_tensor_dict
[
feed_name
]
=
feed_tensor
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
if
len
(
feed
)
!=
len
(
self
.
_act_places
):
raise
ValueError
(
"Feed a list of tensor, the list should be the same size as places"
)
res
=
list
()
for
i
,
each
in
enumerate
(
feed
):
if
not
isinstance
(
each
,
dict
):
raise
TypeError
(
"Each element of feed list should be a dict"
)
res_dict
=
dict
()
for
feed_name
in
each
:
tensor
=
each
[
feed_name
]
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
tensor
,
self
.
_act_places
[
i
])
tensor
=
tmp
res_dict
[
feed_name
]
=
tensor
res
.
append
(
res_dict
)
self
.
executor
.
feed_tensors_into_local_scopes
(
res
)
fetch_var_name
=
'@FETCHED_VAR_NAME@'
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
,
feed_tensor_dict
)
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
)
arr
=
self
.
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
return
[
arr
[
i
]
for
i
in
range
(
len
(
arr
))]
def
bcast_params
(
self
):
self
.
executor
.
bcast_params
(
set
(
self
.
persistable_vars
))
@
property
def
device_count
(
self
):
return
len
(
self
.
_act_places
)
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
e84d3a7f
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
# The fully connected test is removed whe the WITH_MKLDNN flag is OFF
# Because the fully connected layer has only one kernel (MKLDNN)
# The MKLDNN tests are skiped when the MKLDNN flag is OFF
if
(
NOT WITH_MKLDNN
)
list
(
REMOVE_ITEM TEST_OPS test_fc_op
)
foreach
(
src
${
TEST_OPS
}
)
if
(
${
src
}
MATCHES
".*_mkldnn_op$"
)
list
(
REMOVE_ITEM TEST_OPS
${
src
}
)
endif
()
endforeach
()
endif
(
NOT WITH_MKLDNN
)
if
(
NOT WITH_DISTRIBUTE
)
...
...
python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
scipy.special
import
expit
from
test_activation_op
import
TestRelu
,
TestTanh
,
TestSqrt
,
TestAbs
class
TestMKLDNNReluDim2
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim2
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim2
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim2
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNReluDim4
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
out
=
np
.
maximum
(
x
,
0
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
x
)}
self
.
outputs
=
{
'Out'
:
out
}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim4
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim4
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
sqrt
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim4
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
np
.
abs
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_activation_op.py
浏览文件 @
e84d3a7f
...
...
@@ -1098,82 +1098,5 @@ class TestFP16Swish(TestSwish):
self
.
check_output_with_place
(
place
,
atol
=
1e-3
)
#--------------------test MKLDNN--------------------
class
TestMKLDNNReluDim2
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim2
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim2
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim2
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNReluDim4
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
out
=
np
.
maximum
(
x
,
0
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
x
)}
self
.
outputs
=
{
'Out'
:
out
}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim4
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim4
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
sqrt
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim4
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
np
.
abs
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
class
TestMKLDNN
(
TestConv2dOp
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithPad
(
TestWithPad
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithStride
(
TestWithStride
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_conv2d_op.py
浏览文件 @
e84d3a7f
...
...
@@ -373,22 +373,5 @@ class TestDepthwiseConv2(TestConv2dOp):
# def init_op_type(self):
# self.op_type = "conv_cudnn"
#----------------Conv2dMKLDNN----------------
class
TestMKLDNN
(
TestConv2dOp
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithPad
(
TestWithPad
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithStride
(
TestWithStride
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fc_op.py
→
python/paddle/fluid/tests/unittests/test_fc_
mkldnn_
op.py
浏览文件 @
e84d3a7f
文件已移动
python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_lrn_op
import
TestLRNOp
class
TestLRNMKLDNNOp
(
TestLRNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNOp
.
get_attrs
(
self
)
attrs
[
'use_mkldnn'
]
=
True
return
attrs
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
0.002
)
class
TestLRNMKLDNNOpWithIsTest
(
TestLRNMKLDNNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNMKLDNNOp
.
get_attrs
(
self
)
attrs
[
'is_test'
]
=
True
return
attrs
def
test_check_grad_normal
(
self
):
def
check_raise_is_test
():
try
:
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.01
)
except
Exception
as
e
:
t
=
\
"is_test attribute should be set to False in training phase."
if
t
in
str
(
e
):
raise
AttributeError
self
.
assertRaises
(
AttributeError
,
check_raise_is_test
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_lrn_op.py
浏览文件 @
e84d3a7f
...
...
@@ -87,34 +87,5 @@ class TestLRNOp(OpTest):
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.01
)
class
TestLRNMKLDNNOp
(
TestLRNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNOp
.
get_attrs
(
self
)
attrs
[
'use_mkldnn'
]
=
True
return
attrs
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
0.002
)
class
TestLRNMKLDNNOpWithIsTest
(
TestLRNMKLDNNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNMKLDNNOp
.
get_attrs
(
self
)
attrs
[
'is_test'
]
=
True
return
attrs
def
test_check_grad_normal
(
self
):
def
check_raise_is_test
():
try
:
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.01
)
except
Exception
as
e
:
t
=
\
"is_test attribute should be set to False in training phase."
if
t
in
str
(
e
):
raise
AttributeError
self
.
assertRaises
(
AttributeError
,
check_raise_is_test
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
e84d3a7f
...
...
@@ -203,7 +203,7 @@ class TestParallelExecutorBase(unittest.TestCase):
iter
=
50
,
batch_size
=
None
,
allow_op_delay
=
False
,
feed_dict
=
{}
,
feed_dict
=
None
,
seed
=
None
,
use_parallel_executor
=
True
):
def
run_executor
(
exe
,
feed
,
fetch_list
,
program
=
None
):
...
...
@@ -223,7 +223,7 @@ class TestParallelExecutorBase(unittest.TestCase):
with
fluid
.
program_guard
(
main
,
startup
):
if
seed
is
not
None
:
startup
.
random_seed
=
seed
loss
=
method
(
use_feed
=
len
(
feed_dict
)
>
0
)
loss
=
method
(
use_feed
=
feed_dict
is
not
None
)
adam
=
fluid
.
optimizer
.
Adam
()
adam
.
minimize
(
loss
)
if
memory_opt
:
...
...
python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_pool2d_op
import
TestPool2d_Op
,
TestCase1
,
TestCase2
,
TestCase3
,
TestCase4
,
TestCase5
class
TestMKLDNNCase1
(
TestPool2d_Op
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase2
(
TestCase1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase3
(
TestCase2
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase4
(
TestCase3
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase5
(
TestCase4
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase6
(
TestCase5
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_pool2d_op.py
浏览文件 @
e84d3a7f
...
...
@@ -317,36 +317,5 @@ class TestCeilModeCase4(TestCase2):
self
.
ceil_mode
=
True
#--------------------test pool2d MKLDNN--------------------
class
TestMKLDNNCase1
(
TestPool2d_Op
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase2
(
TestCase1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase3
(
TestCase2
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase4
(
TestCase3
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase5
(
TestCase4
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase6
(
TestCase5
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录