Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e84d3a7f
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e84d3a7f
编写于
4月 18, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_parallel_executor_tests
上级
1de9edee
fee5b24c
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
423 addition
and
208 deletion
+423
-208
CMakeLists.txt
CMakeLists.txt
+1
-6
Dockerfile
Dockerfile
+1
-1
cmake/configure.cmake
cmake/configure.cmake
+10
-0
cmake/tensorrt.cmake
cmake/tensorrt.cmake
+33
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+22
-12
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+11
-5
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+1
-1
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+8
-0
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+10
-0
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+83
-23
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+6
-3
python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py
...paddle/fluid/tests/unittests/test_activation_mkldnn_op.py
+99
-0
python/paddle/fluid/tests/unittests/test_activation_op.py
python/paddle/fluid/tests/unittests/test_activation_op.py
+0
-77
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
+36
-0
python/paddle/fluid/tests/unittests/test_conv2d_op.py
python/paddle/fluid/tests/unittests/test_conv2d_op.py
+0
-17
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
+0
-0
python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py
+49
-0
python/paddle/fluid/tests/unittests/test_lrn_op.py
python/paddle/fluid/tests/unittests/test_lrn_op.py
+0
-29
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+2
-2
python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py
+50
-0
python/paddle/fluid/tests/unittests/test_pool2d_op.py
python/paddle/fluid/tests/unittests/test_pool2d_op.py
+0
-31
未找到文件。
CMakeLists.txt
浏览文件 @
e84d3a7f
...
...
@@ -39,7 +39,6 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F
option
(
WITH_AMD_GPU
"Compile PaddlePaddle with AMD GPU"
OFF
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_TENSORRT
"Compile PaddlePaddle with TensorRT support."
OFF
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
OFF
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
...
...
@@ -180,13 +179,9 @@ set(EXTERNAL_LIBS
if
(
WITH_GPU
)
include
(
cuda
)
include
(
tensorrt
)
endif
(
WITH_GPU
)
# TensorRT depends on GPU.
if
(
NOT WITH_GPU
)
set
(
WITH_TENSORRT OFF
)
endif
()
if
(
WITH_AMD_GPU
)
find_package
(
HIP
)
include
(
hip
)
...
...
Dockerfile
浏览文件 @
e84d3a7f
...
...
@@ -46,7 +46,7 @@ ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
RUN
curl
-s
-q
https://glide.sh/get | sh
# Install TensorRT
# The unnecessary files has been removed to make the library small.
# The unnecessary files has been removed to make the library small.
It only contains include and lib now.
RUN
wget
-qO-
http://paddlepaddledeps.bj.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
tar
-xz
-C
/usr/local
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
...
...
cmake/configure.cmake
浏览文件 @
e84d3a7f
...
...
@@ -80,6 +80,16 @@ if(WITH_GPU)
# Include cuda and cudnn
include_directories
(
${
CUDNN_INCLUDE_DIR
}
)
include_directories
(
${
CUDA_TOOLKIT_INCLUDE
}
)
if
(
TENSORRT_FOUND
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 8
)
message
(
FATAL_ERROR
"TensorRT needs CUDA >= 8.0 to compile"
)
endif
()
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
message
(
FATAL_ERROR
"TensorRT needs CUDNN >= 7.0 to compile"
)
endif
()
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
endif
()
elseif
(
WITH_AMD_GPU
)
add_definitions
(
-DPADDLE_WITH_HIP
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-D__HIP_PLATFORM_HCC__"
)
...
...
cmake/tensorrt.cmake
0 → 100644
浏览文件 @
e84d3a7f
if
(
NOT WITH_GPU
)
return
()
endif
()
set
(
TENSORRT_ROOT
"/usr"
CACHE PATH
"TENSORRT ROOT"
)
find_path
(
TENSORRT_INCLUDE_DIR NvInfer.h
PATHS
${
TENSORRT_ROOT
}
${
TENSORRT_ROOT
}
/include
$ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/include
NO_DEFAULT_PATH
)
find_library
(
TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
PATHS
${
TENSORRT_ROOT
}
${
TENSORRT_ROOT
}
/lib
$ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/lib
NO_DEFAULT_PATH
DOC
"Path to TensorRT library."
)
if
(
TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY
)
set
(
TENSORRT_FOUND ON
)
else
()
set
(
TENSORRT_FOUND OFF
)
endif
()
if
(
TENSORRT_FOUND
)
file
(
READ
${
TENSORRT_INCLUDE_DIR
}
/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS
)
string
(
REGEX MATCH
"define NV_TENSORRT_MAJOR +([0-9]+)"
TENSORRT_MAJOR_VERSION
"
${
TENSORRT_VERSION_FILE_CONTENTS
}
"
)
string
(
REGEX REPLACE
"define NV_TENSORRT_MAJOR +([0-9]+)"
"
\\
1"
TENSORRT_MAJOR_VERSION
"
${
TENSORRT_MAJOR_VERSION
}
"
)
message
(
STATUS
"Current TensorRT header is
${
TENSORRT_INCLUDE_DIR
}
/NvInfer.h. "
"Current TensorRT version is v
${
TENSORRT_MAJOR_VERSION
}
. "
)
endif
()
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
e84d3a7f
...
...
@@ -155,13 +155,9 @@ void ParallelExecutor::BCastParamsToGPUs(
#endif
}
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
platform
::
RecordBlock
b
(
0
);
SplitTensorToPlaces
(
feed_tensors
);
// Create local scopes.
for
(
auto
&
scope
:
member_
->
local_scopes_
)
{
Scope
&
local_scope
=
scope
->
NewScope
();
...
...
@@ -195,14 +191,28 @@ void ParallelExecutor::Run(
auto
&
local_scope
=
*
scope
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
();
scope
->
DeleteScope
(
local_scope
);
local_scope
=
nullptr
;
}
}
void
ParallelExecutor
::
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
for
(
auto
it
:
feed_tensors
)
{
auto
lod_tensors
=
it
.
second
.
SplitLoDTensor
(
member_
->
places_
);
void
ParallelExecutor
::
FeedTensorsIntoLocalScopes
(
const
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
LoDTensor
>>
&
tensors
)
{
PADDLE_ENFORCE_EQ
(
member_
->
local_scopes_
.
size
(),
tensors
.
size
());
for
(
size_t
i
=
0
;
i
<
tensors
.
size
();
++
i
)
{
auto
&
map
=
tensors
[
i
];
auto
*
scope
=
member_
->
local_scopes_
[
i
];
for
(
auto
&
pair
:
map
)
{
auto
*
trg
=
scope
->
Var
(
pair
.
first
)
->
GetMutable
<
LoDTensor
>
();
trg
->
ShareDataWith
(
pair
.
second
);
trg
->
set_lod
(
pair
.
second
.
lod
());
}
}
}
void
ParallelExecutor
::
FeedAndSplitTensorIntoLocalScopes
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
tensors
)
{
for
(
auto
pair
:
tensors
)
{
auto
lod_tensors
=
pair
.
second
.
SplitLoDTensor
(
member_
->
places_
);
PADDLE_ENFORCE_EQ
(
member_
->
places_
.
size
(),
lod_tensors
.
size
(),
"The number of samples of current batch is less than the count of "
...
...
@@ -211,7 +221,7 @@ void ParallelExecutor::SplitTensorToPlaces(
for
(
size_t
j
=
0
;
j
<
member_
->
places_
.
size
();
++
j
)
{
// TODO(panxy0718): Do I need to delete this var?
auto
t
=
member_
->
local_scopes_
[
j
]
->
Var
(
it
.
first
)
->
GetMutable
<
LoDTensor
>
();
member_
->
local_scopes_
[
j
]
->
Var
(
pair
.
first
)
->
GetMutable
<
LoDTensor
>
();
t
->
ShareDataWith
(
lod_tensors
[
j
]);
t
->
set_lod
(
lod_tensors
[
j
].
lod
());
}
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
e84d3a7f
...
...
@@ -44,16 +44,22 @@ class ParallelExecutor {
std
::
vector
<
Scope
*>&
GetLocalScopes
();
/**
* Feed tensors to local scopes. The size of tensors should be equal to the
* size of local scopes.
*/
void
FeedTensorsIntoLocalScopes
(
const
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
LoDTensor
>>&
tensors
);
void
FeedAndSplitTensorIntoLocalScopes
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
tensors
);
void
Run
(
const
std
::
vector
<
std
::
string
>&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
const
std
::
string
&
fetched_var_name
);
void
BCastParamsToGPUs
(
const
std
::
unordered_set
<
std
::
string
>&
vars
)
const
;
private:
void
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
ParallelExecutorPrivate
*
member_
;
};
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
e84d3a7f
...
...
@@ -21,7 +21,7 @@ endif()
if
(
WITH_TESTING
)
add_subdirectory
(
tests/book
)
if
(
WITH_TENSORRT
)
if
(
TENSORRT_FOUND
)
add_subdirectory
(
tensorrt
)
endif
()
endif
()
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
e84d3a7f
cc_library
(
dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce
)
list
(
APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc
)
if
(
WITH_TENSORRT
)
if
(
TENSORRT_FOUND
)
list
(
APPEND CUDA_SRCS tensorrt.cc
)
endif
()
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
e84d3a7f
...
...
@@ -505,11 +505,19 @@ All parameter, weight, gradient are variables in Paddle.
scope
,
local_scopes
,
allow_op_delay
);
})
.
def
(
"bcast_params"
,
&
ParallelExecutor
::
BCastParamsToGPUs
)
// NOTE: even we return a vec<Scope*>* to Python use reference policy.
// We still cannot get local_scope from this vector, since the element
// of vec<Scope*> will be freed by Python GC. We can only return Scope*
// one by one and mark them as reference.
.
def
(
"local_scopes"
,
[](
ParallelExecutor
&
self
)
->
std
::
vector
<
Scope
*>
*
{
return
&
self
.
GetLocalScopes
();
},
py
::
return_value_policy
::
reference
)
.
def
(
"feed_tensors_into_local_scopes"
,
&
ParallelExecutor
::
FeedTensorsIntoLocalScopes
)
.
def
(
"feed_and_split_tensor_into_local_scopes"
,
&
ParallelExecutor
::
FeedAndSplitTensorIntoLocalScopes
)
.
def
(
"run"
,
&
ParallelExecutor
::
Run
);
BindRecordIOWriter
(
&
m
);
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
e84d3a7f
...
...
@@ -190,6 +190,11 @@ void PyCUDATensorSetFromArray(
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
place
));
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
// NOTE: For safety, here wait the copy complete.
// It because the CPU array.data() could be destroyed after this method.
// If we make this method async, it could be copied data from a memory buffer
// that has been freed.
dev_ctx
->
Wait
();
}
template
<
>
...
...
@@ -216,6 +221,11 @@ void PyCUDATensorSetFromArray(
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
// NOTE: For safety, here wait the copy complete.
// It because the CPU array.data() could be destroyed after this method.
// If we make this method async, it could be copied data from a memory buffer
// that has been freed.
dev_ctx
->
Wait
();
}
template
<
typename
T
>
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
e84d3a7f
...
...
@@ -17,6 +17,7 @@ import multiprocessing
import
framework
import
executor
import
warnings
import
sys
__all__
=
[
'ParallelExecutor'
]
...
...
@@ -103,8 +104,8 @@ class ParallelExecutor(object):
self
.
persistable_vars
=
[
v
.
name
for
v
in
filter
(
lambda
var
:
\
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
,
for
v
in
filter
(
lambda
var
:
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
,
main
.
list_vars
())
]
...
...
@@ -124,34 +125,93 @@ class ParallelExecutor(object):
allow_op_delay
)
self
.
scope
=
scope
def
run
(
self
,
fetch_list
,
feed
=
{},
feed_dict
=
{}
):
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
):
"""
:param fetch_list: A list of variable names that will be fetched.
:param feed: A dict mapping for feed variable name to LoDTensor
or numpy array.
:return: fetched value list.
Run a parallel executor with fetch_list.
The feed parameter can be a dict or a list. If feed is a dict, the
feed data will be split into multiple devices. If feed is a list, we
assume the data has been splitted into multiple devices, the each
element in the list will be copied to each device directly.
For example, if the feed is a dict:
>>> exe = ParallelExecutor()
>>> # the image will be splitted into devices. If there is two devices
>>> # each device will process an image with shape (24, 1, 28, 28)
>>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))})
For example, if the feed is a list:
>>> exe = ParallelExecutor()
>>> # each device will process each element in the list.
>>> # the 1st device will process an image with shape (48, 1, 28, 28)
>>> # the 2nd device will process an image with shape (32, 1, 28, 28)
>>> #
>>> # you can use exe.device_count to get the device number.
>>> exe.run(feed=[{"image": numpy.random.random(size=(48, 1, 28, 28))},
>>> {"image": numpy.random.random(size=(32, 1, 28, 28))},
>>> ])
Args:
fetch_list(list): The fetched variable names
feed(list|dict|None): The feed variables. If the feed is a dict,
tensors in that dict will be splitted into each devices. If
the feed is a list, each element of the list will be copied
to each device.
feed_dict: Alias for feed parameter, for backward compatibility.
This parameter is deprecated.
Returns: fetched result list.
"""
if
not
feed_dict
==
{}:
warnings
.
warn
(
"The 'feed_dict' of ParallelExecutor.run() is deprecated. Please use 'feed' instead."
)
if
feed
==
{}:
if
feed
is
None
and
feed_dict
is
not
None
:
feed
=
feed_dict
if
not
isinstance
(
feed
,
dict
):
raise
TypeError
(
"feed should be a dict"
)
feed_tensor_dict
=
{}
for
i
,
feed_name
in
enumerate
(
feed
):
feed_tensor
=
feed
[
feed_name
]
if
not
isinstance
(
feed_tensor
,
core
.
LoDTensor
):
feed_tensor
=
core
.
LoDTensor
()
feed_tensor
.
set
(
feed
[
feed_name
],
self
.
_act_places
[
0
])
feed_tensor_dict
[
feed_name
]
=
feed_tensor
print
>>
sys
.
stderr
,
"`feed_dict` is deprecated. Please use `feed=`"
if
isinstance
(
feed
,
dict
):
feed_tensor_dict
=
dict
()
for
feed_name
in
feed
:
feed_tensor
=
feed
[
feed_name
]
if
not
isinstance
(
feed_tensor
,
core
.
LoDTensor
):
feed_tensor
=
core
.
LoDTensor
()
# always set to CPU place, since the tensor need to be splitted
# it is fast in CPU
feed_tensor
.
set
(
feed
[
feed_name
],
core
.
CPUPlace
())
feed_tensor_dict
[
feed_name
]
=
feed_tensor
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
if
len
(
feed
)
!=
len
(
self
.
_act_places
):
raise
ValueError
(
"Feed a list of tensor, the list should be the same size as places"
)
res
=
list
()
for
i
,
each
in
enumerate
(
feed
):
if
not
isinstance
(
each
,
dict
):
raise
TypeError
(
"Each element of feed list should be a dict"
)
res_dict
=
dict
()
for
feed_name
in
each
:
tensor
=
each
[
feed_name
]
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
tensor
,
self
.
_act_places
[
i
])
tensor
=
tmp
res_dict
[
feed_name
]
=
tensor
res
.
append
(
res_dict
)
self
.
executor
.
feed_tensors_into_local_scopes
(
res
)
fetch_var_name
=
'@FETCHED_VAR_NAME@'
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
,
feed_tensor_dict
)
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
)
arr
=
self
.
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
return
[
arr
[
i
]
for
i
in
range
(
len
(
arr
))]
def
bcast_params
(
self
):
self
.
executor
.
bcast_params
(
set
(
self
.
persistable_vars
))
@
property
def
device_count
(
self
):
return
len
(
self
.
_act_places
)
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
e84d3a7f
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
# The fully connected test is removed whe the WITH_MKLDNN flag is OFF
# Because the fully connected layer has only one kernel (MKLDNN)
# The MKLDNN tests are skiped when the MKLDNN flag is OFF
if
(
NOT WITH_MKLDNN
)
list
(
REMOVE_ITEM TEST_OPS test_fc_op
)
foreach
(
src
${
TEST_OPS
}
)
if
(
${
src
}
MATCHES
".*_mkldnn_op$"
)
list
(
REMOVE_ITEM TEST_OPS
${
src
}
)
endif
()
endforeach
()
endif
(
NOT WITH_MKLDNN
)
if
(
NOT WITH_DISTRIBUTE
)
...
...
python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
scipy.special
import
expit
from
test_activation_op
import
TestRelu
,
TestTanh
,
TestSqrt
,
TestAbs
class
TestMKLDNNReluDim2
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim2
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim2
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim2
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNReluDim4
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
out
=
np
.
maximum
(
x
,
0
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
x
)}
self
.
outputs
=
{
'Out'
:
out
}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim4
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim4
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
sqrt
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim4
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
np
.
abs
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_activation_op.py
浏览文件 @
e84d3a7f
...
...
@@ -1098,82 +1098,5 @@ class TestFP16Swish(TestSwish):
self
.
check_output_with_place
(
place
,
atol
=
1e-3
)
#--------------------test MKLDNN--------------------
class
TestMKLDNNReluDim2
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim2
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim2
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim2
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim2
,
self
).
setUp
()
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNReluDim4
(
TestRelu
):
def
setUp
(
self
):
super
(
TestMKLDNNReluDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
out
=
np
.
maximum
(
x
,
0
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
x
)}
self
.
outputs
=
{
'Out'
:
out
}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNTanhDim4
(
TestTanh
):
def
setUp
(
self
):
super
(
TestMKLDNNTanhDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNSqrtDim4
(
TestSqrt
):
def
setUp
(
self
):
super
(
TestMKLDNNSqrtDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
sqrt
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
class
TestMKLDNNAbsDim4
(
TestAbs
):
def
setUp
(
self
):
super
(
TestMKLDNNAbsDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
np
.
abs
(
self
.
inputs
[
'X'
])}
self
.
attrs
=
{
"use_mkldnn"
:
True
}
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
class
TestMKLDNN
(
TestConv2dOp
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithPad
(
TestWithPad
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithStride
(
TestWithStride
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_conv2d_op.py
浏览文件 @
e84d3a7f
...
...
@@ -373,22 +373,5 @@ class TestDepthwiseConv2(TestConv2dOp):
# def init_op_type(self):
# self.op_type = "conv_cudnn"
#----------------Conv2dMKLDNN----------------
class
TestMKLDNN
(
TestConv2dOp
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithPad
(
TestWithPad
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNWithStride
(
TestWithStride
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fc_op.py
→
python/paddle/fluid/tests/unittests/test_fc_
mkldnn_
op.py
浏览文件 @
e84d3a7f
文件已移动
python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_lrn_op
import
TestLRNOp
class
TestLRNMKLDNNOp
(
TestLRNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNOp
.
get_attrs
(
self
)
attrs
[
'use_mkldnn'
]
=
True
return
attrs
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
0.002
)
class
TestLRNMKLDNNOpWithIsTest
(
TestLRNMKLDNNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNMKLDNNOp
.
get_attrs
(
self
)
attrs
[
'is_test'
]
=
True
return
attrs
def
test_check_grad_normal
(
self
):
def
check_raise_is_test
():
try
:
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.01
)
except
Exception
as
e
:
t
=
\
"is_test attribute should be set to False in training phase."
if
t
in
str
(
e
):
raise
AttributeError
self
.
assertRaises
(
AttributeError
,
check_raise_is_test
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_lrn_op.py
浏览文件 @
e84d3a7f
...
...
@@ -87,34 +87,5 @@ class TestLRNOp(OpTest):
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.01
)
class
TestLRNMKLDNNOp
(
TestLRNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNOp
.
get_attrs
(
self
)
attrs
[
'use_mkldnn'
]
=
True
return
attrs
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
0.002
)
class
TestLRNMKLDNNOpWithIsTest
(
TestLRNMKLDNNOp
):
def
get_attrs
(
self
):
attrs
=
TestLRNMKLDNNOp
.
get_attrs
(
self
)
attrs
[
'is_test'
]
=
True
return
attrs
def
test_check_grad_normal
(
self
):
def
check_raise_is_test
():
try
:
self
.
check_grad
([
'X'
],
'Out'
,
max_relative_error
=
0.01
)
except
Exception
as
e
:
t
=
\
"is_test attribute should be set to False in training phase."
if
t
in
str
(
e
):
raise
AttributeError
self
.
assertRaises
(
AttributeError
,
check_raise_is_test
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
e84d3a7f
...
...
@@ -203,7 +203,7 @@ class TestParallelExecutorBase(unittest.TestCase):
iter
=
50
,
batch_size
=
None
,
allow_op_delay
=
False
,
feed_dict
=
{}
,
feed_dict
=
None
,
seed
=
None
,
use_parallel_executor
=
True
):
def
run_executor
(
exe
,
feed
,
fetch_list
,
program
=
None
):
...
...
@@ -223,7 +223,7 @@ class TestParallelExecutorBase(unittest.TestCase):
with
fluid
.
program_guard
(
main
,
startup
):
if
seed
is
not
None
:
startup
.
random_seed
=
seed
loss
=
method
(
use_feed
=
len
(
feed_dict
)
>
0
)
loss
=
method
(
use_feed
=
feed_dict
is
not
None
)
adam
=
fluid
.
optimizer
.
Adam
()
adam
.
minimize
(
loss
)
if
memory_opt
:
...
...
python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py
0 → 100644
浏览文件 @
e84d3a7f
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_pool2d_op
import
TestPool2d_Op
,
TestCase1
,
TestCase2
,
TestCase3
,
TestCase4
,
TestCase5
class
TestMKLDNNCase1
(
TestPool2d_Op
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase2
(
TestCase1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase3
(
TestCase2
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase4
(
TestCase3
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase5
(
TestCase4
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase6
(
TestCase5
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_pool2d_op.py
浏览文件 @
e84d3a7f
...
...
@@ -317,36 +317,5 @@ class TestCeilModeCase4(TestCase2):
self
.
ceil_mode
=
True
#--------------------test pool2d MKLDNN--------------------
class
TestMKLDNNCase1
(
TestPool2d_Op
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase2
(
TestCase1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase3
(
TestCase2
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase4
(
TestCase3
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase5
(
TestCase4
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNCase6
(
TestCase5
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录