Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2725f51d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2725f51d
编写于
9月 24, 2020
作者:
S
smallv0221
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into yxp0924
上级
43c4a00d
32ad4f90
变更
31
隐藏空白更改
内联
并排
Showing
31 changed file
with
351 addition
and
210 deletion
+351
-210
cmake/generic.cmake
cmake/generic.cmake
+9
-0
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+7
-4
cmake/init.cmake
cmake/init.cmake
+3
-0
cmake/paddle_win.props
cmake/paddle_win.props
+91
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+2
-7
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+30
-4
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+11
-0
paddle/fluid/inference/api/demo_ci/run.sh
paddle/fluid/inference/api/demo_ci/run.sh
+0
-5
paddle/fluid/operators/conv_cudnn_op.cu
paddle/fluid/operators/conv_cudnn_op.cu
+36
-24
paddle/fluid/operators/conv_op.h
paddle/fluid/operators/conv_op.h
+14
-4
paddle/fluid/operators/conv_transpose_cudnn_op.cu
paddle/fluid/operators/conv_transpose_cudnn_op.cu
+10
-10
paddle/fluid/operators/conv_transpose_op.h
paddle/fluid/operators/conv_transpose_op.h
+10
-2
paddle/fluid/operators/pixel_shuffle_op.cc
paddle/fluid/operators/pixel_shuffle_op.cc
+2
-2
python/paddle/distributed/fleet/__init__.py
python/paddle/distributed/fleet/__init__.py
+3
-3
python/paddle/distributed/fleet/base/fleet_base.py
python/paddle/distributed/fleet/base/fleet_base.py
+5
-27
python/paddle/distributed/fleet/base/util_factory.py
python/paddle/distributed/fleet/base/util_factory.py
+33
-30
python/paddle/distributed/fleet/launch.py
python/paddle/distributed/fleet/launch.py
+4
-6
python/paddle/distributed/fleet/utils/__init__.py
python/paddle/distributed/fleet/utils/__init__.py
+2
-0
python/paddle/distributed/fleet/utils/fs.py
python/paddle/distributed/fleet/utils/fs.py
+26
-26
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
+3
-4
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
+3
-4
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
+1
-2
python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
+2
-3
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
+2
-5
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py
...addle/fluid/tests/unittests/test_dist_fleet_heter_base.py
+1
-4
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
...le/fluid/tests/unittests/test_dist_fleet_heter_program.py
+0
-1
python/paddle/fluid/tests/unittests/test_fleet_base.py
python/paddle/fluid/tests/unittests/test_fleet_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py
.../paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py
+6
-6
python/paddle/fluid/tests/unittests/test_fleet_util.py
python/paddle/fluid/tests/unittests/test_fleet_util.py
+28
-22
python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py
...e/fluid/tests/unittests/test_imperative_signal_handler.py
+1
-1
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
...dle/fluid/tests/unittests/test_parallel_executor_mnist.py
+5
-3
未找到文件。
cmake/generic.cmake
浏览文件 @
2725f51d
...
...
@@ -446,6 +446,9 @@ function(nv_library TARGET_NAME)
message
(
FATAL
"Please specify source file or library in nv_library."
)
endif
()
endif
(
nv_library_SRCS
)
if
(
WIN32
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES VS_USER_PROPS
${
WIN_PROPS
}
)
endif
(
WIN32
)
endif
()
endfunction
(
nv_library
)
...
...
@@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME)
add_dependencies
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
if
(
WIN32
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES VS_USER_PROPS
${
WIN_PROPS
}
)
endif
(
WIN32
)
endif
()
endfunction
(
nv_binary
)
...
...
@@ -482,6 +488,9 @@ function(nv_test TARGET_NAME)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true
)
if
(
WIN32
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES VS_USER_PROPS
${
WIN_PROPS
}
)
endif
(
WIN32
)
endif
()
endfunction
(
nv_test
)
...
...
cmake/inference_lib.cmake
浏览文件 @
2725f51d
...
...
@@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING
set
(
PADDLE_INFERENCE_INSTALL_DIR
"
${
CMAKE_BINARY_DIR
}
/paddle_inference_install_dir"
CACHE STRING
"A path setting paddle inference shared and static libraries"
)
# TODO(zhaolong)
# At present, the size of static lib in Windows exceeds the system limit,
# so the generation of static lib is temporarily turned off.
# At present, the size of static lib in Windows is very large,
# so we need to crop the library size.
if
(
WIN32
)
#todo: remove the option
option
(
WITH_STATIC_LIB
"Compile demo with static/shared library, default use dynamic."
OFF
)
...
...
@@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta
copy_part_of_thrid_party
(
inference_lib_dist
${
PADDLE_INFERENCE_C_INSTALL_DIR
}
)
set
(
src_dir
"
${
PADDLE_SOURCE_DIR
}
/paddle/fluid"
)
set
(
paddle_fluid_c_lib
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/capi/libpaddle_fluid_c.*
)
if
(
WIN32
)
set
(
paddle_fluid_c_lib
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/capi/
${
CMAKE_BUILD_TYPE
}
/paddle_fluid_c.*
)
else
(
WIN32
)
set
(
paddle_fluid_c_lib
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/capi/libpaddle_fluid_c.*
)
endif
(
WIN32
)
copy
(
inference_lib_dist
SRCS
${
src_dir
}
/inference/capi/paddle_c_api.h
${
paddle_fluid_c_lib
}
...
...
cmake/init.cmake
浏览文件 @
2725f51d
...
...
@@ -26,4 +26,7 @@ if(WITH_GPU)
set
(
CMAKE_CUDA_FLAGS_MINSIZEREL
"-O1 -DNDEBUG"
)
endif
()
if
(
WIN32
)
set
(
WIN_PROPS
${
CMAKE_SOURCE_DIR
}
/cmake/paddle_win.props
)
endif
()
cmake/paddle_win.props
0 → 100644
浏览文件 @
2725f51d
<?xml version="1.0" encoding="utf-8"?>
<Project
xmlns=
"http://schemas.microsoft.com/developer/msbuild/2003"
>
<ItemDefinitionGroup>
<CudaCompile>
<!-- Project schema: Host properties -->
<UseHostDefines>
true
</UseHostDefines>
<Emulation>
false
</Emulation>
<HostDebugInfo
Condition=
"'$(Configuration)' == 'Debug'"
>
true
</HostDebugInfo>
<HostDebugInfo
Condition=
"'$(Configuration)' != 'Debug'"
>
false
</HostDebugInfo>
<FastMath>
false
</FastMath>
<Optimization>
InheritFromHost
</Optimization>
<Runtime>
InheritFromHost
</Runtime>
<RuntimeChecks>
InheritFromHost
</RuntimeChecks>
<TypeInfo>
InheritFromHost
</TypeInfo>
<Warning>
InheritFromHost
</Warning>
<BaseCommandLineTemplate>
-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]
</BaseCommandLineTemplate>
<BuildCommandLineTemplate>
--use-local-env
</BuildCommandLineTemplate>
<BuildDynamicCommandLineTemplate>
[CodeGeneration]
</BuildDynamicCommandLineTemplate>
<CleanCommandLineTemplate>
-clean
</CleanCommandLineTemplate>
<!-- <HostCommandLineTemplate>-Xcompiler "/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]"</HostCommandLineTemplate> -->
<HostCommandLineTemplate>
-Xcompiler
"
/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]
"
</HostCommandLineTemplate>
<DriverApiCommandLineTemplate>
%(BaseCommandLineTemplate) [CompileOut] "%(FullPath)"
</DriverApiCommandLineTemplate>
<RuntimeApiCommandLineTemplate>
%(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)"
</RuntimeApiCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate)
# Runtime API (NVCC Compilation Type is hybrid object or .c file)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate)
</CommandLineTemplate>
<ExecutionDescription>
Compiling CUDA source file %(Identity)...
</ExecutionDescription>
<ExclusionDescription>
Skipping CUDA source file %(Identity) (excluded from build).
</ExclusionDescription>
<!-- Miscellaneous -->
<PropsCacheOutputFile>
%(Filename)%(Extension).cache
</PropsCacheOutputFile>
<PropsCacheOutputPath>
$(IntDir)%(PropsCacheOutputFile)
</PropsCacheOutputPath>
<CudaCompileCoreProject>
$(MSBuildProjectFullPath)
</CudaCompileCoreProject>
</CudaCompile>
<CudaLink>
<PerformDeviceLink>
true
</PerformDeviceLink>
<LinkOut>
$(IntDir)$(TargetName).device-link.obj
</LinkOut>
<AdditionalLibraryDirectories></AdditionalLibraryDirectories>
<UseHostLibraryDirectories>
true
</UseHostLibraryDirectories>
<AdditionalDependencies></AdditionalDependencies>
<UseHostLibraryDependencies>
true
</UseHostLibraryDependencies>
<GPUDebugInfo>
InheritFromProject
</GPUDebugInfo>
<Optimization>
InheritFromProject
</Optimization>
<!-- Implicitly inherited from the project via @(CudaCompile) -->
<CodeGeneration></CodeGeneration>
<RuntimeChecks></RuntimeChecks>
<Runtime></Runtime>
<TargetMachinePlatform></TargetMachinePlatform>
<TypeInfo></TypeInfo>
<Warning></Warning>
<Inputs></Inputs>
<!-- <HostCommandLineTemplate>-Xcompiler "/EHsc [Warning] /nologo [Optimization] /Zi [RuntimeChecks] [Runtime] [TypeInfo]"</HostCommandLineTemplate> -->
<HostCommandLineTemplate>
-Xcompiler
"
/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]
"
</HostCommandLineTemplate>
<LinkCommandLineTemplate>
"$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs]
</LinkCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
%(LinkCommandLineTemplate)
</CommandLineTemplate>
</CudaLink>
<Link>
<AdditionalLibraryDirectories>
%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)
</AdditionalLibraryDirectories>
</Link>
<ClCompile>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)
</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
</Project>
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
2725f51d
...
...
@@ -44,14 +44,9 @@ add_subdirectory(api)
set
(
STATIC_INFERENCE_API paddle_inference_api analysis_predictor
zero_copy_tensor reset_tensor_array
analysis_config paddle_pass_builder activation_functions
${
mkldnn_quantizer_cfg
}
)
# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU
if
(
WIN32 AND WITH_GPU
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_API
}
)
else
()
create_static_lib
(
paddle_fluid
${
fluid_modules
}
${
STATIC_INFERENCE_API
}
)
endif
()
create_static_lib
(
paddle_fluid
${
fluid_modules
}
${
STATIC_INFERENCE_API
}
)
if
(
NOT APPLE
AND NOT WIN32
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
2725f51d
...
...
@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
)
{
#ifdef PADDLE_WITH_MKLDNN
VLOG
(
2
)
<<
"AnalysisPredictor::Run get_cur_mkldnn_session_id="
std
::
vector
<
std
::
vector
<
int
>>
inputs_shape
;
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
inputs_shape
.
emplace_back
(
inputs
[
i
].
shape
);
}
MkldnnPreSet
(
inputs_shape
);
#endif
}
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
std
::
vector
<
int
>>
&
inputs_shape
)
{
#ifdef PADDLE_WITH_MKLDNN
VLOG
(
2
)
<<
"AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
<<
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
();
// In cache clearing mode.
if
(
config_
.
mkldnn_cache_capacity_
>
0
)
{
...
...
@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_
.
mkldnn_cache_capacity_
);
// Set current_input_shape for caching dynamic shape.
std
::
stringstream
ss
;
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
inputs
[
i
].
shape
.
size
();
++
j
)
{
ss
<<
inputs
[
i
].
shape
[
j
]
<<
"-"
;
for
(
size_t
i
=
0
;
i
<
inputs
_shape
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
inputs
_shape
[
i
]
.
size
();
++
j
)
{
ss
<<
inputs
_shape
[
i
]
[
j
]
<<
"-"
;
}
}
VLOG
(
2
)
<<
"Set input shape="
<<
ss
.
str
();
...
...
@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
#ifdef PADDLE_WITH_MKLDNN
if
(
config_
.
use_mkldnn_
)
{
std
::
vector
<
std
::
vector
<
int
>>
shape_vector
;
auto
names
=
GetInputNames
();
for
(
size_t
i
=
0
;
i
<
names
.
size
();
++
i
)
{
auto
in_tensor
=
GetInputTensor
(
names
[
i
]);
shape_vector
.
emplace_back
(
in_tensor
->
shape
());
}
MkldnnPreSet
(
shape_vector
);
}
#endif
executor_
->
Run
();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
sub_scope_
);
...
...
@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle
::
platform
::
SetNumThreads
(
1
);
#ifdef PADDLE_WITH_MKLDNN
if
(
config_
.
use_mkldnn_
)
MkldnnPostReset
();
#endif
#if defined(PADDLE_WITH_MKLML)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
2725f51d
...
...
@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
///
void
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensor shape
///
void
MkldnnPreSet
(
const
std
::
vector
<
std
::
vector
<
int
>>
&
inputs_shape
);
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
...
...
paddle/fluid/inference/api/demo_ci/run.sh
浏览文件 @
2725f51d
...
...
@@ -68,11 +68,6 @@ rm -rf *
for
WITH_STATIC_LIB
in
ON OFF
;
do
if
[
$(
echo
`
uname
`
|
grep
"Win"
)
!=
""
]
;
then
# TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready.
if
[
$TEST_GPU_CPU
==
ON]
&&
[
$WITH_STATIC_LIB
==
ON
]
;
then
return
0
fi
# -----simple_on_word2vec on windows-----
cmake ..
-G
"Visual Studio 14 2015"
-A
x64
-DPADDLE_LIB
=
${
inference_install_dir
}
\
-DWITH_MKL
=
$TURN_ON_MKL
\
...
...
paddle/fluid/operators/conv_cudnn_op.cu
浏览文件 @
2725f51d
...
...
@@ -50,8 +50,9 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
"It must use CUDAPlace."
);
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
const
Tensor
*
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
*
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
...
...
@@ -60,14 +61,16 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
bool
exhaustive_search
=
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
bool
deterministic
=
FLAGS_cudnn_deterministic
;
auto
exhaustive_deterministic
=
exhaustive_search
&&
deterministic
;
PADDLE_ENFORCE_EQ
(
exhaustive_deterministic
,
false
,
platform
::
errors
::
InvalidArgument
(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
));
if
(
exhaustive_search
&&
FLAGS_cudnn_deterministic
)
{
PADDLE_THROW
(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
);
}
const
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
const
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
...
...
@@ -197,7 +200,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
&
transformed_input
);
}
break
;
default:
PADDLE_THROW
(
"ConvOp only support tensors with 4 or 5 dimensions."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"ConvOp only support tensors with 4 or 5 dimensions."
));
}
}
else
{
...
...
@@ -317,8 +321,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
"It must use CUDAPlace."
);
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
output_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
...
...
@@ -337,14 +342,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
bool
exhaustive_search
=
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
bool
deterministic
=
FLAGS_cudnn_deterministic
;
if
(
exhaustive_search
&&
deterministic
)
{
PADDLE_THROW
(
"Can't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
);
}
auto
exhaustive_deterministic
=
exhaustive_search
&&
deterministic
;
PADDLE_ENFORCE_EQ
(
exhaustive_deterministic
,
false
,
platform
::
errors
::
InvalidArgument
(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
));
const
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
const
bool
channel_last
=
(
data_format
==
"NHWC"
||
data_format
==
"NDHWC"
);
...
...
@@ -495,7 +502,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
&
transformed_input
);
}
break
;
default:
PADDLE_THROW
(
"ConvOp only support tensors with 4 or 5 dimensions."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"ConvOp only support tensors with 4 or 5 dimensions."
));
}
}
else
{
transformed_input
.
ShareDataWith
(
transformed_input_channel
);
...
...
@@ -701,8 +709,9 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
"It must use CUDAPlace."
);
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
X
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
W
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
dO
=
ctx
.
Input
<
Tensor
>
(
"DOutput"
);
...
...
@@ -736,14 +745,16 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
const
std
::
vector
<
int
>&
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
bool
exhaustive_search
=
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
bool
deterministic
=
FLAGS_cudnn_deterministic
;
if
(
exhaustive_search
&&
deterministic
)
{
PADDLE_THROW
(
"Can't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
);
}
auto
exhaustive_deterministic
=
exhaustive_search
&&
deterministic
;
PADDLE_ENFORCE_EQ
(
exhaustive_deterministic
,
false
,
platform
::
errors
::
InvalidArgument
(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
));
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
...
...
@@ -878,7 +889,8 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
}
}
break
;
default:
PADDLE_THROW
(
"ConvOp only support tensors with 4 or 5 dimensions."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"ConvOp only support tensors with 4 or 5 dimensions."
));
}
}
else
{
...
...
paddle/fluid/operators/conv_op.h
浏览文件 @
2725f51d
...
...
@@ -685,8 +685,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
true
,
"It must use CPUPlace."
);
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CPUPlace."
));
const
Tensor
*
X
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
const
Tensor
*
dY
=
ctx
.
Input
<
Tensor
>
(
"DOutput"
);
const
Tensor
*
ddX
=
ctx
.
Input
<
Tensor
>
(
"DDInput"
);
...
...
@@ -982,11 +983,20 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
output
->
dims
()[
output
->
dims
().
size
()
-
1
]
%
input
->
dims
()[
input
->
dims
().
size
()
-
1
],
0
,
"The output channels must be a multiple of the input channels"
);
0
,
platform
::
errors
::
InvalidArgument
(
"ShapeError: The output channels must be a multiple of the "
"input channels. But receivced output channel number is %d "
"and input channel number is %d"
,
output
->
dims
()[
output
->
dims
().
size
()
-
1
],
input
->
dims
()[
input
->
dims
().
size
()
-
1
]));
}
else
{
PADDLE_ENFORCE_EQ
(
output
->
dims
()[
1
]
%
input
->
dims
()[
1
],
0
,
"The output channels must be a multiple of the input channels"
);
platform
::
errors
::
InvalidArgument
(
"ShapeError: The output channels must be a multiple of the "
"input channels. But receivced output channel number is %d "
"and input channel number is %d"
,
output
->
dims
()[
1
],
input
->
dims
()[
1
]));
}
// transform tensor
Tensor
transformed_input
(
input
->
type
());
...
...
paddle/fluid/operators/conv_transpose_cudnn_op.cu
浏览文件 @
2725f51d
...
...
@@ -51,8 +51,9 @@ template <typename T>
class
CUDNNConvTransposeOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
"It must use CUDAPlace."
);
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
*
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
*
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
...
...
@@ -145,9 +146,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
ctx
,
input_pad
,
input_transpose
,
pad_value
,
&
transformed_input
);
}
break
;
default:
PADDLE_ENFORCE_EQ
(
rank
==
4
||
rank
==
5
,
true
,
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
));
}
}
else
{
transformed_input
=
input_transpose
;
...
...
@@ -290,8 +290,9 @@ template <typename T>
class
CUDNNConvTransposeGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"It must use CUDAPlace."
);
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
output_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
...
...
@@ -393,9 +394,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
&
transformed_output_grad
);
}
break
;
default:
PADDLE_ENFORCE_EQ
(
rank
==
4
||
rank
==
5
,
true
,
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
));
}
}
else
{
transformed_output_grad
=
output_grad_transpose
;
...
...
paddle/fluid/operators/conv_transpose_op.h
浏览文件 @
2725f51d
...
...
@@ -580,7 +580,12 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
PADDLE_ENFORCE_EQ
(
groups
,
filter
.
dims
()[
0
]);
PADDLE_ENFORCE_EQ
(
groups
,
filter
.
dims
()[
0
],
platform
::
errors
::
InvalidArgument
(
"groups should be error to the 1st dimension of filter. But "
"received groups is %d and filter dimension[0] is %d"
,
groups
,
filter
.
dims
()[
0
]));
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
...
...
@@ -588,7 +593,10 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
std
::
string
padding_algorithm
=
context
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
for
(
auto
v
:
dilations
)
{
PADDLE_ENFORCE_EQ
(
v
,
1
);
PADDLE_ENFORCE_EQ
(
v
,
1
,
platform
::
errors
::
InvalidArgument
(
"dilations should be 1 in depthwise conv. "
"But received dilations is %d"
,
v
));
}
auto
in_dims
=
input
->
dims
();
...
...
paddle/fluid/operators/pixel_shuffle_op.cc
浏览文件 @
2725f51d
...
...
@@ -46,14 +46,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
platform
::
errors
::
InvalidArgument
(
"The square of upscale_factor[%u] should divide the "
"number of channel[%u]"
,
input_dims
[
1
],
upscale_factor
*
upscale_factor
));
upscale_factor
*
upscale_factor
,
input_dims
[
1
]
));
}
else
{
PADDLE_ENFORCE_EQ
(
input_dims
[
3
]
%
(
upscale_factor
*
upscale_factor
),
0
,
platform
::
errors
::
InvalidArgument
(
"The square of upscale_factor[%u] should divide the "
"number of channel[%u]"
,
input_dims
[
3
],
upscale_factor
*
upscale_factor
));
upscale_factor
*
upscale_factor
,
input_dims
[
3
]
));
}
auto
output_dims
=
input_dims
;
output_dims
[
0
]
=
input_dims
[
0
];
...
...
python/paddle/distributed/fleet/__init__.py
浏览文件 @
2725f51d
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
# TODO: define distributed api under this directory,
from
.base.role_maker
import
UserDefinedRoleMaker
,
PaddleCloudRoleMaker
from
.base.role_maker
import
Role
,
UserDefinedRoleMaker
,
PaddleCloudRoleMaker
from
.base.distributed_strategy
import
DistributedStrategy
from
.base.fleet_base
import
Fleet
from
.base.util_factory
import
UtilBase
...
...
@@ -26,6 +26,7 @@ __all__ = [
"UserDefinedRoleMaker"
,
"PaddleCloudRoleMaker"
,
"Fleet"
,
"Role"
,
]
fleet
=
Fleet
()
...
...
@@ -39,8 +40,7 @@ server_num = fleet.server_num
server_index
=
fleet
.
server_index
server_endpoints
=
fleet
.
server_endpoints
is_server
=
fleet
.
is_server
set_util
=
fleet
.
set_util
util
=
fleet
.
util
util
=
UtilBase
()
barrier_worker
=
fleet
.
barrier_worker
init_worker
=
fleet
.
init_worker
init_server
=
fleet
.
init_server
...
...
python/paddle/distributed/fleet/base/fleet_base.py
浏览文件 @
2725f51d
...
...
@@ -23,7 +23,6 @@ from .strategy_compiler import StrategyCompiler
from
.distributed_strategy
import
DistributedStrategy
from
.meta_optimizer_factory
import
MetaOptimizerFactory
from
.runtime_factory
import
RuntimeFactory
from
.util_factory
import
UtilFactory
from
paddle.fluid.wrapped_decorator
import
wrap_decorator
from
paddle.fluid.dygraph
import
parallel_helper
...
...
@@ -120,7 +119,6 @@ class Fleet(object):
self
.
strategy_compiler
=
None
self
.
_is_collective
=
False
self
.
_runtime_handle
=
None
self
.
_util
=
None
def
init
(
self
,
role_maker
=
None
,
is_collective
=
False
):
"""
...
...
@@ -182,6 +180,9 @@ class Fleet(object):
format
(
type
(
role_maker
)))
self
.
_role_maker
.
_generate_role
()
import
paddle.distributed.fleet
as
fleet
fleet
.
util
.
_set_role_maker
(
self
.
_role_maker
)
self
.
strategy_compiler
=
StrategyCompiler
()
if
paddle
.
fluid
.
framework
.
in_dygraph_mode
():
if
parallel_helper
.
_is_parallel_ctx_initialized
():
...
...
@@ -353,29 +354,6 @@ class Fleet(object):
return
self
.
_role_maker
.
_is_server
(
)
or
self
.
_role_maker
.
_is_heter_worker
()
def
set_util
(
self
,
util
):
self
.
_util
=
util
def
util
(
self
):
"""
Utility functions that can be used under certain runtime
return util
Returns:
UtilBase: instance of UtilBase, can use distributed ops/tools easily.
Examples:
.. code-block:: python
import paddle.distributed.fleet as fleet
fleet.init()
util = fleet.util
files = ["1.log", "2.log", "3.log", "4.log"]
files = util.get_file_shard()
"""
return
self
.
_util
def
barrier_worker
(
self
):
"""
barrier all workers
...
...
@@ -1102,7 +1080,7 @@ class Fleet(object):
if
self
.
_runtime_handle
is
None
:
self
.
_runtime_handle
=
RuntimeFactory
().
_create_runtime
(
context
)
i
f
self
.
_util
is
None
:
self
.
_util
=
UtilFactory
().
_create_util
(
context
)
i
mport
paddle.distributed.fleet
as
fleet
fleet
.
util
.
_set_strategy
(
context
[
"valid_strategy"
]
)
return
optimize_ops
,
params_grads
python/paddle/distributed/fleet/base/util_factory.py
浏览文件 @
2725f51d
...
...
@@ -73,11 +73,13 @@ class UtilBase(object):
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import numpy as np
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
role = PaddleCloudRoleMaker(
...
...
@@ -85,19 +87,18 @@ class UtilBase(object):
init_gloo=True,
path="./tmp_gloo")
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
input = [1, 2]
output = fleet
_
util.all_reduce(input, "sum", "server")
output = fleet
.
util.all_reduce(input, "sum", "server")
print(output)
# [2, 4]
elif fleet.is_worker():
input = np.array([3, 4])
output = fleet
_
util.all_reduce(input, "sum", "worker")
output = fleet
.
util.all_reduce(input, "sum", "worker")
print(output)
# [6, 8]
output = fleet
_
util.all_reduce(input, "sum", "all")
output = fleet
.
util.all_reduce(input, "sum", "all")
print(output)
# [8, 12]
if __name__ == "__main__":
...
...
@@ -117,10 +118,12 @@ class UtilBase(object):
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
role = PaddleCloudRoleMaker(
...
...
@@ -128,15 +131,14 @@ class UtilBase(object):
init_gloo=True,
path="./tmp_gloo")
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
fleet
_
util.barrier("server")
fleet
.
util.barrier("server")
print("all server arrive here")
elif fleet.is_worker():
fleet
_
util.barrier("worker")
fleet
.
util.barrier("worker")
print("all server arrive here")
fleet
_
util.barrier("all")
fleet
.
util.barrier("all")
print("all servers and workers arrive here")
if __name__ == "__main__":
...
...
@@ -160,10 +162,12 @@ class UtilBase(object):
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
role = PaddleCloudRoleMaker(
...
...
@@ -171,19 +175,18 @@ class UtilBase(object):
init_gloo=True,
path="./tmp_gloo")
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
input = fleet.server_index()
output = fleet
_
util.all_gather(input, "server")
output = fleet
.
util.all_gather(input, "server")
print(output)
# output = [0, 1]
elif fleet.is_worker():
input = fleet.worker_index()
output = fleet
_
util.all_gather(input, "worker")
output = fleet
.
util.all_gather(input, "worker")
# output = [0, 1]
print(output)
output = fleet
_
util.all_gather(input, "all")
output = fleet
.
util.all_gather(input, "all")
print(output)
# output = [0, 1, 0, 1]
...
...
@@ -220,18 +223,20 @@ class UtilBase(object):
.. code-block:: python
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet.base.role_maker as role_m
aker
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import UserDefinedRoleM
aker
role =
role_maker.
UserDefinedRoleMaker(
role = UserDefinedRoleMaker(
is_collective=False,
init_gloo=False,
current_id=0,
role=
role_maker
.Role.WORKER,
role=
fleet
.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
files = fleet_util.get_file_shard(["file1", "file2", "file3"])
fleet.init(role)
files = fleet.util.get_file_shard(["file1", "file2", "file3"])
print(files)
# files = ["file1", "file2"]
"""
if
not
isinstance
(
files
,
list
):
...
...
@@ -267,18 +272,19 @@ class UtilBase(object):
.. code-block:: python
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet.base.role_maker as role_m
aker
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import UserDefinedRoleM
aker
role =
role_maker.
UserDefinedRoleMaker(
role = UserDefinedRoleMaker(
is_collective=False,
init_gloo=False,
current_id=0,
role=
role_maker
.Role.WORKER,
role=
fleet
.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
fleet_util.print_on_rank("I'm worker 0", 0)
fleet.init(role)
fleet.util.print_on_rank("I'm worker 0", 0)
"""
if
self
.
role_maker
.
_worker_index
()
!=
rank_id
:
return
...
...
@@ -577,6 +583,3 @@ class UtilBase(object):
print
(
"fetch_targets name: %s"
%
v
.
name
)
print
(
"fetch_targets: {}"
.
format
(
results
[
i
]))
return
results
fleet_util
=
UtilFactory
().
_create_util
(
None
)
python/paddle/distributed/fleet/launch.py
浏览文件 @
2725f51d
...
...
@@ -181,8 +181,8 @@ def get_gpus(gpus):
cuda_visible_devices_list
=
cuda_visible_devices
.
split
(
','
)
for
x
in
gpus
.
split
(
','
):
assert
x
in
cuda_visible_devices_list
,
"Can't find "
\
"your gpus %s in CUDA_VISIBLE_DEVICES[%s]."
\
%
(
x
,
cuda_visible_devices
)
"your gpus %s in CUDA_VISIBLE_DEVICES[%s]."
\
%
(
x
,
cuda_visible_devices
)
res_gpus
=
[
cuda_visible_devices_list
.
index
(
x
.
strip
())
for
x
in
gpus
.
split
(
','
)
...
...
@@ -348,8 +348,7 @@ def launch_ps(args):
"PADDLE_PORT"
:
cur_server
.
endpoint
.
split
(
":"
)[
1
],
"TRAINING_ROLE"
:
"PSERVER"
,
"PADDLE_TRAINERS_NUM"
:
str
(
worker_num
),
"POD_IP"
:
cur_server
.
endpoint
.
split
(
":"
)[
0
],
"PADDLE_WITH_GLOO"
:
"1"
"POD_IP"
:
cur_server
.
endpoint
.
split
(
":"
)[
0
]
}
current_env
.
update
(
proc_env
)
...
...
@@ -388,8 +387,7 @@ def launch_ps(args):
"PADDLE_TRAINER_ENDPOINTS"
:
worker_endpoints
,
"PADDLE_TRAINERS_NUM"
:
str
(
worker_num
),
"TRAINING_ROLE"
:
"TRAINER"
,
"PADDLE_TRAINER_ID"
:
str
(
cur_worker
.
rank
),
"PADDLE_WITH_GLOO"
:
"1"
"PADDLE_TRAINER_ID"
:
str
(
cur_worker
.
rank
)
}
current_env
.
update
(
proc_env
)
...
...
python/paddle/distributed/fleet/utils/__init__.py
浏览文件 @
2725f51d
...
...
@@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.fs
import
LocalFS
,
HDFSClient
python/paddle/distributed/fleet/utils/fs.py
浏览文件 @
2725f51d
...
...
@@ -120,7 +120,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
subdirs, files = client.ls_dir("./")
...
...
@@ -140,7 +140,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
subdirs, files = client.ls_dir("./")
...
...
@@ -160,7 +160,7 @@ class LocalFS(FS):
def
mkdirs
(
self
,
fs_path
):
"""
Create a
remote HDFS
directory.
Create a
local
directory.
Args:
fs_path(str): The local directory path.
...
...
@@ -168,7 +168,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.mkdirs("test_mkdirs")
...
...
@@ -189,7 +189,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_rename_src")
...
...
@@ -217,7 +217,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.mkdirs("test_localFS_mkdirs")
...
...
@@ -247,7 +247,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_is_file")
...
...
@@ -269,7 +269,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.mkdirs("test_is_dir")
...
...
@@ -292,7 +292,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
ret = local_fs.is_exist("test_is_exist")
...
...
@@ -311,7 +311,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_touch")
...
...
@@ -332,13 +332,11 @@ class LocalFS(FS):
src_path(str): Name of the file or directory, that's needed to be moved.
dst_path(str): Name of the file or directory to which to move to.
overwrite(bool): Whether to re-write `dst_path` if that exists. Default is False.
test_exists(bool): Check the existence of `src_path` and `dst_path` .
When `test_exists` is set true, if `src_path` doesn't exist or `dst_path` exists, program will throw an Excetption.
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_mv_src")
...
...
@@ -369,7 +367,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
subdirs = client.list_dirs("./")
...
...
@@ -432,7 +430,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -493,7 +491,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -526,7 +524,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -587,7 +585,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -629,7 +627,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -661,7 +659,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -695,7 +693,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -740,7 +738,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -784,7 +782,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -830,7 +828,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -893,7 +891,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
@@ -919,12 +917,14 @@ class HDFSClient(FS):
Args:
fs_path(str): The HDFS file path.
exist_ok(bool): When `fs_path` exists, if `exist_ok` is set false,
program will throw an Exception. Default is true.
Examples:
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
浏览文件 @
2725f51d
...
...
@@ -28,7 +28,6 @@ import numpy as np
import
ctr_dataset_reader
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
paddle
.
enable_static
()
...
...
@@ -180,13 +179,13 @@ class TestDistCTR2x2(FleetDistRunnerBase):
fetch_list
=
[
self
.
avg_cost
.
name
])
loss_val
=
np
.
mean
(
loss_val
)
# TODO(randomly fail)
# reduce_output = fleet
_
util.all_reduce(
# reduce_output = fleet
.
util.all_reduce(
# np.array(loss_val), mode="sum")
# loss_all_trainer = fleet
_
util.all_gather(float(loss_val))
# loss_all_trainer = fleet
.
util.all_gather(float(loss_val))
# loss_val = float(reduce_output) / len(loss_all_trainer)
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
loss_val
)
fleet
_
util
.
print_on_rank
(
message
,
0
)
fleet
.
util
.
print_on_rank
(
message
,
0
)
pass_time
=
time
.
time
()
-
pass_start
except
fluid
.
core
.
EOFException
:
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
浏览文件 @
2725f51d
...
...
@@ -29,7 +29,6 @@ import numpy as np
import
ctr_dataset_reader
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
dist_fleet_ctr
import
TestDistCTR2x2
,
fake_ctr_reader
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
# Fix seed for test
fluid
.
default_startup_program
().
random_seed
=
1
...
...
@@ -76,13 +75,13 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
loss_val
=
exe
.
run
(
program
=
fleet
.
main_program
,
fetch_list
=
[
self
.
avg_cost
.
name
])
loss_val
=
np
.
mean
(
loss_val
)
reduce_output
=
fleet
_
util
.
all_reduce
(
reduce_output
=
fleet
.
util
.
all_reduce
(
np
.
array
(
loss_val
),
mode
=
"sum"
)
loss_all_trainer
=
fleet
_
util
.
all_gather
(
float
(
loss_val
))
loss_all_trainer
=
fleet
.
util
.
all_gather
(
float
(
loss_val
))
loss_val
=
float
(
reduce_output
)
/
len
(
loss_all_trainer
)
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
loss_val
)
fleet
_
util
.
print_on_rank
(
message
,
0
)
fleet
.
util
.
print_on_rank
(
message
,
0
)
pass_time
=
time
.
time
()
-
pass_start
except
fluid
.
core
.
EOFException
:
...
...
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
浏览文件 @
2725f51d
...
...
@@ -29,7 +29,6 @@ import numpy as np
import
ctr_dataset_reader
from
test_dist_fleet_heter_base
import
runtime_main
,
FleetDistHeterRunnerBase
from
dist_fleet_ctr
import
TestDistCTR2x2
,
fake_ctr_reader
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
paddle
.
enable_static
()
...
...
@@ -182,7 +181,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
thread_num
=
int
(
os
.
getenv
(
"CPU_NUM"
,
2
))
batch_size
=
128
filelist
=
fleet
_
util
.
get_file_shard
(
train_file_list
)
filelist
=
fleet
.
util
.
get_file_shard
(
train_file_list
)
print
(
"filelist: {}"
.
format
(
filelist
))
# config dataset
...
...
python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
浏览文件 @
2725f51d
...
...
@@ -32,7 +32,6 @@ import os
import
signal
from
functools
import
reduce
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
paddle
.
enable_static
()
...
...
@@ -198,7 +197,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
def
net
(
self
,
args
,
batch_size
=
4
,
lr
=
0.01
):
avg_cost
,
_
,
predict
,
self
.
reader
=
\
train_network
(
batch_size
=
batch_size
,
is_distributed
=
False
,
is_sparse
=
True
,
is_self_contained_lr
=
False
,
is_pyreader
=
(
args
.
reader
==
"pyreader"
))
is_sparse
=
True
,
is_self_contained_lr
=
False
,
is_pyreader
=
(
args
.
reader
==
"pyreader"
))
self
.
avg_cost
=
avg_cost
self
.
predict
=
predict
...
...
@@ -238,7 +237,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
loss_val
=
np
.
mean
(
loss_val
)
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
loss_val
)
fleet
_
util
.
print_on_rank
(
message
,
0
)
fleet
.
util
.
print_on_rank
(
message
,
0
)
pass_time
=
time
.
time
()
-
pass_start
except
fluid
.
core
.
EOFException
:
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
浏览文件 @
2725f51d
...
...
@@ -34,8 +34,7 @@ import unittest
import
paddle
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
from
paddle.distributed.fleet
import
fleet
import
paddle.distributed.fleet
as
fleet
from
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy
import
StrategyFactory
__all__
=
[
'FleetDistRunnerBase'
,
'TestFleetBase'
,
'runtime_main'
]
...
...
@@ -97,7 +96,7 @@ class FleetDistRunnerBase(object):
self
.
dump_fields_path
=
os
.
getenv
(
"dump_fields_path"
,
""
)
debug
=
int
(
os
.
getenv
(
"Debug"
,
"0"
))
# TODO(update strategy to support dump params)
if
False
:
#debug:
if
False
:
#
debug:
self
.
strategy
.
set_debug_opt
({
"dump_param"
:
self
.
dump_param
,
"dump_fields"
:
self
.
dump_fields
,
...
...
@@ -372,8 +371,6 @@ def runtime_main(test_class):
strategy
=
model
.
build_strategy
(
args
)
avg_cost
=
model
.
net
(
args
)
model
.
build_optimizer
(
avg_cost
,
strategy
)
fleet_util
.
_set_strategy
(
strategy
)
fleet_util
.
_set_role_maker
(
role
)
if
args
.
role
==
"pserver"
:
model
.
run_pserver
(
args
)
else
:
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py
浏览文件 @
2725f51d
...
...
@@ -34,8 +34,7 @@ import unittest
import
paddle
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
from
paddle.distributed.fleet
import
fleet
import
paddle.distributed.fleet
as
fleet
__all__
=
[
'FleetDistHeterRunnerBase'
,
'TestFleetHeterBase'
,
'runtime_main'
]
...
...
@@ -376,8 +375,6 @@ def runtime_main(test_class):
strategy
=
model
.
build_strategy
(
args
)
avg_cost
=
model
.
net
(
args
)
model
.
build_optimizer
(
avg_cost
,
strategy
)
fleet_util
.
_set_strategy
(
strategy
)
fleet_util
.
_set_role_maker
(
role
)
if
args
.
role
==
"pserver"
or
args
.
role
==
"heter_trainer"
:
model
.
run_pserver
(
args
)
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
浏览文件 @
2725f51d
...
...
@@ -19,7 +19,6 @@ import os
import
math
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
from
paddle.distributed.fleet
import
fleet
import
paddle
...
...
python/paddle/fluid/tests/unittests/test_fleet_base.py
浏览文件 @
2725f51d
...
...
@@ -107,7 +107,7 @@ class TestFleetBase(unittest.TestCase):
def
test_util
(
self
):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
self
.
assert
Equal
(
fleet
.
util
()
,
None
)
self
.
assert
NotEqual
(
fleet
.
util
,
None
)
def
test_barrier_worker
(
self
):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
...
...
python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py
浏览文件 @
2725f51d
...
...
@@ -436,12 +436,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer
.
minimize
(
avg_cost
)
comm_world
=
"server"
fleet
.
util
()
.
barrier
(
comm_world
)
fleet
.
util
.
barrier
(
comm_world
)
gather
=
fleet
.
util
()
.
all_gather
(
1
,
comm_world
)
gather
=
fleet
.
util
.
all_gather
(
1
,
comm_world
)
self
.
assertEqual
(
gather
[
0
],
1
)
all_reduce
=
fleet
.
util
()
.
all_reduce
(
1
,
"sum"
,
comm_world
)
all_reduce
=
fleet
.
util
.
all_reduce
(
1
,
"sum"
,
comm_world
)
self
.
assertEqual
(
1
,
all_reduce
)
self
.
clean
(
tmp
)
...
...
@@ -752,12 +752,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer
.
minimize
(
avg_cost
)
comm_world
=
"server"
fleet
.
util
()
.
barrier
(
comm_world
)
fleet
.
util
.
barrier
(
comm_world
)
gather
=
fleet
.
util
()
.
all_gather
(
1
,
comm_world
)
gather
=
fleet
.
util
.
all_gather
(
1
,
comm_world
)
self
.
assertEqual
(
gather
[
0
],
1
)
all_reduce
=
fleet
.
util
()
.
all_reduce
(
1
,
"sum"
,
comm_world
)
all_reduce
=
fleet
.
util
.
all_reduce
(
1
,
"sum"
,
comm_world
)
self
.
assertEqual
(
1
,
all_reduce
)
self
.
clean
(
tmp
)
...
...
python/paddle/fluid/tests/unittests/test_fleet_util.py
浏览文件 @
2725f51d
...
...
@@ -22,7 +22,6 @@ import tempfile
import
os
import
sys
from
paddle.dataset.common
import
download
,
DATA_HOME
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
import
paddle.distributed.fleet.base.role_maker
as
role_maker
...
...
@@ -59,8 +58,7 @@ class TestFleetUtil(unittest.TestCase):
import
paddle.distributed.fleet.base.role_maker
as
role_maker
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
default_util
=
fleet
.
util
()
self
.
assertEqual
(
default_util
,
None
)
self
.
assertNotEqual
(
fleet
.
util
,
None
)
def
test_set_user_defined_util
(
self
):
import
paddle.distributed.fleet
as
fleet
...
...
@@ -76,17 +74,19 @@ class TestFleetUtil(unittest.TestCase):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
my_util
=
UserDefinedUtil
()
fleet
.
set_util
(
my_util
)
user_id
=
fleet
.
util
()
.
get_user_id
()
fleet
.
util
=
my_util
user_id
=
fleet
.
util
.
get_user_id
()
self
.
assertEqual
(
user_id
,
10
)
def
test_fs
(
self
):
from
paddle.distributed.fleet.utils.fs
import
LocalFS
import
paddle.distributed.fleet
as
fleet
from
paddle.distributed.fleet.utils
import
LocalFS
fs
=
LocalFS
()
dirs
,
files
=
fs
.
ls_dir
(
"test_tmp"
)
dirs
,
files
=
fs
.
ls_dir
(
"./"
)
self
.
assertFalse
(
fs
.
need_upload_download
())
fleet
_
util
.
_set_file_system
(
fs
)
fleet
.
util
.
_set_file_system
(
fs
)
def
download_files
(
self
):
path
=
download
(
self
.
proto_data_url
,
self
.
module_name
,
...
...
@@ -98,7 +98,8 @@ class TestFleetUtil(unittest.TestCase):
return
unzip_folder
def
test_get_file_shard
(
self
):
self
.
assertRaises
(
Exception
,
fleet_util
.
get_file_shard
,
"files"
)
import
paddle.distributed.fleet
as
fleet
self
.
assertRaises
(
Exception
,
fleet
.
util
.
get_file_shard
,
"files"
)
try
:
import
netifaces
except
:
...
...
@@ -112,18 +113,20 @@ class TestFleetUtil(unittest.TestCase):
role
=
role_maker
.
Role
.
WORKER
,
worker_endpoints
=
[
"127.0.0.1:6003"
,
"127.0.0.1:6004"
],
server_endpoints
=
[
"127.0.0.1:6001"
,
"127.0.0.1:6002"
])
fleet_util
.
_set_role_maker
(
role
)
files
=
fleet_util
.
get_file_shard
([
"1"
,
"2"
,
"3"
])
fleet
.
init
(
role
)
files
=
fleet
.
util
.
get_file_shard
([
"1"
,
"2"
,
"3"
])
self
.
assertTrue
(
len
(
files
)
==
2
and
"1"
in
files
and
"2"
in
files
)
def
test_program_type_trans
(
self
):
import
paddle.distributed.fleet
as
fleet
data_dir
=
self
.
download_files
()
program_dir
=
os
.
path
.
join
(
data_dir
,
self
.
pruned_dir
)
text_program
=
"pruned_main_program.pbtxt"
binary_program
=
"pruned_main_program.bin"
text_to_binary
=
fleet
_
util
.
_program_type_trans
(
program_dir
,
text_to_binary
=
fleet
.
util
.
_program_type_trans
(
program_dir
,
text_program
,
True
)
binary_to_text
=
fleet
_
util
.
_program_type_trans
(
program_dir
,
binary_to_text
=
fleet
.
util
.
_program_type_trans
(
program_dir
,
binary_program
,
False
)
self
.
assertTrue
(
os
.
path
.
exists
(
os
.
path
.
join
(
program_dir
,
text_to_binary
)))
...
...
@@ -131,6 +134,7 @@ class TestFleetUtil(unittest.TestCase):
os
.
path
.
exists
(
os
.
path
.
join
(
program_dir
,
binary_to_text
)))
def
test_prams_check
(
self
):
import
paddle.distributed.fleet
as
fleet
data_dir
=
self
.
download_files
()
class
config
:
...
...
@@ -160,11 +164,11 @@ class TestFleetUtil(unittest.TestCase):
# test saved var's shape
conf
.
dump_program_filename
=
"pruned_main_program.save_var_shape_not_match"
self
.
assertRaises
(
Exception
,
fleet
_
util
.
_params_check
)
self
.
assertRaises
(
Exception
,
fleet
.
util
.
_params_check
)
# test program.proto without feed_op and fetch_op
conf
.
dump_program_filename
=
"pruned_main_program.no_feed_fetch"
results
=
fleet
_
util
.
_params_check
(
conf
)
results
=
fleet
.
util
.
_params_check
(
conf
)
self
.
assertTrue
(
len
(
results
)
==
1
)
np
.
testing
.
assert_array_almost_equal
(
results
[
0
],
np
.
array
(
...
...
@@ -172,11 +176,11 @@ class TestFleetUtil(unittest.TestCase):
# test feed_var's shape
conf
.
dump_program_filename
=
"pruned_main_program.feed_var_shape_not_match"
self
.
assertRaises
(
Exception
,
fleet
_
util
.
_params_check
)
self
.
assertRaises
(
Exception
,
fleet
.
util
.
_params_check
)
# test correct case with feed_vars_filelist
conf
.
dump_program_filename
=
"pruned_main_program.pbtxt"
results
=
fleet
_
util
.
_params_check
(
conf
)
results
=
fleet
.
util
.
_params_check
(
conf
)
self
.
assertTrue
(
len
(
results
)
==
1
)
np
.
testing
.
assert_array_almost_equal
(
results
[
0
],
np
.
array
(
...
...
@@ -186,13 +190,14 @@ class TestFleetUtil(unittest.TestCase):
conf
.
feed_config
.
feeded_vars_filelist
=
None
# test feed var with lod_level >= 2
conf
.
dump_program_filename
=
"pruned_main_program.feed_lod2"
self
.
assertRaises
(
Exception
,
fleet
_
util
.
_params_check
)
self
.
assertRaises
(
Exception
,
fleet
.
util
.
_params_check
)
conf
.
dump_program_filename
=
"pruned_main_program.pbtxt"
results
=
fleet
_
util
.
_params_check
(
conf
)
results
=
fleet
.
util
.
_params_check
(
conf
)
self
.
assertTrue
(
len
(
results
)
==
1
)
def
test_proto_check
(
self
):
import
paddle.distributed.fleet
as
fleet
data_dir
=
self
.
download_files
()
class
config
:
...
...
@@ -210,7 +215,7 @@ class TestFleetUtil(unittest.TestCase):
"pruned_main_program.save_var_shape_not_match"
))
conf
.
is_text_pruned_program
=
True
conf
.
draw
=
False
res
=
fleet
_
util
.
_proto_check
(
conf
)
res
=
fleet
.
util
.
_proto_check
(
conf
)
self
.
assertFalse
(
res
)
# test match
...
...
@@ -222,10 +227,11 @@ class TestFleetUtil(unittest.TestCase):
else
:
conf
.
draw
=
True
conf
.
draw_out_name
=
"pruned_check"
res
=
fleet
_
util
.
_proto_check
(
conf
)
res
=
fleet
.
util
.
_proto_check
(
conf
)
self
.
assertTrue
(
res
)
def
test_visualize
(
self
):
import
paddle.distributed.fleet
as
fleet
if
sys
.
platform
==
'win32'
or
sys
.
platform
==
'sys.platform'
:
pass
else
:
...
...
@@ -234,10 +240,10 @@ class TestFleetUtil(unittest.TestCase):
data_dir
,
os
.
path
.
join
(
self
.
train_dir
,
"join_main_program.pbtxt"
))
is_text
=
True
program
=
fleet
_
util
.
_load_program
(
program_path
,
is_text
)
program
=
fleet
.
util
.
_load_program
(
program_path
,
is_text
)
output_dir
=
os
.
path
.
join
(
data_dir
,
self
.
train_dir
)
output_filename
=
"draw_prog"
fleet
_
util
.
_visualize_graphviz
(
program
,
output_dir
,
output_filename
)
fleet
.
util
.
_visualize_graphviz
(
program
,
output_dir
,
output_filename
)
self
.
assertTrue
(
os
.
path
.
exists
(
os
.
path
.
join
(
output_dir
,
output_filename
+
".dot"
)))
...
...
python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py
浏览文件 @
2725f51d
...
...
@@ -49,7 +49,7 @@ class TestDygraphDataLoaderSingalHandler(unittest.TestCase):
test_process
.
start
()
set_child_signal_handler
(
id
(
self
),
test_process
.
pid
)
time
.
sleep
(
3
)
time
.
sleep
(
5
)
except
core
.
EnforceNotMet
as
ex
:
self
.
assertIn
(
"FatalError"
,
cpt
.
get_exception_message
(
ex
))
exception
=
ex
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
浏览文件 @
2725f51d
...
...
@@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase):
def
test_simple_fc_with_new_strategy
(
self
):
# use_cuda, use_reduce
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
True
)
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
False
)
# NOTE: the computation result of nccl_reduce is non-deterministic,
# related issue: https://github.com/NVIDIA/nccl/issues/157
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
True
,
1e-5
,
1e-2
)
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
False
,
1e-5
,
1e-2
)
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
...
...
@@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase):
# NOTE: the computation result of nccl_reduce is non-deterministic,
# related issue: https://github.com/NVIDIA/nccl/issues/157
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
True
,
1e-5
,
1e-2
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
False
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
False
,
1e-5
,
1e-2
)
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录