Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2725f51d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
“b110fb376507b4652470fbd7694adc7549d00c2b”上不存在“mobile/src/operators/conv_op.cpp”
提交
2725f51d
编写于
9月 24, 2020
作者:
S
smallv0221
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into yxp0924
上级
43c4a00d
32ad4f90
变更
31
显示空白变更内容
内联
并排
Showing
31 changed file
with
351 addition
and
210 deletion
+351
-210
cmake/generic.cmake
cmake/generic.cmake
+9
-0
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+7
-4
cmake/init.cmake
cmake/init.cmake
+3
-0
cmake/paddle_win.props
cmake/paddle_win.props
+91
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+2
-7
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+30
-4
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+11
-0
paddle/fluid/inference/api/demo_ci/run.sh
paddle/fluid/inference/api/demo_ci/run.sh
+0
-5
paddle/fluid/operators/conv_cudnn_op.cu
paddle/fluid/operators/conv_cudnn_op.cu
+36
-24
paddle/fluid/operators/conv_op.h
paddle/fluid/operators/conv_op.h
+14
-4
paddle/fluid/operators/conv_transpose_cudnn_op.cu
paddle/fluid/operators/conv_transpose_cudnn_op.cu
+10
-10
paddle/fluid/operators/conv_transpose_op.h
paddle/fluid/operators/conv_transpose_op.h
+10
-2
paddle/fluid/operators/pixel_shuffle_op.cc
paddle/fluid/operators/pixel_shuffle_op.cc
+2
-2
python/paddle/distributed/fleet/__init__.py
python/paddle/distributed/fleet/__init__.py
+3
-3
python/paddle/distributed/fleet/base/fleet_base.py
python/paddle/distributed/fleet/base/fleet_base.py
+5
-27
python/paddle/distributed/fleet/base/util_factory.py
python/paddle/distributed/fleet/base/util_factory.py
+33
-30
python/paddle/distributed/fleet/launch.py
python/paddle/distributed/fleet/launch.py
+4
-6
python/paddle/distributed/fleet/utils/__init__.py
python/paddle/distributed/fleet/utils/__init__.py
+2
-0
python/paddle/distributed/fleet/utils/fs.py
python/paddle/distributed/fleet/utils/fs.py
+26
-26
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
+3
-4
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
+3
-4
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
+1
-2
python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
+2
-3
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
+2
-5
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py
...addle/fluid/tests/unittests/test_dist_fleet_heter_base.py
+1
-4
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
...le/fluid/tests/unittests/test_dist_fleet_heter_program.py
+0
-1
python/paddle/fluid/tests/unittests/test_fleet_base.py
python/paddle/fluid/tests/unittests/test_fleet_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py
.../paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py
+6
-6
python/paddle/fluid/tests/unittests/test_fleet_util.py
python/paddle/fluid/tests/unittests/test_fleet_util.py
+28
-22
python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py
...e/fluid/tests/unittests/test_imperative_signal_handler.py
+1
-1
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
...dle/fluid/tests/unittests/test_parallel_executor_mnist.py
+5
-3
未找到文件。
cmake/generic.cmake
浏览文件 @
2725f51d
...
@@ -446,6 +446,9 @@ function(nv_library TARGET_NAME)
...
@@ -446,6 +446,9 @@ function(nv_library TARGET_NAME)
message
(
FATAL
"Please specify source file or library in nv_library."
)
message
(
FATAL
"Please specify source file or library in nv_library."
)
endif
()
endif
()
endif
(
nv_library_SRCS
)
endif
(
nv_library_SRCS
)
if
(
WIN32
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES VS_USER_PROPS
${
WIN_PROPS
}
)
endif
(
WIN32
)
endif
()
endif
()
endfunction
(
nv_library
)
endfunction
(
nv_library
)
...
@@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME)
...
@@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME)
add_dependencies
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
if
(
WIN32
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES VS_USER_PROPS
${
WIN_PROPS
}
)
endif
(
WIN32
)
endif
()
endif
()
endfunction
(
nv_binary
)
endfunction
(
nv_binary
)
...
@@ -482,6 +488,9 @@ function(nv_test TARGET_NAME)
...
@@ -482,6 +488,9 @@ function(nv_test TARGET_NAME)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true
)
if
(
WIN32
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES VS_USER_PROPS
${
WIN_PROPS
}
)
endif
(
WIN32
)
endif
()
endif
()
endfunction
(
nv_test
)
endfunction
(
nv_test
)
...
...
cmake/inference_lib.cmake
浏览文件 @
2725f51d
...
@@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING
...
@@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING
set
(
PADDLE_INFERENCE_INSTALL_DIR
"
${
CMAKE_BINARY_DIR
}
/paddle_inference_install_dir"
CACHE STRING
set
(
PADDLE_INFERENCE_INSTALL_DIR
"
${
CMAKE_BINARY_DIR
}
/paddle_inference_install_dir"
CACHE STRING
"A path setting paddle inference shared and static libraries"
)
"A path setting paddle inference shared and static libraries"
)
# TODO(zhaolong)
# At present, the size of static lib in Windows is very large,
# At present, the size of static lib in Windows exceeds the system limit,
# so we need to crop the library size.
# so the generation of static lib is temporarily turned off.
if
(
WIN32
)
if
(
WIN32
)
#todo: remove the option
#todo: remove the option
option
(
WITH_STATIC_LIB
"Compile demo with static/shared library, default use dynamic."
OFF
)
option
(
WITH_STATIC_LIB
"Compile demo with static/shared library, default use dynamic."
OFF
)
...
@@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta
...
@@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta
copy_part_of_thrid_party
(
inference_lib_dist
${
PADDLE_INFERENCE_C_INSTALL_DIR
}
)
copy_part_of_thrid_party
(
inference_lib_dist
${
PADDLE_INFERENCE_C_INSTALL_DIR
}
)
set
(
src_dir
"
${
PADDLE_SOURCE_DIR
}
/paddle/fluid"
)
set
(
src_dir
"
${
PADDLE_SOURCE_DIR
}
/paddle/fluid"
)
set
(
paddle_fluid_c_lib
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/capi/libpaddle_fluid_c.*
)
if
(
WIN32
)
set
(
paddle_fluid_c_lib
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/capi/
${
CMAKE_BUILD_TYPE
}
/paddle_fluid_c.*
)
else
(
WIN32
)
set
(
paddle_fluid_c_lib
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/capi/libpaddle_fluid_c.*
)
endif
(
WIN32
)
copy
(
inference_lib_dist
copy
(
inference_lib_dist
SRCS
${
src_dir
}
/inference/capi/paddle_c_api.h
${
paddle_fluid_c_lib
}
SRCS
${
src_dir
}
/inference/capi/paddle_c_api.h
${
paddle_fluid_c_lib
}
...
...
cmake/init.cmake
浏览文件 @
2725f51d
...
@@ -26,4 +26,7 @@ if(WITH_GPU)
...
@@ -26,4 +26,7 @@ if(WITH_GPU)
set
(
CMAKE_CUDA_FLAGS_MINSIZEREL
"-O1 -DNDEBUG"
)
set
(
CMAKE_CUDA_FLAGS_MINSIZEREL
"-O1 -DNDEBUG"
)
endif
()
endif
()
if
(
WIN32
)
set
(
WIN_PROPS
${
CMAKE_SOURCE_DIR
}
/cmake/paddle_win.props
)
endif
()
cmake/paddle_win.props
0 → 100644
浏览文件 @
2725f51d
<?xml version="1.0" encoding="utf-8"?>
<Project
xmlns=
"http://schemas.microsoft.com/developer/msbuild/2003"
>
<ItemDefinitionGroup>
<CudaCompile>
<!-- Project schema: Host properties -->
<UseHostDefines>
true
</UseHostDefines>
<Emulation>
false
</Emulation>
<HostDebugInfo
Condition=
"'$(Configuration)' == 'Debug'"
>
true
</HostDebugInfo>
<HostDebugInfo
Condition=
"'$(Configuration)' != 'Debug'"
>
false
</HostDebugInfo>
<FastMath>
false
</FastMath>
<Optimization>
InheritFromHost
</Optimization>
<Runtime>
InheritFromHost
</Runtime>
<RuntimeChecks>
InheritFromHost
</RuntimeChecks>
<TypeInfo>
InheritFromHost
</TypeInfo>
<Warning>
InheritFromHost
</Warning>
<BaseCommandLineTemplate>
-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]
</BaseCommandLineTemplate>
<BuildCommandLineTemplate>
--use-local-env
</BuildCommandLineTemplate>
<BuildDynamicCommandLineTemplate>
[CodeGeneration]
</BuildDynamicCommandLineTemplate>
<CleanCommandLineTemplate>
-clean
</CleanCommandLineTemplate>
<!-- <HostCommandLineTemplate>-Xcompiler "/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]"</HostCommandLineTemplate> -->
<HostCommandLineTemplate>
-Xcompiler
"
/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]
"
</HostCommandLineTemplate>
<DriverApiCommandLineTemplate>
%(BaseCommandLineTemplate) [CompileOut] "%(FullPath)"
</DriverApiCommandLineTemplate>
<RuntimeApiCommandLineTemplate>
%(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)"
</RuntimeApiCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate)
# Runtime API (NVCC Compilation Type is hybrid object or .c file)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate)
</CommandLineTemplate>
<ExecutionDescription>
Compiling CUDA source file %(Identity)...
</ExecutionDescription>
<ExclusionDescription>
Skipping CUDA source file %(Identity) (excluded from build).
</ExclusionDescription>
<!-- Miscellaneous -->
<PropsCacheOutputFile>
%(Filename)%(Extension).cache
</PropsCacheOutputFile>
<PropsCacheOutputPath>
$(IntDir)%(PropsCacheOutputFile)
</PropsCacheOutputPath>
<CudaCompileCoreProject>
$(MSBuildProjectFullPath)
</CudaCompileCoreProject>
</CudaCompile>
<CudaLink>
<PerformDeviceLink>
true
</PerformDeviceLink>
<LinkOut>
$(IntDir)$(TargetName).device-link.obj
</LinkOut>
<AdditionalLibraryDirectories></AdditionalLibraryDirectories>
<UseHostLibraryDirectories>
true
</UseHostLibraryDirectories>
<AdditionalDependencies></AdditionalDependencies>
<UseHostLibraryDependencies>
true
</UseHostLibraryDependencies>
<GPUDebugInfo>
InheritFromProject
</GPUDebugInfo>
<Optimization>
InheritFromProject
</Optimization>
<!-- Implicitly inherited from the project via @(CudaCompile) -->
<CodeGeneration></CodeGeneration>
<RuntimeChecks></RuntimeChecks>
<Runtime></Runtime>
<TargetMachinePlatform></TargetMachinePlatform>
<TypeInfo></TypeInfo>
<Warning></Warning>
<Inputs></Inputs>
<!-- <HostCommandLineTemplate>-Xcompiler "/EHsc [Warning] /nologo [Optimization] /Zi [RuntimeChecks] [Runtime] [TypeInfo]"</HostCommandLineTemplate> -->
<HostCommandLineTemplate>
-Xcompiler
"
/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]
"
</HostCommandLineTemplate>
<LinkCommandLineTemplate>
"$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs]
</LinkCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
%(LinkCommandLineTemplate)
</CommandLineTemplate>
</CudaLink>
<Link>
<AdditionalLibraryDirectories>
%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)
</AdditionalLibraryDirectories>
</Link>
<ClCompile>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)
</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
</Project>
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
2725f51d
...
@@ -44,14 +44,9 @@ add_subdirectory(api)
...
@@ -44,14 +44,9 @@ add_subdirectory(api)
set
(
STATIC_INFERENCE_API paddle_inference_api analysis_predictor
set
(
STATIC_INFERENCE_API paddle_inference_api analysis_predictor
zero_copy_tensor reset_tensor_array
zero_copy_tensor reset_tensor_array
analysis_config paddle_pass_builder activation_functions
${
mkldnn_quantizer_cfg
}
)
analysis_config paddle_pass_builder activation_functions
${
mkldnn_quantizer_cfg
}
)
# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU
create_static_lib
(
paddle_fluid
${
fluid_modules
}
${
STATIC_INFERENCE_API
}
)
if
(
WIN32 AND WITH_GPU
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_API
}
)
else
()
create_static_lib
(
paddle_fluid
${
fluid_modules
}
${
STATIC_INFERENCE_API
}
)
endif
()
if
(
NOT APPLE
AND NOT WIN32
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
2725f51d
...
@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
...
@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
)
{
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
)
{
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
VLOG
(
2
)
<<
"AnalysisPredictor::Run get_cur_mkldnn_session_id="
std
::
vector
<
std
::
vector
<
int
>>
inputs_shape
;
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
inputs_shape
.
emplace_back
(
inputs
[
i
].
shape
);
}
MkldnnPreSet
(
inputs_shape
);
#endif
}
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
std
::
vector
<
int
>>
&
inputs_shape
)
{
#ifdef PADDLE_WITH_MKLDNN
VLOG
(
2
)
<<
"AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
<<
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
();
<<
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
();
// In cache clearing mode.
// In cache clearing mode.
if
(
config_
.
mkldnn_cache_capacity_
>
0
)
{
if
(
config_
.
mkldnn_cache_capacity_
>
0
)
{
...
@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
...
@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_
.
mkldnn_cache_capacity_
);
config_
.
mkldnn_cache_capacity_
);
// Set current_input_shape for caching dynamic shape.
// Set current_input_shape for caching dynamic shape.
std
::
stringstream
ss
;
std
::
stringstream
ss
;
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
inputs
_shape
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
inputs
[
i
].
shape
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
inputs
_shape
[
i
]
.
size
();
++
j
)
{
ss
<<
inputs
[
i
].
shape
[
j
]
<<
"-"
;
ss
<<
inputs
_shape
[
i
]
[
j
]
<<
"-"
;
}
}
}
}
VLOG
(
2
)
<<
"Set input shape="
<<
ss
.
str
();
VLOG
(
2
)
<<
"Set input shape="
<<
ss
.
str
();
...
@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
...
@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
#ifdef PADDLE_WITH_MKLDNN
if
(
config_
.
use_mkldnn_
)
{
std
::
vector
<
std
::
vector
<
int
>>
shape_vector
;
auto
names
=
GetInputNames
();
for
(
size_t
i
=
0
;
i
<
names
.
size
();
++
i
)
{
auto
in_tensor
=
GetInputTensor
(
names
[
i
]);
shape_vector
.
emplace_back
(
in_tensor
->
shape
());
}
MkldnnPreSet
(
shape_vector
);
}
#endif
executor_
->
Run
();
executor_
->
Run
();
// Fix TensorArray reuse not cleaned bug.
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
sub_scope_
);
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
sub_scope_
);
...
@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
...
@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
// conflict when integrating it into deployment service.
paddle
::
platform
::
SetNumThreads
(
1
);
paddle
::
platform
::
SetNumThreads
(
1
);
#ifdef PADDLE_WITH_MKLDNN
if
(
config_
.
use_mkldnn_
)
MkldnnPostReset
();
#endif
#if defined(PADDLE_WITH_MKLML)
#if defined(PADDLE_WITH_MKLML)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// avoid memory leak. See:
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
2725f51d
...
@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
/// \param[in] inputs tensors
///
///
void
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
);
void
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensor shape
///
void
MkldnnPreSet
(
const
std
::
vector
<
std
::
vector
<
int
>>
&
inputs_shape
);
///
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
///
...
...
paddle/fluid/inference/api/demo_ci/run.sh
浏览文件 @
2725f51d
...
@@ -68,11 +68,6 @@ rm -rf *
...
@@ -68,11 +68,6 @@ rm -rf *
for
WITH_STATIC_LIB
in
ON OFF
;
do
for
WITH_STATIC_LIB
in
ON OFF
;
do
if
[
$(
echo
`
uname
`
|
grep
"Win"
)
!=
""
]
;
then
if
[
$(
echo
`
uname
`
|
grep
"Win"
)
!=
""
]
;
then
# TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready.
if
[
$TEST_GPU_CPU
==
ON]
&&
[
$WITH_STATIC_LIB
==
ON
]
;
then
return
0
fi
# -----simple_on_word2vec on windows-----
# -----simple_on_word2vec on windows-----
cmake ..
-G
"Visual Studio 14 2015"
-A
x64
-DPADDLE_LIB
=
${
inference_install_dir
}
\
cmake ..
-G
"Visual Studio 14 2015"
-A
x64
-DPADDLE_LIB
=
${
inference_install_dir
}
\
-DWITH_MKL
=
$TURN_ON_MKL
\
-DWITH_MKL
=
$TURN_ON_MKL
\
...
...
paddle/fluid/operators/conv_cudnn_op.cu
浏览文件 @
2725f51d
...
@@ -50,8 +50,9 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
...
@@ -50,8 +50,9 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
PADDLE_ENFORCE_EQ
(
"It must use CUDAPlace."
);
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
const
Tensor
*
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
const
Tensor
*
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
*
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
*
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
...
@@ -60,14 +61,16 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
...
@@ -60,14 +61,16 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
bool
exhaustive_search
=
bool
exhaustive_search
=
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
bool
deterministic
=
FLAGS_cudnn_deterministic
;
if
(
exhaustive_search
&&
FLAGS_cudnn_deterministic
)
{
auto
exhaustive_deterministic
=
exhaustive_search
&&
deterministic
;
PADDLE_THROW
(
PADDLE_ENFORCE_EQ
(
exhaustive_deterministic
,
false
,
platform
::
errors
::
InvalidArgument
(
"Cann't set exhaustive_search True and "
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."
);
"FLAGS_cudnn_deterministic True at same time."
)
);
}
const
std
::
string
padding_algorithm
=
const
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
const
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
const
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
...
@@ -197,7 +200,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
...
@@ -197,7 +200,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
&
transformed_input
);
&
transformed_input
);
}
break
;
}
break
;
default:
default:
PADDLE_THROW
(
"ConvOp only support tensors with 4 or 5 dimensions."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"ConvOp only support tensors with 4 or 5 dimensions."
));
}
}
}
else
{
}
else
{
...
@@ -317,8 +321,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
...
@@ -317,8 +321,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
PADDLE_ENFORCE_EQ
(
"It must use CUDAPlace."
);
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
output_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
auto
output_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
...
@@ -337,14 +342,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
...
@@ -337,14 +342,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
bool
exhaustive_search
=
bool
exhaustive_search
=
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
bool
deterministic
=
FLAGS_cudnn_deterministic
;
bool
deterministic
=
FLAGS_cudnn_deterministic
;
if
(
exhaustive_search
&&
deterministic
)
{
auto
exhaustive_deterministic
=
exhaustive_search
&&
deterministic
;
PADDLE_THROW
(
PADDLE_ENFORCE_EQ
(
exhaustive_deterministic
,
false
,
"Can't set exhaustive_search True and "
platform
::
errors
::
InvalidArgument
(
"FLAGS_cudnn_deterministic True at same time."
);
"Cann't set exhaustive_search True and "
}
"FLAGS_cudnn_deterministic True at same time."
));
const
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
const
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
const
bool
channel_last
=
(
data_format
==
"NHWC"
||
data_format
==
"NDHWC"
);
const
bool
channel_last
=
(
data_format
==
"NHWC"
||
data_format
==
"NDHWC"
);
...
@@ -495,7 +502,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
...
@@ -495,7 +502,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
&
transformed_input
);
&
transformed_input
);
}
break
;
}
break
;
default:
default:
PADDLE_THROW
(
"ConvOp only support tensors with 4 or 5 dimensions."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"ConvOp only support tensors with 4 or 5 dimensions."
));
}
}
}
else
{
}
else
{
transformed_input
.
ShareDataWith
(
transformed_input_channel
);
transformed_input
.
ShareDataWith
(
transformed_input_channel
);
...
@@ -701,8 +709,9 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
...
@@ -701,8 +709,9 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
PADDLE_ENFORCE_EQ
(
"It must use CUDAPlace."
);
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
X
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
W
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
W
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
dO
=
ctx
.
Input
<
Tensor
>
(
"DOutput"
);
auto
dO
=
ctx
.
Input
<
Tensor
>
(
"DOutput"
);
...
@@ -736,14 +745,16 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
...
@@ -736,14 +745,16 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
const
std
::
vector
<
int
>&
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
const
std
::
vector
<
int
>&
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
bool
exhaustive_search
=
bool
exhaustive_search
=
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
FLAGS_cudnn_exhaustive_search
||
ctx
.
Attr
<
bool
>
(
"exhaustive_search"
);
bool
deterministic
=
FLAGS_cudnn_deterministic
;
bool
deterministic
=
FLAGS_cudnn_deterministic
;
if
(
exhaustive_search
&&
deterministic
)
{
auto
exhaustive_deterministic
=
exhaustive_search
&&
deterministic
;
PADDLE_THROW
(
PADDLE_ENFORCE_EQ
(
exhaustive_deterministic
,
false
,
"Can't set exhaustive_search True and "
platform
::
errors
::
InvalidArgument
(
"FLAGS_cudnn_deterministic True at same time."
);
"Cann't set exhaustive_search True and "
}
"FLAGS_cudnn_deterministic True at same time."
));
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
std
::
string
padding_algorithm
=
ctx
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
...
@@ -878,7 +889,8 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
...
@@ -878,7 +889,8 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
}
}
}
break
;
}
break
;
default:
default:
PADDLE_THROW
(
"ConvOp only support tensors with 4 or 5 dimensions."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"ConvOp only support tensors with 4 or 5 dimensions."
));
}
}
}
else
{
}
else
{
...
...
paddle/fluid/operators/conv_op.h
浏览文件 @
2725f51d
...
@@ -685,8 +685,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel<T> {
...
@@ -685,8 +685,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>();
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
true
,
PADDLE_ENFORCE_EQ
(
"It must use CPUPlace."
);
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CPUPlace."
));
const
Tensor
*
X
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
const
Tensor
*
X
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
const
Tensor
*
dY
=
ctx
.
Input
<
Tensor
>
(
"DOutput"
);
const
Tensor
*
dY
=
ctx
.
Input
<
Tensor
>
(
"DOutput"
);
const
Tensor
*
ddX
=
ctx
.
Input
<
Tensor
>
(
"DDInput"
);
const
Tensor
*
ddX
=
ctx
.
Input
<
Tensor
>
(
"DDInput"
);
...
@@ -982,11 +983,20 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
...
@@ -982,11 +983,20 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
output
->
dims
()[
output
->
dims
().
size
()
-
1
]
%
output
->
dims
()[
output
->
dims
().
size
()
-
1
]
%
input
->
dims
()[
input
->
dims
().
size
()
-
1
],
input
->
dims
()[
input
->
dims
().
size
()
-
1
],
0
,
"The output channels must be a multiple of the input channels"
);
0
,
platform
::
errors
::
InvalidArgument
(
"ShapeError: The output channels must be a multiple of the "
"input channels. But receivced output channel number is %d "
"and input channel number is %d"
,
output
->
dims
()[
output
->
dims
().
size
()
-
1
],
input
->
dims
()[
input
->
dims
().
size
()
-
1
]));
}
else
{
}
else
{
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
output
->
dims
()[
1
]
%
input
->
dims
()[
1
],
0
,
output
->
dims
()[
1
]
%
input
->
dims
()[
1
],
0
,
"The output channels must be a multiple of the input channels"
);
platform
::
errors
::
InvalidArgument
(
"ShapeError: The output channels must be a multiple of the "
"input channels. But receivced output channel number is %d "
"and input channel number is %d"
,
output
->
dims
()[
1
],
input
->
dims
()[
1
]));
}
}
// transform tensor
// transform tensor
Tensor
transformed_input
(
input
->
type
());
Tensor
transformed_input
(
input
->
type
());
...
...
paddle/fluid/operators/conv_transpose_cudnn_op.cu
浏览文件 @
2725f51d
...
@@ -51,8 +51,9 @@ template <typename T>
...
@@ -51,8 +51,9 @@ template <typename T>
class
CUDNNConvTransposeOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CUDNNConvTransposeOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
PADDLE_ENFORCE_EQ
(
"It must use CUDAPlace."
);
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
*
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
*
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
*
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
*
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
...
@@ -145,9 +146,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
...
@@ -145,9 +146,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
ctx
,
input_pad
,
input_transpose
,
pad_value
,
&
transformed_input
);
ctx
,
input_pad
,
input_transpose
,
pad_value
,
&
transformed_input
);
}
break
;
}
break
;
default:
default:
PADDLE_ENFORCE_EQ
(
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
rank
==
4
||
rank
==
5
,
true
,
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
));
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
);
}
}
}
else
{
}
else
{
transformed_input
=
input_transpose
;
transformed_input
=
input_transpose
;
...
@@ -290,8 +290,9 @@ template <typename T>
...
@@ -290,8 +290,9 @@ template <typename T>
class
CUDNNConvTransposeGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CUDNNConvTransposeGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE_EQ
(
"It must use CUDAPlace."
);
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"It must use CUDAPlace."
));
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
filter
=
ctx
.
Input
<
Tensor
>
(
"Filter"
);
auto
output_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
auto
output_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
...
@@ -393,9 +394,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
...
@@ -393,9 +394,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
&
transformed_output_grad
);
&
transformed_output_grad
);
}
break
;
}
break
;
default:
default:
PADDLE_ENFORCE_EQ
(
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
rank
==
4
||
rank
==
5
,
true
,
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
));
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."
);
}
}
}
else
{
}
else
{
transformed_output_grad
=
output_grad_transpose
;
transformed_output_grad
=
output_grad_transpose
;
...
...
paddle/fluid/operators/conv_transpose_op.h
浏览文件 @
2725f51d
...
@@ -580,7 +580,12 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -580,7 +580,12 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
int
groups
=
context
.
Attr
<
int
>
(
"groups"
);
PADDLE_ENFORCE_EQ
(
groups
,
filter
.
dims
()[
0
]);
PADDLE_ENFORCE_EQ
(
groups
,
filter
.
dims
()[
0
],
platform
::
errors
::
InvalidArgument
(
"groups should be error to the 1st dimension of filter. But "
"received groups is %d and filter dimension[0] is %d"
,
groups
,
filter
.
dims
()[
0
]));
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
...
@@ -588,7 +593,10 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -588,7 +593,10 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
std
::
string
padding_algorithm
=
std
::
string
padding_algorithm
=
context
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
context
.
Attr
<
std
::
string
>
(
"padding_algorithm"
);
for
(
auto
v
:
dilations
)
{
for
(
auto
v
:
dilations
)
{
PADDLE_ENFORCE_EQ
(
v
,
1
);
PADDLE_ENFORCE_EQ
(
v
,
1
,
platform
::
errors
::
InvalidArgument
(
"dilations should be 1 in depthwise conv. "
"But received dilations is %d"
,
v
));
}
}
auto
in_dims
=
input
->
dims
();
auto
in_dims
=
input
->
dims
();
...
...
paddle/fluid/operators/pixel_shuffle_op.cc
浏览文件 @
2725f51d
...
@@ -46,14 +46,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
...
@@ -46,14 +46,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The square of upscale_factor[%u] should divide the "
"The square of upscale_factor[%u] should divide the "
"number of channel[%u]"
,
"number of channel[%u]"
,
input_dims
[
1
],
upscale_factor
*
upscale_factor
));
upscale_factor
*
upscale_factor
,
input_dims
[
1
]
));
}
else
{
}
else
{
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
input_dims
[
3
]
%
(
upscale_factor
*
upscale_factor
),
0
,
input_dims
[
3
]
%
(
upscale_factor
*
upscale_factor
),
0
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The square of upscale_factor[%u] should divide the "
"The square of upscale_factor[%u] should divide the "
"number of channel[%u]"
,
"number of channel[%u]"
,
input_dims
[
3
],
upscale_factor
*
upscale_factor
));
upscale_factor
*
upscale_factor
,
input_dims
[
3
]
));
}
}
auto
output_dims
=
input_dims
;
auto
output_dims
=
input_dims
;
output_dims
[
0
]
=
input_dims
[
0
];
output_dims
[
0
]
=
input_dims
[
0
];
...
...
python/paddle/distributed/fleet/__init__.py
浏览文件 @
2725f51d
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
# limitations under the License.
# limitations under the License.
# TODO: define distributed api under this directory,
# TODO: define distributed api under this directory,
from
.base.role_maker
import
UserDefinedRoleMaker
,
PaddleCloudRoleMaker
from
.base.role_maker
import
Role
,
UserDefinedRoleMaker
,
PaddleCloudRoleMaker
from
.base.distributed_strategy
import
DistributedStrategy
from
.base.distributed_strategy
import
DistributedStrategy
from
.base.fleet_base
import
Fleet
from
.base.fleet_base
import
Fleet
from
.base.util_factory
import
UtilBase
from
.base.util_factory
import
UtilBase
...
@@ -26,6 +26,7 @@ __all__ = [
...
@@ -26,6 +26,7 @@ __all__ = [
"UserDefinedRoleMaker"
,
"UserDefinedRoleMaker"
,
"PaddleCloudRoleMaker"
,
"PaddleCloudRoleMaker"
,
"Fleet"
,
"Fleet"
,
"Role"
,
]
]
fleet
=
Fleet
()
fleet
=
Fleet
()
...
@@ -39,8 +40,7 @@ server_num = fleet.server_num
...
@@ -39,8 +40,7 @@ server_num = fleet.server_num
server_index
=
fleet
.
server_index
server_index
=
fleet
.
server_index
server_endpoints
=
fleet
.
server_endpoints
server_endpoints
=
fleet
.
server_endpoints
is_server
=
fleet
.
is_server
is_server
=
fleet
.
is_server
set_util
=
fleet
.
set_util
util
=
UtilBase
()
util
=
fleet
.
util
barrier_worker
=
fleet
.
barrier_worker
barrier_worker
=
fleet
.
barrier_worker
init_worker
=
fleet
.
init_worker
init_worker
=
fleet
.
init_worker
init_server
=
fleet
.
init_server
init_server
=
fleet
.
init_server
...
...
python/paddle/distributed/fleet/base/fleet_base.py
浏览文件 @
2725f51d
...
@@ -23,7 +23,6 @@ from .strategy_compiler import StrategyCompiler
...
@@ -23,7 +23,6 @@ from .strategy_compiler import StrategyCompiler
from
.distributed_strategy
import
DistributedStrategy
from
.distributed_strategy
import
DistributedStrategy
from
.meta_optimizer_factory
import
MetaOptimizerFactory
from
.meta_optimizer_factory
import
MetaOptimizerFactory
from
.runtime_factory
import
RuntimeFactory
from
.runtime_factory
import
RuntimeFactory
from
.util_factory
import
UtilFactory
from
paddle.fluid.wrapped_decorator
import
wrap_decorator
from
paddle.fluid.wrapped_decorator
import
wrap_decorator
from
paddle.fluid.dygraph
import
parallel_helper
from
paddle.fluid.dygraph
import
parallel_helper
...
@@ -120,7 +119,6 @@ class Fleet(object):
...
@@ -120,7 +119,6 @@ class Fleet(object):
self
.
strategy_compiler
=
None
self
.
strategy_compiler
=
None
self
.
_is_collective
=
False
self
.
_is_collective
=
False
self
.
_runtime_handle
=
None
self
.
_runtime_handle
=
None
self
.
_util
=
None
def
init
(
self
,
role_maker
=
None
,
is_collective
=
False
):
def
init
(
self
,
role_maker
=
None
,
is_collective
=
False
):
"""
"""
...
@@ -182,6 +180,9 @@ class Fleet(object):
...
@@ -182,6 +180,9 @@ class Fleet(object):
format
(
type
(
role_maker
)))
format
(
type
(
role_maker
)))
self
.
_role_maker
.
_generate_role
()
self
.
_role_maker
.
_generate_role
()
import
paddle.distributed.fleet
as
fleet
fleet
.
util
.
_set_role_maker
(
self
.
_role_maker
)
self
.
strategy_compiler
=
StrategyCompiler
()
self
.
strategy_compiler
=
StrategyCompiler
()
if
paddle
.
fluid
.
framework
.
in_dygraph_mode
():
if
paddle
.
fluid
.
framework
.
in_dygraph_mode
():
if
parallel_helper
.
_is_parallel_ctx_initialized
():
if
parallel_helper
.
_is_parallel_ctx_initialized
():
...
@@ -353,29 +354,6 @@ class Fleet(object):
...
@@ -353,29 +354,6 @@ class Fleet(object):
return
self
.
_role_maker
.
_is_server
(
return
self
.
_role_maker
.
_is_server
(
)
or
self
.
_role_maker
.
_is_heter_worker
()
)
or
self
.
_role_maker
.
_is_heter_worker
()
def
set_util
(
self
,
util
):
self
.
_util
=
util
def
util
(
self
):
"""
Utility functions that can be used under certain runtime
return util
Returns:
UtilBase: instance of UtilBase, can use distributed ops/tools easily.
Examples:
.. code-block:: python
import paddle.distributed.fleet as fleet
fleet.init()
util = fleet.util
files = ["1.log", "2.log", "3.log", "4.log"]
files = util.get_file_shard()
"""
return
self
.
_util
def
barrier_worker
(
self
):
def
barrier_worker
(
self
):
"""
"""
barrier all workers
barrier all workers
...
@@ -1102,7 +1080,7 @@ class Fleet(object):
...
@@ -1102,7 +1080,7 @@ class Fleet(object):
if
self
.
_runtime_handle
is
None
:
if
self
.
_runtime_handle
is
None
:
self
.
_runtime_handle
=
RuntimeFactory
().
_create_runtime
(
context
)
self
.
_runtime_handle
=
RuntimeFactory
().
_create_runtime
(
context
)
i
f
self
.
_util
is
None
:
i
mport
paddle.distributed.fleet
as
fleet
self
.
_util
=
UtilFactory
().
_create_util
(
context
)
fleet
.
util
.
_set_strategy
(
context
[
"valid_strategy"
]
)
return
optimize_ops
,
params_grads
return
optimize_ops
,
params_grads
python/paddle/distributed/fleet/base/util_factory.py
浏览文件 @
2725f51d
...
@@ -73,11 +73,13 @@ class UtilBase(object):
...
@@ -73,11 +73,13 @@ class UtilBase(object):
.. code-block:: python
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import sys
import numpy as np
import numpy as np
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
def train():
role = PaddleCloudRoleMaker(
role = PaddleCloudRoleMaker(
...
@@ -85,19 +87,18 @@ class UtilBase(object):
...
@@ -85,19 +87,18 @@ class UtilBase(object):
init_gloo=True,
init_gloo=True,
path="./tmp_gloo")
path="./tmp_gloo")
fleet.init(role)
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
if fleet.is_server():
input = [1, 2]
input = [1, 2]
output = fleet
_
util.all_reduce(input, "sum", "server")
output = fleet
.
util.all_reduce(input, "sum", "server")
print(output)
print(output)
# [2, 4]
# [2, 4]
elif fleet.is_worker():
elif fleet.is_worker():
input = np.array([3, 4])
input = np.array([3, 4])
output = fleet
_
util.all_reduce(input, "sum", "worker")
output = fleet
.
util.all_reduce(input, "sum", "worker")
print(output)
print(output)
# [6, 8]
# [6, 8]
output = fleet
_
util.all_reduce(input, "sum", "all")
output = fleet
.
util.all_reduce(input, "sum", "all")
print(output)
print(output)
# [8, 12]
# [8, 12]
if __name__ == "__main__":
if __name__ == "__main__":
...
@@ -117,10 +118,12 @@ class UtilBase(object):
...
@@ -117,10 +118,12 @@ class UtilBase(object):
.. code-block:: python
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import sys
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
def train():
role = PaddleCloudRoleMaker(
role = PaddleCloudRoleMaker(
...
@@ -128,15 +131,14 @@ class UtilBase(object):
...
@@ -128,15 +131,14 @@ class UtilBase(object):
init_gloo=True,
init_gloo=True,
path="./tmp_gloo")
path="./tmp_gloo")
fleet.init(role)
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
if fleet.is_server():
fleet
_
util.barrier("server")
fleet
.
util.barrier("server")
print("all server arrive here")
print("all server arrive here")
elif fleet.is_worker():
elif fleet.is_worker():
fleet
_
util.barrier("worker")
fleet
.
util.barrier("worker")
print("all server arrive here")
print("all server arrive here")
fleet
_
util.barrier("all")
fleet
.
util.barrier("all")
print("all servers and workers arrive here")
print("all servers and workers arrive here")
if __name__ == "__main__":
if __name__ == "__main__":
...
@@ -160,10 +162,12 @@ class UtilBase(object):
...
@@ -160,10 +162,12 @@ class UtilBase(object):
.. code-block:: python
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import sys
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
def train():
role = PaddleCloudRoleMaker(
role = PaddleCloudRoleMaker(
...
@@ -171,19 +175,18 @@ class UtilBase(object):
...
@@ -171,19 +175,18 @@ class UtilBase(object):
init_gloo=True,
init_gloo=True,
path="./tmp_gloo")
path="./tmp_gloo")
fleet.init(role)
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
if fleet.is_server():
input = fleet.server_index()
input = fleet.server_index()
output = fleet
_
util.all_gather(input, "server")
output = fleet
.
util.all_gather(input, "server")
print(output)
print(output)
# output = [0, 1]
# output = [0, 1]
elif fleet.is_worker():
elif fleet.is_worker():
input = fleet.worker_index()
input = fleet.worker_index()
output = fleet
_
util.all_gather(input, "worker")
output = fleet
.
util.all_gather(input, "worker")
# output = [0, 1]
# output = [0, 1]
print(output)
print(output)
output = fleet
_
util.all_gather(input, "all")
output = fleet
.
util.all_gather(input, "all")
print(output)
print(output)
# output = [0, 1, 0, 1]
# output = [0, 1, 0, 1]
...
@@ -220,18 +223,20 @@ class UtilBase(object):
...
@@ -220,18 +223,20 @@ class UtilBase(object):
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_m
aker
from paddle.distributed.fleet import UserDefinedRoleM
aker
role =
role_maker.
UserDefinedRoleMaker(
role = UserDefinedRoleMaker(
is_collective=False,
is_collective=False,
init_gloo=False,
init_gloo=False,
current_id=0,
current_id=0,
role=
role_maker
.Role.WORKER,
role=
fleet
.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
fleet.init(role)
files = fleet_util.get_file_shard(["file1", "file2", "file3"])
files = fleet.util.get_file_shard(["file1", "file2", "file3"])
print(files)
# files = ["file1", "file2"]
# files = ["file1", "file2"]
"""
"""
if
not
isinstance
(
files
,
list
):
if
not
isinstance
(
files
,
list
):
...
@@ -267,18 +272,19 @@ class UtilBase(object):
...
@@ -267,18 +272,19 @@ class UtilBase(object):
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_m
aker
from paddle.distributed.fleet import UserDefinedRoleM
aker
role =
role_maker.
UserDefinedRoleMaker(
role = UserDefinedRoleMaker(
is_collective=False,
is_collective=False,
init_gloo=False,
init_gloo=False,
current_id=0,
current_id=0,
role=
role_maker
.Role.WORKER,
role=
fleet
.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
fleet.init(role)
fleet_util.print_on_rank("I'm worker 0", 0)
fleet.util.print_on_rank("I'm worker 0", 0)
"""
"""
if
self
.
role_maker
.
_worker_index
()
!=
rank_id
:
if
self
.
role_maker
.
_worker_index
()
!=
rank_id
:
return
return
...
@@ -577,6 +583,3 @@ class UtilBase(object):
...
@@ -577,6 +583,3 @@ class UtilBase(object):
print
(
"fetch_targets name: %s"
%
v
.
name
)
print
(
"fetch_targets name: %s"
%
v
.
name
)
print
(
"fetch_targets: {}"
.
format
(
results
[
i
]))
print
(
"fetch_targets: {}"
.
format
(
results
[
i
]))
return
results
return
results
fleet_util
=
UtilFactory
().
_create_util
(
None
)
python/paddle/distributed/fleet/launch.py
浏览文件 @
2725f51d
...
@@ -348,8 +348,7 @@ def launch_ps(args):
...
@@ -348,8 +348,7 @@ def launch_ps(args):
"PADDLE_PORT"
:
cur_server
.
endpoint
.
split
(
":"
)[
1
],
"PADDLE_PORT"
:
cur_server
.
endpoint
.
split
(
":"
)[
1
],
"TRAINING_ROLE"
:
"PSERVER"
,
"TRAINING_ROLE"
:
"PSERVER"
,
"PADDLE_TRAINERS_NUM"
:
str
(
worker_num
),
"PADDLE_TRAINERS_NUM"
:
str
(
worker_num
),
"POD_IP"
:
cur_server
.
endpoint
.
split
(
":"
)[
0
],
"POD_IP"
:
cur_server
.
endpoint
.
split
(
":"
)[
0
]
"PADDLE_WITH_GLOO"
:
"1"
}
}
current_env
.
update
(
proc_env
)
current_env
.
update
(
proc_env
)
...
@@ -388,8 +387,7 @@ def launch_ps(args):
...
@@ -388,8 +387,7 @@ def launch_ps(args):
"PADDLE_TRAINER_ENDPOINTS"
:
worker_endpoints
,
"PADDLE_TRAINER_ENDPOINTS"
:
worker_endpoints
,
"PADDLE_TRAINERS_NUM"
:
str
(
worker_num
),
"PADDLE_TRAINERS_NUM"
:
str
(
worker_num
),
"TRAINING_ROLE"
:
"TRAINER"
,
"TRAINING_ROLE"
:
"TRAINER"
,
"PADDLE_TRAINER_ID"
:
str
(
cur_worker
.
rank
),
"PADDLE_TRAINER_ID"
:
str
(
cur_worker
.
rank
)
"PADDLE_WITH_GLOO"
:
"1"
}
}
current_env
.
update
(
proc_env
)
current_env
.
update
(
proc_env
)
...
...
python/paddle/distributed/fleet/utils/__init__.py
浏览文件 @
2725f51d
...
@@ -11,3 +11,5 @@
...
@@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.fs
import
LocalFS
,
HDFSClient
python/paddle/distributed/fleet/utils/fs.py
浏览文件 @
2725f51d
...
@@ -120,7 +120,7 @@ class LocalFS(FS):
...
@@ -120,7 +120,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
subdirs, files = client.ls_dir("./")
subdirs, files = client.ls_dir("./")
...
@@ -140,7 +140,7 @@ class LocalFS(FS):
...
@@ -140,7 +140,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
subdirs, files = client.ls_dir("./")
subdirs, files = client.ls_dir("./")
...
@@ -160,7 +160,7 @@ class LocalFS(FS):
...
@@ -160,7 +160,7 @@ class LocalFS(FS):
def
mkdirs
(
self
,
fs_path
):
def
mkdirs
(
self
,
fs_path
):
"""
"""
Create a
remote HDFS
directory.
Create a
local
directory.
Args:
Args:
fs_path(str): The local directory path.
fs_path(str): The local directory path.
...
@@ -168,7 +168,7 @@ class LocalFS(FS):
...
@@ -168,7 +168,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.mkdirs("test_mkdirs")
client.mkdirs("test_mkdirs")
...
@@ -189,7 +189,7 @@ class LocalFS(FS):
...
@@ -189,7 +189,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.touch("test_rename_src")
client.touch("test_rename_src")
...
@@ -217,7 +217,7 @@ class LocalFS(FS):
...
@@ -217,7 +217,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.mkdirs("test_localFS_mkdirs")
client.mkdirs("test_localFS_mkdirs")
...
@@ -247,7 +247,7 @@ class LocalFS(FS):
...
@@ -247,7 +247,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.touch("test_is_file")
client.touch("test_is_file")
...
@@ -269,7 +269,7 @@ class LocalFS(FS):
...
@@ -269,7 +269,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.mkdirs("test_is_dir")
client.mkdirs("test_is_dir")
...
@@ -292,7 +292,7 @@ class LocalFS(FS):
...
@@ -292,7 +292,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
ret = local_fs.is_exist("test_is_exist")
ret = local_fs.is_exist("test_is_exist")
...
@@ -311,7 +311,7 @@ class LocalFS(FS):
...
@@ -311,7 +311,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.touch("test_touch")
client.touch("test_touch")
...
@@ -332,13 +332,11 @@ class LocalFS(FS):
...
@@ -332,13 +332,11 @@ class LocalFS(FS):
src_path(str): Name of the file or directory, that's needed to be moved.
src_path(str): Name of the file or directory, that's needed to be moved.
dst_path(str): Name of the file or directory to which to move to.
dst_path(str): Name of the file or directory to which to move to.
overwrite(bool): Whether to re-write `dst_path` if that exists. Default is False.
overwrite(bool): Whether to re-write `dst_path` if that exists. Default is False.
test_exists(bool): Check the existence of `src_path` and `dst_path` .
When `test_exists` is set true, if `src_path` doesn't exist or `dst_path` exists, program will throw an Excetption.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
client.touch("test_mv_src")
client.touch("test_mv_src")
...
@@ -369,7 +367,7 @@ class LocalFS(FS):
...
@@ -369,7 +367,7 @@ class LocalFS(FS):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
from paddle.distributed.fleet.utils
.fs
import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client = LocalFS()
subdirs = client.list_dirs("./")
subdirs = client.list_dirs("./")
...
@@ -432,7 +430,7 @@ class HDFSClient(FS):
...
@@ -432,7 +430,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -493,7 +491,7 @@ class HDFSClient(FS):
...
@@ -493,7 +491,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -526,7 +524,7 @@ class HDFSClient(FS):
...
@@ -526,7 +524,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -587,7 +585,7 @@ class HDFSClient(FS):
...
@@ -587,7 +585,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -629,7 +627,7 @@ class HDFSClient(FS):
...
@@ -629,7 +627,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -661,7 +659,7 @@ class HDFSClient(FS):
...
@@ -661,7 +659,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -695,7 +693,7 @@ class HDFSClient(FS):
...
@@ -695,7 +693,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -740,7 +738,7 @@ class HDFSClient(FS):
...
@@ -740,7 +738,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -784,7 +782,7 @@ class HDFSClient(FS):
...
@@ -784,7 +782,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -830,7 +828,7 @@ class HDFSClient(FS):
...
@@ -830,7 +828,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -893,7 +891,7 @@ class HDFSClient(FS):
...
@@ -893,7 +891,7 @@ class HDFSClient(FS):
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
@@ -919,12 +917,14 @@ class HDFSClient(FS):
...
@@ -919,12 +917,14 @@ class HDFSClient(FS):
Args:
Args:
fs_path(str): The HDFS file path.
fs_path(str): The HDFS file path.
exist_ok(bool): When `fs_path` exists, if `exist_ok` is set false,
program will throw an Exception. Default is true.
Examples:
Examples:
.. code-block:: text
.. code-block:: text
from paddle.distributed.fleet.utils
.fs
import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
configs = {
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
浏览文件 @
2725f51d
...
@@ -28,7 +28,6 @@ import numpy as np
...
@@ -28,7 +28,6 @@ import numpy as np
import
ctr_dataset_reader
import
ctr_dataset_reader
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -180,13 +179,13 @@ class TestDistCTR2x2(FleetDistRunnerBase):
...
@@ -180,13 +179,13 @@ class TestDistCTR2x2(FleetDistRunnerBase):
fetch_list
=
[
self
.
avg_cost
.
name
])
fetch_list
=
[
self
.
avg_cost
.
name
])
loss_val
=
np
.
mean
(
loss_val
)
loss_val
=
np
.
mean
(
loss_val
)
# TODO(randomly fail)
# TODO(randomly fail)
# reduce_output = fleet
_
util.all_reduce(
# reduce_output = fleet
.
util.all_reduce(
# np.array(loss_val), mode="sum")
# np.array(loss_val), mode="sum")
# loss_all_trainer = fleet
_
util.all_gather(float(loss_val))
# loss_all_trainer = fleet
.
util.all_gather(float(loss_val))
# loss_val = float(reduce_output) / len(loss_all_trainer)
# loss_val = float(reduce_output) / len(loss_all_trainer)
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
loss_val
)
loss_val
)
fleet
_
util
.
print_on_rank
(
message
,
0
)
fleet
.
util
.
print_on_rank
(
message
,
0
)
pass_time
=
time
.
time
()
-
pass_start
pass_time
=
time
.
time
()
-
pass_start
except
fluid
.
core
.
EOFException
:
except
fluid
.
core
.
EOFException
:
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py
浏览文件 @
2725f51d
...
@@ -29,7 +29,6 @@ import numpy as np
...
@@ -29,7 +29,6 @@ import numpy as np
import
ctr_dataset_reader
import
ctr_dataset_reader
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
dist_fleet_ctr
import
TestDistCTR2x2
,
fake_ctr_reader
from
dist_fleet_ctr
import
TestDistCTR2x2
,
fake_ctr_reader
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
# Fix seed for test
# Fix seed for test
fluid
.
default_startup_program
().
random_seed
=
1
fluid
.
default_startup_program
().
random_seed
=
1
...
@@ -76,13 +75,13 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
...
@@ -76,13 +75,13 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
loss_val
=
exe
.
run
(
program
=
fleet
.
main_program
,
loss_val
=
exe
.
run
(
program
=
fleet
.
main_program
,
fetch_list
=
[
self
.
avg_cost
.
name
])
fetch_list
=
[
self
.
avg_cost
.
name
])
loss_val
=
np
.
mean
(
loss_val
)
loss_val
=
np
.
mean
(
loss_val
)
reduce_output
=
fleet
_
util
.
all_reduce
(
reduce_output
=
fleet
.
util
.
all_reduce
(
np
.
array
(
loss_val
),
mode
=
"sum"
)
np
.
array
(
loss_val
),
mode
=
"sum"
)
loss_all_trainer
=
fleet
_
util
.
all_gather
(
float
(
loss_val
))
loss_all_trainer
=
fleet
.
util
.
all_gather
(
float
(
loss_val
))
loss_val
=
float
(
reduce_output
)
/
len
(
loss_all_trainer
)
loss_val
=
float
(
reduce_output
)
/
len
(
loss_all_trainer
)
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
loss_val
)
loss_val
)
fleet
_
util
.
print_on_rank
(
message
,
0
)
fleet
.
util
.
print_on_rank
(
message
,
0
)
pass_time
=
time
.
time
()
-
pass_start
pass_time
=
time
.
time
()
-
pass_start
except
fluid
.
core
.
EOFException
:
except
fluid
.
core
.
EOFException
:
...
...
python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py
浏览文件 @
2725f51d
...
@@ -29,7 +29,6 @@ import numpy as np
...
@@ -29,7 +29,6 @@ import numpy as np
import
ctr_dataset_reader
import
ctr_dataset_reader
from
test_dist_fleet_heter_base
import
runtime_main
,
FleetDistHeterRunnerBase
from
test_dist_fleet_heter_base
import
runtime_main
,
FleetDistHeterRunnerBase
from
dist_fleet_ctr
import
TestDistCTR2x2
,
fake_ctr_reader
from
dist_fleet_ctr
import
TestDistCTR2x2
,
fake_ctr_reader
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -182,7 +181,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
...
@@ -182,7 +181,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
thread_num
=
int
(
os
.
getenv
(
"CPU_NUM"
,
2
))
thread_num
=
int
(
os
.
getenv
(
"CPU_NUM"
,
2
))
batch_size
=
128
batch_size
=
128
filelist
=
fleet
_
util
.
get_file_shard
(
train_file_list
)
filelist
=
fleet
.
util
.
get_file_shard
(
train_file_list
)
print
(
"filelist: {}"
.
format
(
filelist
))
print
(
"filelist: {}"
.
format
(
filelist
))
# config dataset
# config dataset
...
...
python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py
浏览文件 @
2725f51d
...
@@ -32,7 +32,6 @@ import os
...
@@ -32,7 +32,6 @@ import os
import
signal
import
signal
from
functools
import
reduce
from
functools
import
reduce
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
test_dist_fleet_base
import
runtime_main
,
FleetDistRunnerBase
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -238,7 +237,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
...
@@ -238,7 +237,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
loss_val
=
np
.
mean
(
loss_val
)
loss_val
=
np
.
mean
(
loss_val
)
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
message
=
"TRAIN ---> pass: {} loss: {}
\n
"
.
format
(
epoch_id
,
loss_val
)
loss_val
)
fleet
_
util
.
print_on_rank
(
message
,
0
)
fleet
.
util
.
print_on_rank
(
message
,
0
)
pass_time
=
time
.
time
()
-
pass_start
pass_time
=
time
.
time
()
-
pass_start
except
fluid
.
core
.
EOFException
:
except
fluid
.
core
.
EOFException
:
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
浏览文件 @
2725f51d
...
@@ -34,8 +34,7 @@ import unittest
...
@@ -34,8 +34,7 @@ import unittest
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.base.role_maker
as
role_maker
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
import
paddle.distributed.fleet
as
fleet
from
paddle.distributed.fleet
import
fleet
from
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy
import
StrategyFactory
from
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy
import
StrategyFactory
__all__
=
[
'FleetDistRunnerBase'
,
'TestFleetBase'
,
'runtime_main'
]
__all__
=
[
'FleetDistRunnerBase'
,
'TestFleetBase'
,
'runtime_main'
]
...
@@ -97,7 +96,7 @@ class FleetDistRunnerBase(object):
...
@@ -97,7 +96,7 @@ class FleetDistRunnerBase(object):
self
.
dump_fields_path
=
os
.
getenv
(
"dump_fields_path"
,
""
)
self
.
dump_fields_path
=
os
.
getenv
(
"dump_fields_path"
,
""
)
debug
=
int
(
os
.
getenv
(
"Debug"
,
"0"
))
debug
=
int
(
os
.
getenv
(
"Debug"
,
"0"
))
# TODO(update strategy to support dump params)
# TODO(update strategy to support dump params)
if
False
:
#debug:
if
False
:
#
debug:
self
.
strategy
.
set_debug_opt
({
self
.
strategy
.
set_debug_opt
({
"dump_param"
:
self
.
dump_param
,
"dump_param"
:
self
.
dump_param
,
"dump_fields"
:
self
.
dump_fields
,
"dump_fields"
:
self
.
dump_fields
,
...
@@ -372,8 +371,6 @@ def runtime_main(test_class):
...
@@ -372,8 +371,6 @@ def runtime_main(test_class):
strategy
=
model
.
build_strategy
(
args
)
strategy
=
model
.
build_strategy
(
args
)
avg_cost
=
model
.
net
(
args
)
avg_cost
=
model
.
net
(
args
)
model
.
build_optimizer
(
avg_cost
,
strategy
)
model
.
build_optimizer
(
avg_cost
,
strategy
)
fleet_util
.
_set_strategy
(
strategy
)
fleet_util
.
_set_role_maker
(
role
)
if
args
.
role
==
"pserver"
:
if
args
.
role
==
"pserver"
:
model
.
run_pserver
(
args
)
model
.
run_pserver
(
args
)
else
:
else
:
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py
浏览文件 @
2725f51d
...
@@ -34,8 +34,7 @@ import unittest
...
@@ -34,8 +34,7 @@ import unittest
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.base.role_maker
as
role_maker
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
import
paddle.distributed.fleet
as
fleet
from
paddle.distributed.fleet
import
fleet
__all__
=
[
'FleetDistHeterRunnerBase'
,
'TestFleetHeterBase'
,
'runtime_main'
]
__all__
=
[
'FleetDistHeterRunnerBase'
,
'TestFleetHeterBase'
,
'runtime_main'
]
...
@@ -376,8 +375,6 @@ def runtime_main(test_class):
...
@@ -376,8 +375,6 @@ def runtime_main(test_class):
strategy
=
model
.
build_strategy
(
args
)
strategy
=
model
.
build_strategy
(
args
)
avg_cost
=
model
.
net
(
args
)
avg_cost
=
model
.
net
(
args
)
model
.
build_optimizer
(
avg_cost
,
strategy
)
model
.
build_optimizer
(
avg_cost
,
strategy
)
fleet_util
.
_set_strategy
(
strategy
)
fleet_util
.
_set_role_maker
(
role
)
if
args
.
role
==
"pserver"
or
args
.
role
==
"heter_trainer"
:
if
args
.
role
==
"pserver"
or
args
.
role
==
"heter_trainer"
:
model
.
run_pserver
(
args
)
model
.
run_pserver
(
args
)
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
浏览文件 @
2725f51d
...
@@ -19,7 +19,6 @@ import os
...
@@ -19,7 +19,6 @@ import os
import
math
import
math
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.distributed.fleet.base.role_maker
as
role_maker
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
from
paddle.distributed.fleet
import
fleet
from
paddle.distributed.fleet
import
fleet
import
paddle
import
paddle
...
...
python/paddle/fluid/tests/unittests/test_fleet_base.py
浏览文件 @
2725f51d
...
@@ -107,7 +107,7 @@ class TestFleetBase(unittest.TestCase):
...
@@ -107,7 +107,7 @@ class TestFleetBase(unittest.TestCase):
def
test_util
(
self
):
def
test_util
(
self
):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
fleet
.
init
(
role
)
self
.
assert
Equal
(
fleet
.
util
()
,
None
)
self
.
assert
NotEqual
(
fleet
.
util
,
None
)
def
test_barrier_worker
(
self
):
def
test_barrier_worker
(
self
):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
...
...
python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py
浏览文件 @
2725f51d
...
@@ -436,12 +436,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
...
@@ -436,12 +436,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer
.
minimize
(
avg_cost
)
optimizer
.
minimize
(
avg_cost
)
comm_world
=
"server"
comm_world
=
"server"
fleet
.
util
()
.
barrier
(
comm_world
)
fleet
.
util
.
barrier
(
comm_world
)
gather
=
fleet
.
util
()
.
all_gather
(
1
,
comm_world
)
gather
=
fleet
.
util
.
all_gather
(
1
,
comm_world
)
self
.
assertEqual
(
gather
[
0
],
1
)
self
.
assertEqual
(
gather
[
0
],
1
)
all_reduce
=
fleet
.
util
()
.
all_reduce
(
1
,
"sum"
,
comm_world
)
all_reduce
=
fleet
.
util
.
all_reduce
(
1
,
"sum"
,
comm_world
)
self
.
assertEqual
(
1
,
all_reduce
)
self
.
assertEqual
(
1
,
all_reduce
)
self
.
clean
(
tmp
)
self
.
clean
(
tmp
)
...
@@ -752,12 +752,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
...
@@ -752,12 +752,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer
.
minimize
(
avg_cost
)
optimizer
.
minimize
(
avg_cost
)
comm_world
=
"server"
comm_world
=
"server"
fleet
.
util
()
.
barrier
(
comm_world
)
fleet
.
util
.
barrier
(
comm_world
)
gather
=
fleet
.
util
()
.
all_gather
(
1
,
comm_world
)
gather
=
fleet
.
util
.
all_gather
(
1
,
comm_world
)
self
.
assertEqual
(
gather
[
0
],
1
)
self
.
assertEqual
(
gather
[
0
],
1
)
all_reduce
=
fleet
.
util
()
.
all_reduce
(
1
,
"sum"
,
comm_world
)
all_reduce
=
fleet
.
util
.
all_reduce
(
1
,
"sum"
,
comm_world
)
self
.
assertEqual
(
1
,
all_reduce
)
self
.
assertEqual
(
1
,
all_reduce
)
self
.
clean
(
tmp
)
self
.
clean
(
tmp
)
...
...
python/paddle/fluid/tests/unittests/test_fleet_util.py
浏览文件 @
2725f51d
...
@@ -22,7 +22,6 @@ import tempfile
...
@@ -22,7 +22,6 @@ import tempfile
import
os
import
os
import
sys
import
sys
from
paddle.dataset.common
import
download
,
DATA_HOME
from
paddle.dataset.common
import
download
,
DATA_HOME
from
paddle.distributed.fleet.base.util_factory
import
fleet_util
import
paddle.distributed.fleet.base.role_maker
as
role_maker
import
paddle.distributed.fleet.base.role_maker
as
role_maker
...
@@ -59,8 +58,7 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -59,8 +58,7 @@ class TestFleetUtil(unittest.TestCase):
import
paddle.distributed.fleet.base.role_maker
as
role_maker
import
paddle.distributed.fleet.base.role_maker
as
role_maker
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
fleet
.
init
(
role
)
default_util
=
fleet
.
util
()
self
.
assertNotEqual
(
fleet
.
util
,
None
)
self
.
assertEqual
(
default_util
,
None
)
def
test_set_user_defined_util
(
self
):
def
test_set_user_defined_util
(
self
):
import
paddle.distributed.fleet
as
fleet
import
paddle.distributed.fleet
as
fleet
...
@@ -76,17 +74,19 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -76,17 +74,19 @@ class TestFleetUtil(unittest.TestCase):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
fleet
.
init
(
role
)
my_util
=
UserDefinedUtil
()
my_util
=
UserDefinedUtil
()
fleet
.
set_util
(
my_util
)
fleet
.
util
=
my_util
user_id
=
fleet
.
util
()
.
get_user_id
()
user_id
=
fleet
.
util
.
get_user_id
()
self
.
assertEqual
(
user_id
,
10
)
self
.
assertEqual
(
user_id
,
10
)
def
test_fs
(
self
):
def
test_fs
(
self
):
from
paddle.distributed.fleet.utils.fs
import
LocalFS
import
paddle.distributed.fleet
as
fleet
from
paddle.distributed.fleet.utils
import
LocalFS
fs
=
LocalFS
()
fs
=
LocalFS
()
dirs
,
files
=
fs
.
ls_dir
(
"test_tmp"
)
dirs
,
files
=
fs
.
ls_dir
(
"test_tmp"
)
dirs
,
files
=
fs
.
ls_dir
(
"./"
)
dirs
,
files
=
fs
.
ls_dir
(
"./"
)
self
.
assertFalse
(
fs
.
need_upload_download
())
self
.
assertFalse
(
fs
.
need_upload_download
())
fleet
_
util
.
_set_file_system
(
fs
)
fleet
.
util
.
_set_file_system
(
fs
)
def
download_files
(
self
):
def
download_files
(
self
):
path
=
download
(
self
.
proto_data_url
,
self
.
module_name
,
path
=
download
(
self
.
proto_data_url
,
self
.
module_name
,
...
@@ -98,7 +98,8 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -98,7 +98,8 @@ class TestFleetUtil(unittest.TestCase):
return
unzip_folder
return
unzip_folder
def
test_get_file_shard
(
self
):
def
test_get_file_shard
(
self
):
self
.
assertRaises
(
Exception
,
fleet_util
.
get_file_shard
,
"files"
)
import
paddle.distributed.fleet
as
fleet
self
.
assertRaises
(
Exception
,
fleet
.
util
.
get_file_shard
,
"files"
)
try
:
try
:
import
netifaces
import
netifaces
except
:
except
:
...
@@ -112,18 +113,20 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -112,18 +113,20 @@ class TestFleetUtil(unittest.TestCase):
role
=
role_maker
.
Role
.
WORKER
,
role
=
role_maker
.
Role
.
WORKER
,
worker_endpoints
=
[
"127.0.0.1:6003"
,
"127.0.0.1:6004"
],
worker_endpoints
=
[
"127.0.0.1:6003"
,
"127.0.0.1:6004"
],
server_endpoints
=
[
"127.0.0.1:6001"
,
"127.0.0.1:6002"
])
server_endpoints
=
[
"127.0.0.1:6001"
,
"127.0.0.1:6002"
])
fleet_util
.
_set_role_maker
(
role
)
fleet
.
init
(
role
)
files
=
fleet_util
.
get_file_shard
([
"1"
,
"2"
,
"3"
])
files
=
fleet
.
util
.
get_file_shard
([
"1"
,
"2"
,
"3"
])
self
.
assertTrue
(
len
(
files
)
==
2
and
"1"
in
files
and
"2"
in
files
)
self
.
assertTrue
(
len
(
files
)
==
2
and
"1"
in
files
and
"2"
in
files
)
def
test_program_type_trans
(
self
):
def
test_program_type_trans
(
self
):
import
paddle.distributed.fleet
as
fleet
data_dir
=
self
.
download_files
()
data_dir
=
self
.
download_files
()
program_dir
=
os
.
path
.
join
(
data_dir
,
self
.
pruned_dir
)
program_dir
=
os
.
path
.
join
(
data_dir
,
self
.
pruned_dir
)
text_program
=
"pruned_main_program.pbtxt"
text_program
=
"pruned_main_program.pbtxt"
binary_program
=
"pruned_main_program.bin"
binary_program
=
"pruned_main_program.bin"
text_to_binary
=
fleet
_
util
.
_program_type_trans
(
program_dir
,
text_to_binary
=
fleet
.
util
.
_program_type_trans
(
program_dir
,
text_program
,
True
)
text_program
,
True
)
binary_to_text
=
fleet
_
util
.
_program_type_trans
(
program_dir
,
binary_to_text
=
fleet
.
util
.
_program_type_trans
(
program_dir
,
binary_program
,
False
)
binary_program
,
False
)
self
.
assertTrue
(
self
.
assertTrue
(
os
.
path
.
exists
(
os
.
path
.
join
(
program_dir
,
text_to_binary
)))
os
.
path
.
exists
(
os
.
path
.
join
(
program_dir
,
text_to_binary
)))
...
@@ -131,6 +134,7 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -131,6 +134,7 @@ class TestFleetUtil(unittest.TestCase):
os
.
path
.
exists
(
os
.
path
.
join
(
program_dir
,
binary_to_text
)))
os
.
path
.
exists
(
os
.
path
.
join
(
program_dir
,
binary_to_text
)))
def
test_prams_check
(
self
):
def
test_prams_check
(
self
):
import
paddle.distributed.fleet
as
fleet
data_dir
=
self
.
download_files
()
data_dir
=
self
.
download_files
()
class
config
:
class
config
:
...
@@ -160,11 +164,11 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -160,11 +164,11 @@ class TestFleetUtil(unittest.TestCase):
# test saved var's shape
# test saved var's shape
conf
.
dump_program_filename
=
"pruned_main_program.save_var_shape_not_match"
conf
.
dump_program_filename
=
"pruned_main_program.save_var_shape_not_match"
self
.
assertRaises
(
Exception
,
fleet
_
util
.
_params_check
)
self
.
assertRaises
(
Exception
,
fleet
.
util
.
_params_check
)
# test program.proto without feed_op and fetch_op
# test program.proto without feed_op and fetch_op
conf
.
dump_program_filename
=
"pruned_main_program.no_feed_fetch"
conf
.
dump_program_filename
=
"pruned_main_program.no_feed_fetch"
results
=
fleet
_
util
.
_params_check
(
conf
)
results
=
fleet
.
util
.
_params_check
(
conf
)
self
.
assertTrue
(
len
(
results
)
==
1
)
self
.
assertTrue
(
len
(
results
)
==
1
)
np
.
testing
.
assert_array_almost_equal
(
np
.
testing
.
assert_array_almost_equal
(
results
[
0
],
np
.
array
(
results
[
0
],
np
.
array
(
...
@@ -172,11 +176,11 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -172,11 +176,11 @@ class TestFleetUtil(unittest.TestCase):
# test feed_var's shape
# test feed_var's shape
conf
.
dump_program_filename
=
"pruned_main_program.feed_var_shape_not_match"
conf
.
dump_program_filename
=
"pruned_main_program.feed_var_shape_not_match"
self
.
assertRaises
(
Exception
,
fleet
_
util
.
_params_check
)
self
.
assertRaises
(
Exception
,
fleet
.
util
.
_params_check
)
# test correct case with feed_vars_filelist
# test correct case with feed_vars_filelist
conf
.
dump_program_filename
=
"pruned_main_program.pbtxt"
conf
.
dump_program_filename
=
"pruned_main_program.pbtxt"
results
=
fleet
_
util
.
_params_check
(
conf
)
results
=
fleet
.
util
.
_params_check
(
conf
)
self
.
assertTrue
(
len
(
results
)
==
1
)
self
.
assertTrue
(
len
(
results
)
==
1
)
np
.
testing
.
assert_array_almost_equal
(
np
.
testing
.
assert_array_almost_equal
(
results
[
0
],
np
.
array
(
results
[
0
],
np
.
array
(
...
@@ -186,13 +190,14 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -186,13 +190,14 @@ class TestFleetUtil(unittest.TestCase):
conf
.
feed_config
.
feeded_vars_filelist
=
None
conf
.
feed_config
.
feeded_vars_filelist
=
None
# test feed var with lod_level >= 2
# test feed var with lod_level >= 2
conf
.
dump_program_filename
=
"pruned_main_program.feed_lod2"
conf
.
dump_program_filename
=
"pruned_main_program.feed_lod2"
self
.
assertRaises
(
Exception
,
fleet
_
util
.
_params_check
)
self
.
assertRaises
(
Exception
,
fleet
.
util
.
_params_check
)
conf
.
dump_program_filename
=
"pruned_main_program.pbtxt"
conf
.
dump_program_filename
=
"pruned_main_program.pbtxt"
results
=
fleet
_
util
.
_params_check
(
conf
)
results
=
fleet
.
util
.
_params_check
(
conf
)
self
.
assertTrue
(
len
(
results
)
==
1
)
self
.
assertTrue
(
len
(
results
)
==
1
)
def
test_proto_check
(
self
):
def
test_proto_check
(
self
):
import
paddle.distributed.fleet
as
fleet
data_dir
=
self
.
download_files
()
data_dir
=
self
.
download_files
()
class
config
:
class
config
:
...
@@ -210,7 +215,7 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -210,7 +215,7 @@ class TestFleetUtil(unittest.TestCase):
"pruned_main_program.save_var_shape_not_match"
))
"pruned_main_program.save_var_shape_not_match"
))
conf
.
is_text_pruned_program
=
True
conf
.
is_text_pruned_program
=
True
conf
.
draw
=
False
conf
.
draw
=
False
res
=
fleet
_
util
.
_proto_check
(
conf
)
res
=
fleet
.
util
.
_proto_check
(
conf
)
self
.
assertFalse
(
res
)
self
.
assertFalse
(
res
)
# test match
# test match
...
@@ -222,10 +227,11 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -222,10 +227,11 @@ class TestFleetUtil(unittest.TestCase):
else
:
else
:
conf
.
draw
=
True
conf
.
draw
=
True
conf
.
draw_out_name
=
"pruned_check"
conf
.
draw_out_name
=
"pruned_check"
res
=
fleet
_
util
.
_proto_check
(
conf
)
res
=
fleet
.
util
.
_proto_check
(
conf
)
self
.
assertTrue
(
res
)
self
.
assertTrue
(
res
)
def
test_visualize
(
self
):
def
test_visualize
(
self
):
import
paddle.distributed.fleet
as
fleet
if
sys
.
platform
==
'win32'
or
sys
.
platform
==
'sys.platform'
:
if
sys
.
platform
==
'win32'
or
sys
.
platform
==
'sys.platform'
:
pass
pass
else
:
else
:
...
@@ -234,10 +240,10 @@ class TestFleetUtil(unittest.TestCase):
...
@@ -234,10 +240,10 @@ class TestFleetUtil(unittest.TestCase):
data_dir
,
data_dir
,
os
.
path
.
join
(
self
.
train_dir
,
"join_main_program.pbtxt"
))
os
.
path
.
join
(
self
.
train_dir
,
"join_main_program.pbtxt"
))
is_text
=
True
is_text
=
True
program
=
fleet
_
util
.
_load_program
(
program_path
,
is_text
)
program
=
fleet
.
util
.
_load_program
(
program_path
,
is_text
)
output_dir
=
os
.
path
.
join
(
data_dir
,
self
.
train_dir
)
output_dir
=
os
.
path
.
join
(
data_dir
,
self
.
train_dir
)
output_filename
=
"draw_prog"
output_filename
=
"draw_prog"
fleet
_
util
.
_visualize_graphviz
(
program
,
output_dir
,
output_filename
)
fleet
.
util
.
_visualize_graphviz
(
program
,
output_dir
,
output_filename
)
self
.
assertTrue
(
self
.
assertTrue
(
os
.
path
.
exists
(
os
.
path
.
exists
(
os
.
path
.
join
(
output_dir
,
output_filename
+
".dot"
)))
os
.
path
.
join
(
output_dir
,
output_filename
+
".dot"
)))
...
...
python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py
浏览文件 @
2725f51d
...
@@ -49,7 +49,7 @@ class TestDygraphDataLoaderSingalHandler(unittest.TestCase):
...
@@ -49,7 +49,7 @@ class TestDygraphDataLoaderSingalHandler(unittest.TestCase):
test_process
.
start
()
test_process
.
start
()
set_child_signal_handler
(
id
(
self
),
test_process
.
pid
)
set_child_signal_handler
(
id
(
self
),
test_process
.
pid
)
time
.
sleep
(
3
)
time
.
sleep
(
5
)
except
core
.
EnforceNotMet
as
ex
:
except
core
.
EnforceNotMet
as
ex
:
self
.
assertIn
(
"FatalError"
,
cpt
.
get_exception_message
(
ex
))
self
.
assertIn
(
"FatalError"
,
cpt
.
get_exception_message
(
ex
))
exception
=
ex
exception
=
ex
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
浏览文件 @
2725f51d
...
@@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase):
def
test_simple_fc_with_new_strategy
(
self
):
def
test_simple_fc_with_new_strategy
(
self
):
# use_cuda, use_reduce
# use_cuda, use_reduce
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
True
)
# NOTE: the computation result of nccl_reduce is non-deterministic,
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
False
)
# related issue: https://github.com/NVIDIA/nccl/issues/157
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
True
,
1e-5
,
1e-2
)
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
False
,
1e-5
,
1e-2
)
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
):
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
...
@@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase):
# NOTE: the computation result of nccl_reduce is non-deterministic,
# NOTE: the computation result of nccl_reduce is non-deterministic,
# related issue: https://github.com/NVIDIA/nccl/issues/157
# related issue: https://github.com/NVIDIA/nccl/issues/157
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
True
,
1e-5
,
1e-2
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
True
,
1e-5
,
1e-2
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
False
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
False
,
1e-5
,
1e-2
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录