Commit 2725f51d authored by S smallv0221

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into yxp0924

@@ -446,6 +446,9 @@ function(nv_library TARGET_NAME)
 message(FATAL "Please specify source file or library in nv_library.")
 endif()
 endif(nv_library_SRCS)
+if (WIN32)
+set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
+endif(WIN32)
 endif()
 endfunction(nv_library)
@@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME)
 add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
 common_link(${TARGET_NAME})
 endif()
+if (WIN32)
+set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
+endif(WIN32)
 endif()
 endfunction(nv_binary)
@@ -482,6 +488,9 @@ function(nv_test TARGET_NAME)
 set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
 set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
 set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
+if (WIN32)
+set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
+endif(WIN32)
 endif()
 endfunction(nv_test)
......
@@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING
 set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING
 "A path setting paddle inference shared and static libraries")
-# TODO(zhaolong)
-# At present, the size of static lib in Windows exceeds the system limit,
-# so the generation of static lib is temporarily turned off.
+# At present, the size of static lib in Windows is very large,
+# so we need to crop the library size.
 if(WIN32)
 #todo: remove the option
 option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." OFF)
@@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta
 copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR})
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
-set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*)
+if(WIN32)
+set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/${CMAKE_BUILD_TYPE}/paddle_fluid_c.*)
+else(WIN32)
+set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*)
+endif(WIN32)
 copy(inference_lib_dist
 SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_fluid_c_lib}
......
@@ -26,4 +26,7 @@ if(WITH_GPU)
 set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
 endif()
+if(WIN32)
+set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
+endif()
<?xml version="1.0" encoding="utf-8"?>
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemDefinitionGroup>
<CudaCompile>
<!-- Project schema: Host properties -->
<UseHostDefines>true</UseHostDefines>
<Emulation>false</Emulation>
<HostDebugInfo Condition="'$(Configuration)' == 'Debug'">true</HostDebugInfo>
<HostDebugInfo Condition="'$(Configuration)' != 'Debug'">false</HostDebugInfo>
<FastMath>false</FastMath>
<Optimization>InheritFromHost</Optimization>
<Runtime>InheritFromHost</Runtime>
<RuntimeChecks>InheritFromHost</RuntimeChecks>
<TypeInfo>InheritFromHost</TypeInfo>
<Warning>InheritFromHost</Warning>
<BaseCommandLineTemplate>-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]</BaseCommandLineTemplate>
<BuildCommandLineTemplate>--use-local-env</BuildCommandLineTemplate>
<BuildDynamicCommandLineTemplate>[CodeGeneration]</BuildDynamicCommandLineTemplate>
<CleanCommandLineTemplate>-clean</CleanCommandLineTemplate>
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
<HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate>
<DriverApiCommandLineTemplate>%(BaseCommandLineTemplate) [CompileOut] "%(FullPath)"</DriverApiCommandLineTemplate>
<RuntimeApiCommandLineTemplate>%(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)"</RuntimeApiCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate)
# Runtime API (NVCC Compilation Type is hybrid object or .c file)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate)
</CommandLineTemplate>
<ExecutionDescription>Compiling CUDA source file %(Identity)...</ExecutionDescription>
<ExclusionDescription>Skipping CUDA source file %(Identity) (excluded from build).</ExclusionDescription>
<!-- Miscellaneous -->
<PropsCacheOutputFile>%(Filename)%(Extension).cache</PropsCacheOutputFile>
<PropsCacheOutputPath>$(IntDir)%(PropsCacheOutputFile)</PropsCacheOutputPath>
<CudaCompileCoreProject>$(MSBuildProjectFullPath)</CudaCompileCoreProject>
</CudaCompile>
<CudaLink>
<PerformDeviceLink>true</PerformDeviceLink>
<LinkOut>$(IntDir)$(TargetName).device-link.obj</LinkOut>
<AdditionalLibraryDirectories></AdditionalLibraryDirectories>
<UseHostLibraryDirectories>true</UseHostLibraryDirectories>
<AdditionalDependencies></AdditionalDependencies>
<UseHostLibraryDependencies>true</UseHostLibraryDependencies>
<GPUDebugInfo>InheritFromProject</GPUDebugInfo>
<Optimization>InheritFromProject</Optimization>
<!-- Implicitly inherited from the project via @(CudaCompile) -->
<CodeGeneration></CodeGeneration>
<RuntimeChecks></RuntimeChecks>
<Runtime></Runtime>
<TargetMachinePlatform></TargetMachinePlatform>
<TypeInfo></TypeInfo>
<Warning></Warning>
<Inputs></Inputs>
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
<HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate>
<LinkCommandLineTemplate>"$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs]</LinkCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
%(LinkCommandLineTemplate)
</CommandLineTemplate>
</CudaLink>
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
</Link>
<ClCompile>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
</Project>
@@ -44,14 +44,9 @@ add_subdirectory(api)
 set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor
 zero_copy_tensor reset_tensor_array
 analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg})
-# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU
-if(WIN32 AND WITH_GPU)
-cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_API})
-else()
-create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API})
-endif()
+create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API})
-if(NOT APPLE AND NOT WIN32)
+if(NOT APPLE)
 # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
 set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
 set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
......
@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
 void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
 #ifdef PADDLE_WITH_MKLDNN
-VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
+std::vector<std::vector<int>> inputs_shape;
+for (size_t i = 0; i < inputs.size(); ++i) {
+inputs_shape.emplace_back(inputs[i].shape);
+}
+MkldnnPreSet(inputs_shape);
+#endif
+}
+
+void AnalysisPredictor::MkldnnPreSet(
+const std::vector<std::vector<int>> &inputs_shape) {
+#ifdef PADDLE_WITH_MKLDNN
+VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
 << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
 // In cache clearing mode.
 if (config_.mkldnn_cache_capacity_ > 0) {
@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
 config_.mkldnn_cache_capacity_);
 // Set current_input_shape for caching dynamic shape.
 std::stringstream ss;
-for (size_t i = 0; i < inputs.size(); ++i) {
-for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
-ss << inputs[i].shape[j] << "-";
+for (size_t i = 0; i < inputs_shape.size(); ++i) {
+for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
+ss << inputs_shape[i][j] << "-";
 }
 }
 VLOG(2) << "Set input shape=" << ss.str();
@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 bool AnalysisPredictor::ZeroCopyRun() {
 paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+if (config_.use_mkldnn_) {
+std::vector<std::vector<int>> shape_vector;
+auto names = GetInputNames();
+for (size_t i = 0; i < names.size(); ++i) {
+auto in_tensor = GetInputTensor(names[i]);
+shape_vector.emplace_back(in_tensor->shape());
+}
+MkldnnPreSet(shape_vector);
+}
+#endif
 executor_->Run();
 // Fix TensorArray reuse not cleaned bug.
 tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
 // recover the cpu_math_library_num_threads to 1, in order to avoid thread
 // conflict when integrating it into deployment service.
 paddle::platform::SetNumThreads(1);
+#ifdef PADDLE_WITH_MKLDNN
+if (config_.use_mkldnn_) MkldnnPostReset();
+#endif
 #if defined(PADDLE_WITH_MKLML)
 // Frees unused memory allocated by the Intel® MKL Memory Allocator to
 // avoid memory leak. See:
......
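With the hunks above, the MKLDNN shape-cache bookkeeping (MkldnnPreSet/MkldnnPostReset) now also runs on the zero-copy path whenever MKLDNN is enabled. A minimal Python-side sketch of exercising that path follows; the model directory and the exact AnalysisConfig/zero-copy API spelling are assumptions for illustration, not part of this diff.

# Hedged usage sketch: enable MKLDNN and its shape cache, then run the
# zero-copy path that now calls MkldnnPreSet()/MkldnnPostReset() internally.
import numpy as np
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor  # assumed import path

config = AnalysisConfig("./mobilenet_model")    # hypothetical model directory
config.disable_gpu()
config.enable_mkldnn()                          # sets config_.use_mkldnn_
config.set_mkldnn_cache_capacity(10)            # >0 enables cache clearing mode
config.switch_use_feed_fetch_ops(False)         # required for zero-copy tensors

predictor = create_paddle_predictor(config)
input_name = predictor.get_input_names()[0]
input_tensor = predictor.get_input_tensor(input_name)
input_tensor.copy_from_cpu(np.random.rand(1, 3, 224, 224).astype("float32"))

predictor.zero_copy_run()                       # MkldnnPreSet/PostReset happen here
output_name = predictor.get_output_names()[0]
output = predictor.get_output_tensor(output_name).copy_to_cpu()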
@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
 /// \param[in] inputs tensors
 ///
 void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
+
+///
+/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
+///
+/// Used in AnalysisPredictor::Run() and AnalysisPredictor::ZeroCopyRun().
+///
+/// \param[in] inputs_shape shapes of the input tensors
+///
+void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);
+
 ///
 /// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
 ///
......
@@ -68,11 +68,6 @@ rm -rf *
 for WITH_STATIC_LIB in ON OFF; do
 if [ $(echo `uname` | grep "Win") != "" ]; then
-# TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready.
-if [ $TEST_GPU_CPU == ON] && [ $WITH_STATIC_LIB ==ON ]; then
-return 0
-fi
 # -----simple_on_word2vec on windows-----
 cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \
 -DWITH_MKL=$TURN_ON_MKL \
......
@@ -50,8 +50,9 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
 public:
 void Compute(const framework::ExecutionContext& ctx) const override {
 auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
-"It must use CUDAPlace.");
+PADDLE_ENFORCE_EQ(
+platform::is_gpu_place(ctx.GetPlace()), true,
+paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
 const Tensor* input = ctx.Input<Tensor>("Input");
 auto* filter = ctx.Input<Tensor>("Filter");
 auto* output = ctx.Output<Tensor>("Output");
@@ -60,14 +61,16 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
 std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
 std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
 int groups = ctx.Attr<int>("groups");
 bool exhaustive_search =
 FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
-if (exhaustive_search && FLAGS_cudnn_deterministic) {
-PADDLE_THROW(
-"Cann't set exhaustive_search True and "
-"FLAGS_cudnn_deterministic True at same time.");
-}
+bool deterministic = FLAGS_cudnn_deterministic;
+auto exhaustive_deterministic = exhaustive_search && deterministic;
+PADDLE_ENFORCE_EQ(exhaustive_deterministic, false,
+platform::errors::InvalidArgument(
+"Can't set exhaustive_search True and "
+"FLAGS_cudnn_deterministic True at same time."));
 const std::string padding_algorithm =
 ctx.Attr<std::string>("padding_algorithm");
 const std::string data_format = ctx.Attr<std::string>("data_format");
@@ -197,7 +200,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
 &transformed_input);
 } break;
 default:
-PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
+PADDLE_THROW(platform::errors::InvalidArgument(
+"ConvOp only support tensors with 4 or 5 dimensions."));
 }
 } else {
@@ -317,8 +321,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 public:
 void Compute(const framework::ExecutionContext& ctx) const override {
 auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
-"It must use CUDAPlace.");
+PADDLE_ENFORCE_EQ(
+platform::is_gpu_place(ctx.GetPlace()), true,
+paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
 auto input = ctx.Input<Tensor>("Input");
 auto filter = ctx.Input<Tensor>("Filter");
 auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
@@ -337,14 +342,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
 std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
 int groups = ctx.Attr<int>("groups");
 bool exhaustive_search =
 FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
 bool deterministic = FLAGS_cudnn_deterministic;
-if (exhaustive_search && deterministic) {
-PADDLE_THROW(
-"Can't set exhaustive_search True and "
-"FLAGS_cudnn_deterministic True at same time.");
-}
+auto exhaustive_deterministic = exhaustive_search && deterministic;
+PADDLE_ENFORCE_EQ(exhaustive_deterministic, false,
+platform::errors::InvalidArgument(
+"Can't set exhaustive_search True and "
+"FLAGS_cudnn_deterministic True at same time."));
 const std::string data_format = ctx.Attr<std::string>("data_format");
 const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
@@ -495,7 +502,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 &transformed_input);
 } break;
 default:
-PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
+PADDLE_THROW(platform::errors::InvalidArgument(
+"ConvOp only support tensors with 4 or 5 dimensions."));
 }
 } else {
 transformed_input.ShareDataWith(transformed_input_channel);
@@ -701,8 +709,9 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
 public:
 void Compute(const framework::ExecutionContext& ctx) const override {
 auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
-"It must use CUDAPlace.");
+PADDLE_ENFORCE_EQ(
+platform::is_gpu_place(ctx.GetPlace()), true,
+paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
 auto X = ctx.Input<Tensor>("Input");
 auto W = ctx.Input<Tensor>("Filter");
 auto dO = ctx.Input<Tensor>("DOutput");
@@ -736,14 +745,16 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
 const std::vector<int>& strides = ctx.Attr<std::vector<int>>("strides");
 std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
 int groups = ctx.Attr<int>("groups");
 bool exhaustive_search =
 FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
 bool deterministic = FLAGS_cudnn_deterministic;
-if (exhaustive_search && deterministic) {
-PADDLE_THROW(
-"Can't set exhaustive_search True and "
-"FLAGS_cudnn_deterministic True at same time.");
-}
+auto exhaustive_deterministic = exhaustive_search && deterministic;
+PADDLE_ENFORCE_EQ(exhaustive_deterministic, false,
+platform::errors::InvalidArgument(
+"Can't set exhaustive_search True and "
+"FLAGS_cudnn_deterministic True at same time."));
 std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
 std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
@@ -878,7 +889,8 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
 }
 } break;
 default:
-PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
+PADDLE_THROW(platform::errors::InvalidArgument(
+"ConvOp only support tensors with 4 or 5 dimensions."));
 }
 } else {
......
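The three cuDNN conv kernels above now report the exhaustive-search/deterministic conflict through PADDLE_ENFORCE_EQ with a typed InvalidArgument error instead of a bare PADDLE_THROW. A hedged Python sketch of the conflicting configuration follows; controlling the gflags through environment variables and using the dygraph Conv2D layer are assumptions made only for illustration.

# Sketch only: enable both cuDNN exhaustive kernel search and deterministic
# mode, the combination the new check rejects. Requires a GPU build; the
# gflags below are read when paddle is imported.
import os
os.environ["FLAGS_cudnn_exhaustive_search"] = "1"
os.environ["FLAGS_cudnn_deterministic"] = "1"

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard(fluid.CUDAPlace(0)):
    conv = fluid.dygraph.Conv2D(num_channels=3, num_filters=8, filter_size=3)
    x = fluid.dygraph.to_variable(
        np.random.rand(1, 3, 32, 32).astype("float32"))
    try:
        conv(x)  # expected to fail with InvalidArgument from the check above
    except Exception as err:
        print(err)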
@@ -685,8 +685,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel<T> {
 public:
 void Compute(const framework::ExecutionContext& ctx) const override {
 auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
-PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true,
-"It must use CPUPlace.");
+PADDLE_ENFORCE_EQ(
+platform::is_cpu_place(ctx.GetPlace()), true,
+paddle::platform::errors::PreconditionNotMet("It must use CPUPlace."));
 const Tensor* X = ctx.Input<Tensor>("Input");
 const Tensor* dY = ctx.Input<Tensor>("DOutput");
 const Tensor* ddX = ctx.Input<Tensor>("DDInput");
@@ -982,11 +983,20 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
 PADDLE_ENFORCE_EQ(
 output->dims()[output->dims().size() - 1] %
 input->dims()[input->dims().size() - 1],
-0, "The output channels must be a multiple of the input channels");
+0, platform::errors::InvalidArgument(
+"ShapeError: The output channels must be a multiple of the "
+"input channels. But received output channel number is %d "
+"and input channel number is %d",
+output->dims()[output->dims().size() - 1],
+input->dims()[input->dims().size() - 1]));
 } else {
 PADDLE_ENFORCE_EQ(
 output->dims()[1] % input->dims()[1], 0,
-"The output channels must be a multiple of the input channels");
+platform::errors::InvalidArgument(
+"ShapeError: The output channels must be a multiple of the "
+"input channels. But received output channel number is %d "
+"and input channel number is %d",
+output->dims()[1], input->dims()[1]));
 }
 // transform tensor
 Tensor transformed_input(input->type());
......
@@ -51,8 +51,9 @@ template <typename T>
 class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
 public:
 void Compute(const framework::ExecutionContext& ctx) const override {
-PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
-"It must use CUDAPlace.");
+PADDLE_ENFORCE_EQ(
+platform::is_gpu_place(ctx.GetPlace()), true,
+paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
 auto* input = ctx.Input<Tensor>("Input");
 auto* filter = ctx.Input<Tensor>("Filter");
 auto* output = ctx.Output<Tensor>("Output");
@@ -145,9 +146,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
 ctx, input_pad, input_transpose, pad_value, &transformed_input);
 } break;
 default:
-PADDLE_ENFORCE_EQ(
-rank == 4 || rank == 5, true,
-"Op(ConvTranspose) only supports 4-D or 5-D input Tensor.");
+PADDLE_THROW(platform::errors::InvalidArgument(
+"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."));
 }
 } else {
 transformed_input = input_transpose;
@@ -290,8 +290,9 @@ template <typename T>
 class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
 public:
 void Compute(const framework::ExecutionContext& ctx) const override {
-PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-"It must use CUDAPlace.");
+PADDLE_ENFORCE_EQ(
+platform::is_gpu_place(ctx.GetPlace()), true,
+paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
 auto input = ctx.Input<Tensor>("Input");
 auto filter = ctx.Input<Tensor>("Filter");
 auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
@@ -393,9 +394,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
 &transformed_output_grad);
 } break;
 default:
-PADDLE_ENFORCE_EQ(
-rank == 4 || rank == 5, true,
-"Op(ConvTranspose) only supports 4-D or 5-D input Tensor.");
+PADDLE_THROW(platform::errors::InvalidArgument(
+"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."));
 }
 } else {
 transformed_output_grad = output_grad_transpose;
......
@@ -580,7 +580,12 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
 output->mutable_data<T>(context.GetPlace());
 int groups = context.Attr<int>("groups");
-PADDLE_ENFORCE_EQ(groups, filter.dims()[0]);
+PADDLE_ENFORCE_EQ(
+groups, filter.dims()[0],
+platform::errors::InvalidArgument(
+"groups should be equal to the 1st dimension of filter. But "
+"received groups is %d and filter dimension[0] is %d",
+groups, filter.dims()[0]));
 std::vector<int> strides = context.Attr<std::vector<int>>("strides");
 std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
@@ -588,7 +593,10 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
 std::string padding_algorithm =
 context.Attr<std::string>("padding_algorithm");
 for (auto v : dilations) {
-PADDLE_ENFORCE_EQ(v, 1);
+PADDLE_ENFORCE_EQ(v, 1, platform::errors::InvalidArgument(
+"dilations should be 1 in depthwise conv. "
+"But received dilations is %d",
+v));
 }
 auto in_dims = input->dims();
......
@@ -46,14 +46,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
 platform::errors::InvalidArgument(
 "The square of upscale_factor[%u] should divide the "
 "number of channel[%u]",
-input_dims[1], upscale_factor * upscale_factor));
+upscale_factor * upscale_factor, input_dims[1]));
 } else {
 PADDLE_ENFORCE_EQ(
 input_dims[3] % (upscale_factor * upscale_factor), 0,
 platform::errors::InvalidArgument(
 "The square of upscale_factor[%u] should divide the "
 "number of channel[%u]",
-input_dims[3], upscale_factor * upscale_factor));
+upscale_factor * upscale_factor, input_dims[3]));
 }
 auto output_dims = input_dims;
 output_dims[0] = input_dims[0];
......
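The PixelShuffleOp hunk only swaps the two format arguments so they match the message placeholders (the square of upscale_factor first, the channel count second). A small sketch of the constraint itself, using the fluid layer API as an assumed entry point:

# For NCHW input, pixel_shuffle requires channels % upscale_factor**2 == 0.
import paddle.fluid as fluid

upscale_factor = 3
x = fluid.data(name="x", shape=[2, 9, 4, 4], dtype="float32")  # 9 % 9 == 0, OK
y = fluid.layers.pixel_shuffle(x, upscale_factor)              # shape [2, 1, 12, 12]

# With 8 channels instead of 9, InferShape would now report:
# "The square of upscale_factor[9] should divide the number of channel[8]"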
@@ -13,7 +13,7 @@
 # limitations under the License.
 # TODO: define distributed api under this directory,
-from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker
+from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker
 from .base.distributed_strategy import DistributedStrategy
 from .base.fleet_base import Fleet
 from .base.util_factory import UtilBase
@@ -26,6 +26,7 @@ __all__ = [
 "UserDefinedRoleMaker",
 "PaddleCloudRoleMaker",
 "Fleet",
+"Role",
 ]
 fleet = Fleet()
@@ -39,8 +40,7 @@ server_num = fleet.server_num
 server_index = fleet.server_index
 server_endpoints = fleet.server_endpoints
 is_server = fleet.is_server
-set_util = fleet.set_util
-util = fleet.util
+util = UtilBase()
 barrier_worker = fleet.barrier_worker
 init_worker = fleet.init_worker
 init_server = fleet.init_server
......
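With `util` exported as a module-level `UtilBase()` instance, and `fleet.init()` binding it to the active role maker (see the fleet_base.py hunks that follow), user code reaches the distributed helpers through `fleet.util` instead of importing `fleet_util` from `util_factory`. A minimal usage sketch, assuming a parameter-server style PaddleCloudRoleMaker environment:

import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker

role = PaddleCloudRoleMaker(is_collective=False)
fleet.init(role)  # init() wires fleet.util to this role maker

# Distributed utilities are available on fleet.util after init().
files = fleet.util.get_file_shard(["1.log", "2.log", "3.log", "4.log"])
fleet.util.print_on_rank("worker 0 got: {}".format(files), 0)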
@@ -23,7 +23,6 @@ from .strategy_compiler import StrategyCompiler
 from .distributed_strategy import DistributedStrategy
 from .meta_optimizer_factory import MetaOptimizerFactory
 from .runtime_factory import RuntimeFactory
-from .util_factory import UtilFactory
 from paddle.fluid.wrapped_decorator import wrap_decorator
 from paddle.fluid.dygraph import parallel_helper
@@ -120,7 +119,6 @@ class Fleet(object):
 self.strategy_compiler = None
 self._is_collective = False
 self._runtime_handle = None
-self._util = None
 def init(self, role_maker=None, is_collective=False):
 """
@@ -182,6 +180,9 @@ class Fleet(object):
 format(type(role_maker)))
 self._role_maker._generate_role()
+import paddle.distributed.fleet as fleet
+fleet.util._set_role_maker(self._role_maker)
 self.strategy_compiler = StrategyCompiler()
 if paddle.fluid.framework.in_dygraph_mode():
 if parallel_helper._is_parallel_ctx_initialized():
@@ -353,29 +354,6 @@ class Fleet(object):
 return self._role_maker._is_server(
 ) or self._role_maker._is_heter_worker()
-def set_util(self, util):
-self._util = util
-def util(self):
-"""
-Utility functions that can be used under certain runtime
-return util
-Returns:
-UtilBase: instance of UtilBase, can use distributed ops/tools easily.
-Examples:
-.. code-block:: python
-import paddle.distributed.fleet as fleet
-fleet.init()
-util = fleet.util
-files = ["1.log", "2.log", "3.log", "4.log"]
-files = util.get_file_shard()
-"""
-return self._util
 def barrier_worker(self):
 """
 barrier all workers
@@ -1102,7 +1080,7 @@ class Fleet(object):
 if self._runtime_handle is None:
 self._runtime_handle = RuntimeFactory()._create_runtime(context)
-if self._util is None:
-self._util = UtilFactory()._create_util(context)
+import paddle.distributed.fleet as fleet
+fleet.util._set_strategy(context["valid_strategy"])
 return optimize_ops, params_grads
@@ -73,11 +73,13 @@ class UtilBase(object):
 .. code-block:: python
 # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
-from paddle.distributed.fleet.base.util_factory import fleet_util
 import paddle.distributed.fleet as fleet
 from paddle.distributed.fleet import PaddleCloudRoleMaker
 import sys
 import numpy as np
+import os
+os.environ["PADDLE_WITH_GLOO"] = "2"
 def train():
 role = PaddleCloudRoleMaker(
@@ -85,19 +87,18 @@ class UtilBase(object):
 init_gloo=True,
 path="./tmp_gloo")
 fleet.init(role)
-fleet_util._set_role_maker(role)
 if fleet.is_server():
 input = [1, 2]
-output = fleet_util.all_reduce(input, "sum", "server")
+output = fleet.util.all_reduce(input, "sum", "server")
 print(output)
 # [2, 4]
 elif fleet.is_worker():
 input = np.array([3, 4])
-output = fleet_util.all_reduce(input, "sum", "worker")
+output = fleet.util.all_reduce(input, "sum", "worker")
 print(output)
 # [6, 8]
-output = fleet_util.all_reduce(input, "sum", "all")
+output = fleet.util.all_reduce(input, "sum", "all")
 print(output)
 # [8, 12]
 if __name__ == "__main__":
@@ -117,10 +118,12 @@ class UtilBase(object):
 .. code-block:: python
 # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
-from paddle.distributed.fleet.base.util_factory import fleet_util
 import paddle.distributed.fleet as fleet
 from paddle.distributed.fleet import PaddleCloudRoleMaker
 import sys
+import os
+os.environ["PADDLE_WITH_GLOO"] = "2"
 def train():
 role = PaddleCloudRoleMaker(
@@ -128,15 +131,14 @@ class UtilBase(object):
 init_gloo=True,
 path="./tmp_gloo")
 fleet.init(role)
-fleet_util._set_role_maker(role)
 if fleet.is_server():
-fleet_util.barrier("server")
+fleet.util.barrier("server")
 print("all server arrive here")
 elif fleet.is_worker():
-fleet_util.barrier("worker")
+fleet.util.barrier("worker")
 print("all server arrive here")
-fleet_util.barrier("all")
+fleet.util.barrier("all")
 print("all servers and workers arrive here")
 if __name__ == "__main__":
@@ -160,10 +162,12 @@ class UtilBase(object):
 .. code-block:: python
 # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
-from paddle.distributed.fleet.base.util_factory import fleet_util
 import paddle.distributed.fleet as fleet
 from paddle.distributed.fleet import PaddleCloudRoleMaker
 import sys
+import os
+os.environ["PADDLE_WITH_GLOO"] = "2"
 def train():
 role = PaddleCloudRoleMaker(
@@ -171,19 +175,18 @@ class UtilBase(object):
 init_gloo=True,
 path="./tmp_gloo")
 fleet.init(role)
-fleet_util._set_role_maker(role)
 if fleet.is_server():
 input = fleet.server_index()
-output = fleet_util.all_gather(input, "server")
+output = fleet.util.all_gather(input, "server")
 print(output)
 # output = [0, 1]
 elif fleet.is_worker():
 input = fleet.worker_index()
-output = fleet_util.all_gather(input, "worker")
+output = fleet.util.all_gather(input, "worker")
 # output = [0, 1]
 print(output)
-output = fleet_util.all_gather(input, "all")
+output = fleet.util.all_gather(input, "all")
 print(output)
 # output = [0, 1, 0, 1]
@@ -220,18 +223,20 @@ class UtilBase(object):
 .. code-block:: python
-from paddle.distributed.fleet.base.util_factory import fleet_util
-import paddle.distributed.fleet.base.role_maker as role_maker
-role = role_maker.UserDefinedRoleMaker(
+import paddle.distributed.fleet as fleet
+from paddle.distributed.fleet import UserDefinedRoleMaker
+role = UserDefinedRoleMaker(
 is_collective=False,
 init_gloo=False,
 current_id=0,
-role=role_maker.Role.WORKER,
+role=fleet.Role.WORKER,
 worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
 server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
-fleet_util._set_role_maker(role)
-files = fleet_util.get_file_shard(["file1", "file2", "file3"])
+fleet.init(role)
+files = fleet.util.get_file_shard(["file1", "file2", "file3"])
+print(files)
 # files = ["file1", "file2"]
 """
 if not isinstance(files, list):
@@ -267,18 +272,19 @@ class UtilBase(object):
 .. code-block:: python
-from paddle.distributed.fleet.base.util_factory import fleet_util
-import paddle.distributed.fleet.base.role_maker as role_maker
-role = role_maker.UserDefinedRoleMaker(
+import paddle.distributed.fleet as fleet
+from paddle.distributed.fleet import UserDefinedRoleMaker
+role = UserDefinedRoleMaker(
 is_collective=False,
 init_gloo=False,
 current_id=0,
-role=role_maker.Role.WORKER,
+role=fleet.Role.WORKER,
 worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
 server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
-fleet_util._set_role_maker(role)
-fleet_util.print_on_rank("I'm worker 0", 0)
+fleet.init(role)
+fleet.util.print_on_rank("I'm worker 0", 0)
 """
 if self.role_maker._worker_index() != rank_id:
 return
@@ -577,6 +583,3 @@ class UtilBase(object):
 print("fetch_targets name: %s" % v.name)
 print("fetch_targets: {}".format(results[i]))
 return results
-fleet_util = UtilFactory()._create_util(None)
@@ -348,8 +348,7 @@ def launch_ps(args):
 "PADDLE_PORT": cur_server.endpoint.split(":")[1],
 "TRAINING_ROLE": "PSERVER",
 "PADDLE_TRAINERS_NUM": str(worker_num),
-"POD_IP": cur_server.endpoint.split(":")[0],
-"PADDLE_WITH_GLOO": "1"
+"POD_IP": cur_server.endpoint.split(":")[0]
 }
 current_env.update(proc_env)
@@ -388,8 +387,7 @@ def launch_ps(args):
 "PADDLE_TRAINER_ENDPOINTS": worker_endpoints,
 "PADDLE_TRAINERS_NUM": str(worker_num),
 "TRAINING_ROLE": "TRAINER",
-"PADDLE_TRAINER_ID": str(cur_worker.rank),
-"PADDLE_WITH_GLOO": "1"
+"PADDLE_TRAINER_ID": str(cur_worker.rank)
 }
 current_env.update(proc_env)
......
@@ -11,3 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .fs import LocalFS, HDFSClient
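Because of the re-export above, the filesystem helpers can now be imported from `paddle.distributed.fleet.utils` directly, which is also what the updated fs.py docstrings below switch to. A small local-filesystem sketch; the directory and file names are made up for illustration:

from paddle.distributed.fleet.utils import LocalFS

client = LocalFS()
client.mkdirs("test_fleet_utils_dir")        # create a local directory
client.touch("test_fleet_utils_dir/a.txt")   # create an empty file
subdirs, files = client.ls_dir("test_fleet_utils_dir")
print(files)                                 # ['a.txt']
client.delete("test_fleet_utils_dir")        # clean up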
@@ -120,7 +120,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 subdirs, files = client.ls_dir("./")
@@ -140,7 +140,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 subdirs, files = client.ls_dir("./")
@@ -160,7 +160,7 @@ class LocalFS(FS):
 def mkdirs(self, fs_path):
 """
-Create a remote HDFS directory.
+Create a local directory.
 Args:
 fs_path(str): The local directory path.
@@ -168,7 +168,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.mkdirs("test_mkdirs")
@@ -189,7 +189,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.touch("test_rename_src")
@@ -217,7 +217,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.mkdirs("test_localFS_mkdirs")
@@ -247,7 +247,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.touch("test_is_file")
@@ -269,7 +269,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.mkdirs("test_is_dir")
@@ -292,7 +292,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 ret = local_fs.is_exist("test_is_exist")
@@ -311,7 +311,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.touch("test_touch")
@@ -332,13 +332,11 @@ class LocalFS(FS):
 src_path(str): Name of the file or directory, that's needed to be moved.
 dst_path(str): Name of the file or directory to which to move to.
 overwrite(bool): Whether to re-write `dst_path` if that exists. Default is False.
-test_exists(bool): Check the existence of `src_path` and `dst_path` .
-When `test_exists` is set true, if `src_path` doesn't exist or `dst_path` exists, program will throw an Excetption.
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 client.touch("test_mv_src")
@@ -369,7 +367,7 @@ class LocalFS(FS):
 Examples:
 .. code-block:: python
-from paddle.distributed.fleet.utils.fs import LocalFS
+from paddle.distributed.fleet.utils import LocalFS
 client = LocalFS()
 subdirs = client.list_dirs("./")
@@ -432,7 +430,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -493,7 +491,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -526,7 +524,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -587,7 +585,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -629,7 +627,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -661,7 +659,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -695,7 +693,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -740,7 +738,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -784,7 +782,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -830,7 +828,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -893,7 +891,7 @@ class HDFSClient(FS):
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
@@ -919,12 +917,14 @@ class HDFSClient(FS):
 Args:
 fs_path(str): The HDFS file path.
+exist_ok(bool): When `fs_path` exists, if `exist_ok` is set false,
+program will throw an Exception. Default is true.
 Examples:
 .. code-block:: text
-from paddle.distributed.fleet.utils.fs import HDFSClient
+from paddle.distributed.fleet.utils import HDFSClient
 hadoop_home = "/home/client/hadoop-client/hadoop/"
 configs = {
......
@@ -28,7 +28,6 @@ import numpy as np
 import ctr_dataset_reader
 from test_dist_fleet_base import runtime_main, FleetDistRunnerBase
-from paddle.distributed.fleet.base.util_factory import fleet_util
 paddle.enable_static()
@@ -180,13 +179,13 @@ class TestDistCTR2x2(FleetDistRunnerBase):
 fetch_list=[self.avg_cost.name])
 loss_val = np.mean(loss_val)
 # TODO(randomly fail)
-# reduce_output = fleet_util.all_reduce(
+# reduce_output = fleet.util.all_reduce(
 # np.array(loss_val), mode="sum")
-# loss_all_trainer = fleet_util.all_gather(float(loss_val))
+# loss_all_trainer = fleet.util.all_gather(float(loss_val))
 # loss_val = float(reduce_output) / len(loss_all_trainer)
 message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
 loss_val)
-fleet_util.print_on_rank(message, 0)
+fleet.util.print_on_rank(message, 0)
 pass_time = time.time() - pass_start
 except fluid.core.EOFException:
......
@@ -29,7 +29,6 @@ import numpy as np
 import ctr_dataset_reader
 from test_dist_fleet_base import runtime_main, FleetDistRunnerBase
 from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader
-from paddle.distributed.fleet.base.util_factory import fleet_util
 # Fix seed for test
 fluid.default_startup_program().random_seed = 1
@@ -76,13 +75,13 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
 loss_val = exe.run(program=fleet.main_program,
 fetch_list=[self.avg_cost.name])
 loss_val = np.mean(loss_val)
-reduce_output = fleet_util.all_reduce(
+reduce_output = fleet.util.all_reduce(
 np.array(loss_val), mode="sum")
-loss_all_trainer = fleet_util.all_gather(float(loss_val))
+loss_all_trainer = fleet.util.all_gather(float(loss_val))
 loss_val = float(reduce_output) / len(loss_all_trainer)
 message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
 loss_val)
-fleet_util.print_on_rank(message, 0)
+fleet.util.print_on_rank(message, 0)
 pass_time = time.time() - pass_start
 except fluid.core.EOFException:
......
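The lines migrated above implement a common pattern: average a per-trainer scalar by summing it with all_reduce and counting participants with all_gather. Isolated, and assuming fleet.init() has already set up the communicator, it reads roughly as follows (the local loss value is a placeholder):

import numpy as np
import paddle.distributed.fleet as fleet

loss_val = 0.5  # local loss on this trainer (placeholder value)
reduce_output = fleet.util.all_reduce(np.array(loss_val), mode="sum")
loss_all_trainer = fleet.util.all_gather(float(loss_val))
# sum over trainers divided by the number of trainers that reported
loss_val = float(reduce_output) / len(loss_all_trainer)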
@@ -29,7 +29,6 @@ import numpy as np
import ctr_dataset_reader
from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase
from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader
-from paddle.distributed.fleet.base.util_factory import fleet_util
paddle.enable_static()
@@ -182,7 +181,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
thread_num = int(os.getenv("CPU_NUM", 2))
batch_size = 128
-filelist = fleet_util.get_file_shard(train_file_list)
+filelist = fleet.util.get_file_shard(train_file_list)
print("filelist: {}".format(filelist))
# config dataset
......
@@ -32,7 +32,6 @@ import os
import signal
from functools import reduce
from test_dist_fleet_base import runtime_main, FleetDistRunnerBase
-from paddle.distributed.fleet.base.util_factory import fleet_util
paddle.enable_static()
@@ -238,7 +237,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
loss_val = np.mean(loss_val)
message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
loss_val)
-fleet_util.print_on_rank(message, 0)
+fleet.util.print_on_rank(message, 0)
pass_time = time.time() - pass_start
except fluid.core.EOFException:
......
@@ -34,8 +34,7 @@ import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
-from paddle.distributed.fleet.base.util_factory import fleet_util
-from paddle.distributed.fleet import fleet
+import paddle.distributed.fleet as fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
__all__ = ['FleetDistRunnerBase', 'TestFleetBase', 'runtime_main']
@@ -97,7 +96,7 @@ class FleetDistRunnerBase(object):
self.dump_fields_path = os.getenv("dump_fields_path", "")
debug = int(os.getenv("Debug", "0"))
# TODO(update strategy to support dump params)
-if False: #debug:
+if False: # debug:
self.strategy.set_debug_opt({
"dump_param": self.dump_param,
"dump_fields": self.dump_fields,
@@ -372,8 +371,6 @@ def runtime_main(test_class):
strategy = model.build_strategy(args)
avg_cost = model.net(args)
model.build_optimizer(avg_cost, strategy)
-fleet_util._set_strategy(strategy)
-fleet_util._set_role_maker(role)
if args.role == "pserver":
model.run_pserver(args)
else:
......
@@ -34,8 +34,7 @@ import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
-from paddle.distributed.fleet.base.util_factory import fleet_util
-from paddle.distributed.fleet import fleet
+import paddle.distributed.fleet as fleet
__all__ = ['FleetDistHeterRunnerBase', 'TestFleetHeterBase', 'runtime_main']
@@ -376,8 +375,6 @@ def runtime_main(test_class):
strategy = model.build_strategy(args)
avg_cost = model.net(args)
model.build_optimizer(avg_cost, strategy)
-fleet_util._set_strategy(strategy)
-fleet_util._set_role_maker(role)
if args.role == "pserver" or args.role == "heter_trainer":
model.run_pserver(args)
......
@@ -19,7 +19,6 @@ import os
import math
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
-from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
import paddle
......
@@ -107,7 +107,7 @@ class TestFleetBase(unittest.TestCase):
def test_util(self):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
-self.assertEqual(fleet.util(), None)
+self.assertNotEqual(fleet.util, None)
def test_barrier_worker(self):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
......
@@ -436,12 +436,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer.minimize(avg_cost)
comm_world = "server"
-fleet.util().barrier(comm_world)
+fleet.util.barrier(comm_world)
-gather = fleet.util().all_gather(1, comm_world)
+gather = fleet.util.all_gather(1, comm_world)
self.assertEqual(gather[0], 1)
-all_reduce = fleet.util().all_reduce(1, "sum", comm_world)
+all_reduce = fleet.util.all_reduce(1, "sum", comm_world)
self.assertEqual(1, all_reduce)
self.clean(tmp)
@@ -752,12 +752,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer.minimize(avg_cost)
comm_world = "server"
-fleet.util().barrier(comm_world)
+fleet.util.barrier(comm_world)
-gather = fleet.util().all_gather(1, comm_world)
+gather = fleet.util.all_gather(1, comm_world)
self.assertEqual(gather[0], 1)
-all_reduce = fleet.util().all_reduce(1, "sum", comm_world)
+all_reduce = fleet.util.all_reduce(1, "sum", comm_world)
self.assertEqual(1, all_reduce)
self.clean(tmp)
......
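For the same reason, the Gloo-based collectives in this test are now reached through the attribute rather than a util() call. A condensed sketch, assuming the role-maker/Gloo environment has already been configured as in the surrounding test and fleet.init() has run:

import paddle.distributed.fleet as fleet

comm_world = "server"               # the test also exercises this on the server world
fleet.util.barrier(comm_world)      # block until every member of the world arrives
gather = fleet.util.all_gather(1, comm_world)        # list with one entry per member
all_reduce = fleet.util.all_reduce(1, "sum", comm_world)  # 1 when only one member reports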
@@ -22,7 +22,6 @@ import tempfile
import os
import sys
from paddle.dataset.common import download, DATA_HOME
-from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet.base.role_maker as role_maker
@@ -59,8 +58,7 @@ class TestFleetUtil(unittest.TestCase):
import paddle.distributed.fleet.base.role_maker as role_maker
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
-default_util = fleet.util()
-self.assertEqual(default_util, None)
+self.assertNotEqual(fleet.util, None)
def test_set_user_defined_util(self):
import paddle.distributed.fleet as fleet
@@ -76,17 +74,19 @@ class TestFleetUtil(unittest.TestCase):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
my_util = UserDefinedUtil()
-fleet.set_util(my_util)
+fleet.util = my_util
-user_id = fleet.util().get_user_id()
+user_id = fleet.util.get_user_id()
self.assertEqual(user_id, 10)
def test_fs(self):
-from paddle.distributed.fleet.utils.fs import LocalFS
+import paddle.distributed.fleet as fleet
+from paddle.distributed.fleet.utils import LocalFS
fs = LocalFS()
dirs, files = fs.ls_dir("test_tmp")
dirs, files = fs.ls_dir("./")
self.assertFalse(fs.need_upload_download())
-fleet_util._set_file_system(fs)
+fleet.util._set_file_system(fs)
def download_files(self):
path = download(self.proto_data_url, self.module_name,
@@ -98,7 +98,8 @@ class TestFleetUtil(unittest.TestCase):
return unzip_folder
def test_get_file_shard(self):
-self.assertRaises(Exception, fleet_util.get_file_shard, "files")
+import paddle.distributed.fleet as fleet
+self.assertRaises(Exception, fleet.util.get_file_shard, "files")
try:
import netifaces
except:
@@ -112,18 +113,20 @@ class TestFleetUtil(unittest.TestCase):
role=role_maker.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
-fleet_util._set_role_maker(role)
-files = fleet_util.get_file_shard(["1", "2", "3"])
+fleet.init(role)
+files = fleet.util.get_file_shard(["1", "2", "3"])
self.assertTrue(len(files) == 2 and "1" in files and "2" in files)
def test_program_type_trans(self):
+import paddle.distributed.fleet as fleet
data_dir = self.download_files()
program_dir = os.path.join(data_dir, self.pruned_dir)
text_program = "pruned_main_program.pbtxt"
binary_program = "pruned_main_program.bin"
-text_to_binary = fleet_util._program_type_trans(program_dir,
+text_to_binary = fleet.util._program_type_trans(program_dir,
text_program, True)
-binary_to_text = fleet_util._program_type_trans(program_dir,
+binary_to_text = fleet.util._program_type_trans(program_dir,
binary_program, False)
self.assertTrue(
os.path.exists(os.path.join(program_dir, text_to_binary)))
@@ -131,6 +134,7 @@ class TestFleetUtil(unittest.TestCase):
os.path.exists(os.path.join(program_dir, binary_to_text)))
def test_prams_check(self):
+import paddle.distributed.fleet as fleet
data_dir = self.download_files()
class config:
@@ -160,11 +164,11 @@ class TestFleetUtil(unittest.TestCase):
# test saved var's shape
conf.dump_program_filename = "pruned_main_program.save_var_shape_not_match"
-self.assertRaises(Exception, fleet_util._params_check)
+self.assertRaises(Exception, fleet.util._params_check)
# test program.proto without feed_op and fetch_op
conf.dump_program_filename = "pruned_main_program.no_feed_fetch"
-results = fleet_util._params_check(conf)
+results = fleet.util._params_check(conf)
self.assertTrue(len(results) == 1)
np.testing.assert_array_almost_equal(
results[0], np.array(
@@ -172,11 +176,11 @@ class TestFleetUtil(unittest.TestCase):
# test feed_var's shape
conf.dump_program_filename = "pruned_main_program.feed_var_shape_not_match"
-self.assertRaises(Exception, fleet_util._params_check)
+self.assertRaises(Exception, fleet.util._params_check)
# test correct case with feed_vars_filelist
conf.dump_program_filename = "pruned_main_program.pbtxt"
-results = fleet_util._params_check(conf)
+results = fleet.util._params_check(conf)
self.assertTrue(len(results) == 1)
np.testing.assert_array_almost_equal(
results[0], np.array(
@@ -186,13 +190,14 @@ class TestFleetUtil(unittest.TestCase):
conf.feed_config.feeded_vars_filelist = None
# test feed var with lod_level >= 2
conf.dump_program_filename = "pruned_main_program.feed_lod2"
-self.assertRaises(Exception, fleet_util._params_check)
+self.assertRaises(Exception, fleet.util._params_check)
conf.dump_program_filename = "pruned_main_program.pbtxt"
-results = fleet_util._params_check(conf)
+results = fleet.util._params_check(conf)
self.assertTrue(len(results) == 1)
def test_proto_check(self):
+import paddle.distributed.fleet as fleet
data_dir = self.download_files()
class config:
@@ -210,7 +215,7 @@ class TestFleetUtil(unittest.TestCase):
"pruned_main_program.save_var_shape_not_match"))
conf.is_text_pruned_program = True
conf.draw = False
-res = fleet_util._proto_check(conf)
+res = fleet.util._proto_check(conf)
self.assertFalse(res)
# test match
@@ -222,10 +227,11 @@ class TestFleetUtil(unittest.TestCase):
else:
conf.draw = True
conf.draw_out_name = "pruned_check"
-res = fleet_util._proto_check(conf)
+res = fleet.util._proto_check(conf)
self.assertTrue(res)
def test_visualize(self):
+import paddle.distributed.fleet as fleet
if sys.platform == 'win32' or sys.platform == 'sys.platform':
pass
else:
@@ -234,10 +240,10 @@ class TestFleetUtil(unittest.TestCase):
data_dir,
os.path.join(self.train_dir, "join_main_program.pbtxt"))
is_text = True
-program = fleet_util._load_program(program_path, is_text)
+program = fleet.util._load_program(program_path, is_text)
output_dir = os.path.join(data_dir, self.train_dir)
output_filename = "draw_prog"
-fleet_util._visualize_graphviz(program, output_dir, output_filename)
+fleet.util._visualize_graphviz(program, output_dir, output_filename)
self.assertTrue(
os.path.exists(
os.path.join(output_dir, output_filename + ".dot")))
......
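Two smaller pieces of the util test above are worth isolating: the LocalFS helper, now re-exported from paddle.distributed.fleet.utils, and the assignment of a user-defined util object to fleet.util. In the sketch below the UserDefinedUtil body is a stand-in (the real class lives in the test and is not shown in this diff), its UtilBase base class is an assumption, and a PaddleCloud environment is assumed for fleet.init():

import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.utils import LocalFS

# LocalFS mirrors the HDFSClient interface for local paths.
fs = LocalFS()
dirs, files = fs.ls_dir("./")       # (sub-directories, files) of the cwd
assert not fs.need_upload_download()  # local paths need no staging

# Stand-in for the UserDefinedUtil defined elsewhere in the test.
class UserDefinedUtil(fleet.UtilBase):
    def get_user_id(self):
        return 10

fleet.init(role_maker.PaddleCloudRoleMaker(is_collective=True))
fleet.util = UserDefinedUtil()        # replaces the default util object
assert fleet.util.get_user_id() == 10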
@@ -49,7 +49,7 @@ class TestDygraphDataLoaderSingalHandler(unittest.TestCase):
test_process.start()
set_child_signal_handler(id(self), test_process.pid)
-time.sleep(3)
+time.sleep(5)
except core.EnforceNotMet as ex:
self.assertIn("FatalError", cpt.get_exception_message(ex))
exception = ex
......
@@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase):
def test_simple_fc_with_new_strategy(self):
# use_cuda, use_reduce
-self._compare_reduce_and_allreduce(simple_fc_net, True)
-self._compare_reduce_and_allreduce(simple_fc_net, False)
+# NOTE: the computation result of nccl_reduce is non-deterministic,
+# related issue: https://github.com/NVIDIA/nccl/issues/157
+self._compare_reduce_and_allreduce(simple_fc_net, True, 1e-5, 1e-2)
+self._compare_reduce_and_allreduce(simple_fc_net, False, 1e-5, 1e-2)
def check_simple_fc_parallel_accuracy(self, use_cuda):
if use_cuda and not core.is_compiled_with_cuda():
@@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase):
# NOTE: the computation result of nccl_reduce is non-deterministic,
# related issue: https://github.com/NVIDIA/nccl/issues/157
self._compare_reduce_and_allreduce(fc_with_batchnorm, True, 1e-5, 1e-2)
-self._compare_reduce_and_allreduce(fc_with_batchnorm, False)
+self._compare_reduce_and_allreduce(fc_with_batchnorm, False, 1e-5, 1e-2)
if __name__ == '__main__':
......
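The widened arguments in the last hunk (1e-5 and 1e-2) are read here as loss-comparison deltas: because nccl_reduce is not bit-wise deterministic, an exact equality check between the reduce and all_reduce runs is replaced by a tolerance. That reading of the helper's signature is an assumption; in spirit, and with placeholder loss values, the check amounts to:

import numpy as np

def compare_losses(first_a, first_b, last_a, last_b, delta1=1e-5, delta2=1e-2):
    # early losses should agree tightly, end-of-training losses only loosely
    np.testing.assert_allclose(first_a, first_b, atol=delta1)
    np.testing.assert_allclose(last_a, last_b, atol=delta2)

# Illustrative values only: identical first losses, slightly diverged last losses.
compare_losses(0.6931, 0.6931, 0.1203, 0.1187)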