Commit 2725f51d authored by smallv0221

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into yxp0924

......@@ -446,6 +446,9 @@ function(nv_library TARGET_NAME)
message(FATAL "Please specify source file or library in nv_library.")
endif()
endif(nv_library_SRCS)
if (WIN32)
set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
endif(WIN32)
endif()
endfunction(nv_library)
......@@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME)
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
common_link(${TARGET_NAME})
endif()
if (WIN32)
set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
endif(WIN32)
endif()
endfunction(nv_binary)
......@@ -482,6 +488,9 @@ function(nv_test TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
if (WIN32)
set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
endif(WIN32)
endif()
endfunction(nv_test)
......
......@@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING
set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING
"A path setting paddle inference shared and static libraries")
# TODO(zhaolong)
# At present, the size of static lib in Windows exceeds the system limit,
# so the generation of static lib is temporarily turned off.
# At present, the size of static lib in Windows is very large,
# so we need to crop the library size.
if(WIN32)
# TODO: remove this option
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." OFF)
......@@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta
copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR})
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*)
if(WIN32)
set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/${CMAKE_BUILD_TYPE}/paddle_fluid_c.*)
else(WIN32)
set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*)
endif(WIN32)
copy(inference_lib_dist
SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_fluid_c_lib}
......
......@@ -26,4 +26,7 @@ if(WITH_GPU)
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
endif()
if(WIN32)
set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
endif()
<?xml version="1.0" encoding="utf-8"?>
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemDefinitionGroup>
<CudaCompile>
<!-- Project schema: Host properties -->
<UseHostDefines>true</UseHostDefines>
<Emulation>false</Emulation>
<HostDebugInfo Condition="'$(Configuration)' == 'Debug'">true</HostDebugInfo>
<HostDebugInfo Condition="'$(Configuration)' != 'Debug'">false</HostDebugInfo>
<FastMath>false</FastMath>
<Optimization>InheritFromHost</Optimization>
<Runtime>InheritFromHost</Runtime>
<RuntimeChecks>InheritFromHost</RuntimeChecks>
<TypeInfo>InheritFromHost</TypeInfo>
<Warning>InheritFromHost</Warning>
<BaseCommandLineTemplate>-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]</BaseCommandLineTemplate>
<BuildCommandLineTemplate>--use-local-env</BuildCommandLineTemplate>
<BuildDynamicCommandLineTemplate>[CodeGeneration]</BuildDynamicCommandLineTemplate>
<CleanCommandLineTemplate>-clean</CleanCommandLineTemplate>
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
<HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate>
<DriverApiCommandLineTemplate>%(BaseCommandLineTemplate) [CompileOut] "%(FullPath)"</DriverApiCommandLineTemplate>
<RuntimeApiCommandLineTemplate>%(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)"</RuntimeApiCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate)
# Runtime API (NVCC Compilation Type is hybrid object or .c file)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate)
</CommandLineTemplate>
<ExecutionDescription>Compiling CUDA source file %(Identity)...</ExecutionDescription>
<ExclusionDescription>Skipping CUDA source file %(Identity) (excluded from build).</ExclusionDescription>
<!-- Miscellaneous -->
<PropsCacheOutputFile>%(Filename)%(Extension).cache</PropsCacheOutputFile>
<PropsCacheOutputPath>$(IntDir)%(PropsCacheOutputFile)</PropsCacheOutputPath>
<CudaCompileCoreProject>$(MSBuildProjectFullPath)</CudaCompileCoreProject>
</CudaCompile>
<CudaLink>
<PerformDeviceLink>true</PerformDeviceLink>
<LinkOut>$(IntDir)$(TargetName).device-link.obj</LinkOut>
<AdditionalLibraryDirectories></AdditionalLibraryDirectories>
<UseHostLibraryDirectories>true</UseHostLibraryDirectories>
<AdditionalDependencies></AdditionalDependencies>
<UseHostLibraryDependencies>true</UseHostLibraryDependencies>
<GPUDebugInfo>InheritFromProject</GPUDebugInfo>
<Optimization>InheritFromProject</Optimization>
<!-- Implicitly inherited from the project via @(CudaCompile) -->
<CodeGeneration></CodeGeneration>
<RuntimeChecks></RuntimeChecks>
<Runtime></Runtime>
<TargetMachinePlatform></TargetMachinePlatform>
<TypeInfo></TypeInfo>
<Warning></Warning>
<Inputs></Inputs>
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
<HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate>
<LinkCommandLineTemplate>"$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs]</LinkCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
%(LinkCommandLineTemplate)
</CommandLineTemplate>
</CudaLink>
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
</Link>
<ClCompile>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
</Project>
......@@ -44,14 +44,9 @@ add_subdirectory(api)
set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor
zero_copy_tensor reset_tensor_array
analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg})
# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU
if(WIN32 AND WITH_GPU)
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_API})
else()
create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API})
endif()
create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API})
if(NOT APPLE AND NOT WIN32)
if(NOT APPLE)
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
......
......@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
std::vector<std::vector<int>> inputs_shape;
for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape.emplace_back(inputs[i].shape);
}
MkldnnPreSet(inputs_shape);
#endif
}
void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
<< platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
......@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
ss << inputs[i].shape[j] << "-";
for (size_t i = 0; i < inputs_shape.size(); ++i) {
for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
ss << inputs_shape[i][j] << "-";
}
}
VLOG(2) << "Set input shape=" << ss.str();
......@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) {
std::vector<std::vector<int>> shape_vector;
auto names = GetInputNames();
for (size_t i = 0; i < names.size(); ++i) {
auto in_tensor = GetInputTensor(names[i]);
shape_vector.emplace_back(in_tensor->shape());
}
MkldnnPreSet(shape_vector);
}
#endif
executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
......@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
#if defined(PADDLE_WITH_MKLML)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
......
......@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensor shape
///
void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
......
......@@ -68,11 +68,6 @@ rm -rf *
for WITH_STATIC_LIB in ON OFF; do
if [ $(echo `uname` | grep "Win") != "" ]; then
# TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready.
if [ $TEST_GPU_CPU == ON] && [ $WITH_STATIC_LIB ==ON ]; then
return 0
fi
# -----simple_on_word2vec on windows-----
cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \
......
......@@ -50,8 +50,9 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
"It must use CUDAPlace.");
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
const Tensor* input = ctx.Input<Tensor>("Input");
auto* filter = ctx.Input<Tensor>("Filter");
auto* output = ctx.Output<Tensor>("Output");
......@@ -60,14 +61,16 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
bool exhaustive_search =
FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
bool deterministic = FLAGS_cudnn_deterministic;
auto exhaustive_deterministic = exhaustive_search && deterministic;
PADDLE_ENFORCE_EQ(exhaustive_deterministic, false,
platform::errors::InvalidArgument(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."));
if (exhaustive_search && FLAGS_cudnn_deterministic) {
PADDLE_THROW(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time.");
}
const std::string padding_algorithm =
ctx.Attr<std::string>("padding_algorithm");
const std::string data_format = ctx.Attr<std::string>("data_format");
......@@ -197,7 +200,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
&transformed_input);
} break;
default:
PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
PADDLE_THROW(platform::errors::InvalidArgument(
"ConvOp only support tensors with 4 or 5 dimensions."));
}
} else {
......@@ -317,8 +321,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
"It must use CUDAPlace.");
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
auto input = ctx.Input<Tensor>("Input");
auto filter = ctx.Input<Tensor>("Filter");
auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
......@@ -337,14 +342,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
int groups = ctx.Attr<int>("groups");
bool exhaustive_search =
FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
bool deterministic = FLAGS_cudnn_deterministic;
if (exhaustive_search && deterministic) {
PADDLE_THROW(
"Can't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time.");
}
auto exhaustive_deterministic = exhaustive_search && deterministic;
PADDLE_ENFORCE_EQ(exhaustive_deterministic, false,
platform::errors::InvalidArgument(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."));
const std::string data_format = ctx.Attr<std::string>("data_format");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
......@@ -495,7 +502,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
&transformed_input);
} break;
default:
PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
PADDLE_THROW(platform::errors::InvalidArgument(
"ConvOp only support tensors with 4 or 5 dimensions."));
}
} else {
transformed_input.ShareDataWith(transformed_input_channel);
......@@ -701,8 +709,9 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
"It must use CUDAPlace.");
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
auto X = ctx.Input<Tensor>("Input");
auto W = ctx.Input<Tensor>("Filter");
auto dO = ctx.Input<Tensor>("DOutput");
......@@ -736,14 +745,16 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
const std::vector<int>& strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
bool exhaustive_search =
FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");
bool deterministic = FLAGS_cudnn_deterministic;
if (exhaustive_search && deterministic) {
PADDLE_THROW(
"Can't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time.");
}
auto exhaustive_deterministic = exhaustive_search && deterministic;
PADDLE_ENFORCE_EQ(exhaustive_deterministic, false,
platform::errors::InvalidArgument(
"Cann't set exhaustive_search True and "
"FLAGS_cudnn_deterministic True at same time."));
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
......@@ -878,7 +889,8 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
}
} break;
default:
PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions.");
PADDLE_THROW(platform::errors::InvalidArgument(
"ConvOp only support tensors with 4 or 5 dimensions."));
}
} else {
......
......@@ -685,8 +685,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true,
"It must use CPUPlace.");
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet("It must use CPUPlace."));
const Tensor* X = ctx.Input<Tensor>("Input");
const Tensor* dY = ctx.Input<Tensor>("DOutput");
const Tensor* ddX = ctx.Input<Tensor>("DDInput");
......@@ -982,11 +983,20 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ(
output->dims()[output->dims().size() - 1] %
input->dims()[input->dims().size() - 1],
0, "The output channels must be a multiple of the input channels");
0, platform::errors::InvalidArgument(
"ShapeError: The output channels must be a multiple of the "
"input channels. But receivced output channel number is %d "
"and input channel number is %d",
output->dims()[output->dims().size() - 1],
input->dims()[input->dims().size() - 1]));
} else {
PADDLE_ENFORCE_EQ(
output->dims()[1] % input->dims()[1], 0,
"The output channels must be a multiple of the input channels");
platform::errors::InvalidArgument(
"ShapeError: The output channels must be a multiple of the "
"input channels. But receivced output channel number is %d "
"and input channel number is %d",
output->dims()[1], input->dims()[1]));
}
// transform tensor
Tensor transformed_input(input->type());
......
......@@ -51,8 +51,9 @@ template <typename T>
class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
"It must use CUDAPlace.");
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
auto* input = ctx.Input<Tensor>("Input");
auto* filter = ctx.Input<Tensor>("Filter");
auto* output = ctx.Output<Tensor>("Output");
......@@ -145,9 +146,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
ctx, input_pad, input_transpose, pad_value, &transformed_input);
} break;
default:
PADDLE_ENFORCE_EQ(
rank == 4 || rank == 5, true,
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor.");
PADDLE_THROW(platform::errors::InvalidArgument(
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."));
}
} else {
transformed_input = input_transpose;
......@@ -290,8 +290,9 @@ template <typename T>
class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use CUDAPlace.");
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()), true,
paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace."));
auto input = ctx.Input<Tensor>("Input");
auto filter = ctx.Input<Tensor>("Filter");
auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
......@@ -393,9 +394,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
&transformed_output_grad);
} break;
default:
PADDLE_ENFORCE_EQ(
rank == 4 || rank == 5, true,
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor.");
PADDLE_THROW(platform::errors::InvalidArgument(
"Op(ConvTranspose) only supports 4-D or 5-D input Tensor."));
}
} else {
transformed_output_grad = output_grad_transpose;
......
......@@ -580,7 +580,12 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
output->mutable_data<T>(context.GetPlace());
int groups = context.Attr<int>("groups");
PADDLE_ENFORCE_EQ(groups, filter.dims()[0]);
PADDLE_ENFORCE_EQ(
groups, filter.dims()[0],
platform::errors::InvalidArgument(
"groups should be error to the 1st dimension of filter. But "
"received groups is %d and filter dimension[0] is %d",
groups, filter.dims()[0]));
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
......@@ -588,7 +593,10 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
for (auto v : dilations) {
PADDLE_ENFORCE_EQ(v, 1);
PADDLE_ENFORCE_EQ(v, 1, platform::errors::InvalidArgument(
"dilations should be 1 in depthwise conv. "
"But received dilations is %d",
v));
}
auto in_dims = input->dims();
......
......@@ -46,14 +46,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
platform::errors::InvalidArgument(
"The square of upscale_factor[%u] should divide the "
"number of channel[%u]",
input_dims[1], upscale_factor * upscale_factor));
upscale_factor * upscale_factor, input_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
input_dims[3] % (upscale_factor * upscale_factor), 0,
platform::errors::InvalidArgument(
"The square of upscale_factor[%u] should divide the "
"number of channel[%u]",
input_dims[3], upscale_factor * upscale_factor));
upscale_factor * upscale_factor, input_dims[3]));
}
auto output_dims = input_dims;
output_dims[0] = input_dims[0];
......
......@@ -13,7 +13,7 @@
# limitations under the License.
# TODO: define distributed api under this directory,
from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker
from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker
from .base.distributed_strategy import DistributedStrategy
from .base.fleet_base import Fleet
from .base.util_factory import UtilBase
......@@ -26,6 +26,7 @@ __all__ = [
"UserDefinedRoleMaker",
"PaddleCloudRoleMaker",
"Fleet",
"Role",
]
fleet = Fleet()
......@@ -39,8 +40,7 @@ server_num = fleet.server_num
server_index = fleet.server_index
server_endpoints = fleet.server_endpoints
is_server = fleet.is_server
set_util = fleet.set_util
util = fleet.util
util = UtilBase()
barrier_worker = fleet.barrier_worker
init_worker = fleet.init_worker
init_server = fleet.init_server
......
......@@ -23,7 +23,6 @@ from .strategy_compiler import StrategyCompiler
from .distributed_strategy import DistributedStrategy
from .meta_optimizer_factory import MetaOptimizerFactory
from .runtime_factory import RuntimeFactory
from .util_factory import UtilFactory
from paddle.fluid.wrapped_decorator import wrap_decorator
from paddle.fluid.dygraph import parallel_helper
......@@ -120,7 +119,6 @@ class Fleet(object):
self.strategy_compiler = None
self._is_collective = False
self._runtime_handle = None
self._util = None
def init(self, role_maker=None, is_collective=False):
"""
......@@ -182,6 +180,9 @@ class Fleet(object):
format(type(role_maker)))
self._role_maker._generate_role()
import paddle.distributed.fleet as fleet
fleet.util._set_role_maker(self._role_maker)
self.strategy_compiler = StrategyCompiler()
if paddle.fluid.framework.in_dygraph_mode():
if parallel_helper._is_parallel_ctx_initialized():
......@@ -353,29 +354,6 @@ class Fleet(object):
return self._role_maker._is_server(
) or self._role_maker._is_heter_worker()
def set_util(self, util):
self._util = util
def util(self):
"""
Utility functions that can be used under certain runtime
return util
Returns:
UtilBase: instance of UtilBase, can use distributed ops/tools easily.
Examples:
.. code-block:: python
import paddle.distributed.fleet as fleet
fleet.init()
util = fleet.util
files = ["1.log", "2.log", "3.log", "4.log"]
files = util.get_file_shard()
"""
return self._util
def barrier_worker(self):
"""
barrier all workers
......@@ -1102,7 +1080,7 @@ class Fleet(object):
if self._runtime_handle is None:
self._runtime_handle = RuntimeFactory()._create_runtime(context)
if self._util is None:
self._util = UtilFactory()._create_util(context)
import paddle.distributed.fleet as fleet
fleet.util._set_strategy(context["valid_strategy"])
return optimize_ops, params_grads
......@@ -73,11 +73,13 @@ class UtilBase(object):
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import numpy as np
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
role = PaddleCloudRoleMaker(
......@@ -85,19 +87,18 @@ class UtilBase(object):
init_gloo=True,
path="./tmp_gloo")
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
input = [1, 2]
output = fleet_util.all_reduce(input, "sum", "server")
output = fleet.util.all_reduce(input, "sum", "server")
print(output)
# [2, 4]
elif fleet.is_worker():
input = np.array([3, 4])
output = fleet_util.all_reduce(input, "sum", "worker")
output = fleet.util.all_reduce(input, "sum", "worker")
print(output)
# [6, 8]
output = fleet_util.all_reduce(input, "sum", "all")
output = fleet.util.all_reduce(input, "sum", "all")
print(output)
# [8, 12]
if __name__ == "__main__":
......@@ -117,10 +118,12 @@ class UtilBase(object):
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
role = PaddleCloudRoleMaker(
......@@ -128,15 +131,14 @@ class UtilBase(object):
init_gloo=True,
path="./tmp_gloo")
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
fleet_util.barrier("server")
fleet.util.barrier("server")
print("all server arrive here")
elif fleet.is_worker():
fleet_util.barrier("worker")
fleet.util.barrier("worker")
print("all server arrive here")
fleet_util.barrier("all")
fleet.util.barrier("all")
print("all servers and workers arrive here")
if __name__ == "__main__":
......@@ -160,10 +162,12 @@ class UtilBase(object):
.. code-block:: python
# Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` .
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import PaddleCloudRoleMaker
import sys
import os
os.environ["PADDLE_WITH_GLOO"] = "2"
def train():
role = PaddleCloudRoleMaker(
......@@ -171,19 +175,18 @@ class UtilBase(object):
init_gloo=True,
path="./tmp_gloo")
fleet.init(role)
fleet_util._set_role_maker(role)
if fleet.is_server():
input = fleet.server_index()
output = fleet_util.all_gather(input, "server")
output = fleet.util.all_gather(input, "server")
print(output)
# output = [0, 1]
elif fleet.is_worker():
input = fleet.worker_index()
output = fleet_util.all_gather(input, "worker")
output = fleet.util.all_gather(input, "worker")
# output = [0, 1]
print(output)
output = fleet_util.all_gather(input, "all")
output = fleet.util.all_gather(input, "all")
print(output)
# output = [0, 1, 0, 1]
......@@ -220,18 +223,20 @@ class UtilBase(object):
.. code-block:: python
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet.base.role_maker as role_maker
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import UserDefinedRoleMaker
role = role_maker.UserDefinedRoleMaker(
role = UserDefinedRoleMaker(
is_collective=False,
init_gloo=False,
current_id=0,
role=role_maker.Role.WORKER,
role=fleet.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
files = fleet_util.get_file_shard(["file1", "file2", "file3"])
fleet.init(role)
files = fleet.util.get_file_shard(["file1", "file2", "file3"])
print(files)
# files = ["file1", "file2"]
"""
if not isinstance(files, list):
......@@ -267,18 +272,19 @@ class UtilBase(object):
.. code-block:: python
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet.base.role_maker as role_maker
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet import UserDefinedRoleMaker
role = role_maker.UserDefinedRoleMaker(
role = UserDefinedRoleMaker(
is_collective=False,
init_gloo=False,
current_id=0,
role=role_maker.Role.WORKER,
role=fleet.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
fleet_util.print_on_rank("I'm worker 0", 0)
fleet.init(role)
fleet.util.print_on_rank("I'm worker 0", 0)
"""
if self.role_maker._worker_index() != rank_id:
return
......@@ -577,6 +583,3 @@ class UtilBase(object):
print("fetch_targets name: %s" % v.name)
print("fetch_targets: {}".format(results[i]))
return results
fleet_util = UtilFactory()._create_util(None)
......@@ -181,8 +181,8 @@ def get_gpus(gpus):
cuda_visible_devices_list = cuda_visible_devices.split(',')
for x in gpus.split(','):
assert x in cuda_visible_devices_list, "Can't find "\
"your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\
% (x, cuda_visible_devices)
"your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\
% (x, cuda_visible_devices)
res_gpus = [
cuda_visible_devices_list.index(x.strip())
for x in gpus.split(',')
......@@ -348,8 +348,7 @@ def launch_ps(args):
"PADDLE_PORT": cur_server.endpoint.split(":")[1],
"TRAINING_ROLE": "PSERVER",
"PADDLE_TRAINERS_NUM": str(worker_num),
"POD_IP": cur_server.endpoint.split(":")[0],
"PADDLE_WITH_GLOO": "1"
"POD_IP": cur_server.endpoint.split(":")[0]
}
current_env.update(proc_env)
......@@ -388,8 +387,7 @@ def launch_ps(args):
"PADDLE_TRAINER_ENDPOINTS": worker_endpoints,
"PADDLE_TRAINERS_NUM": str(worker_num),
"TRAINING_ROLE": "TRAINER",
"PADDLE_TRAINER_ID": str(cur_worker.rank),
"PADDLE_WITH_GLOO": "1"
"PADDLE_TRAINER_ID": str(cur_worker.rank)
}
current_env.update(proc_env)
......
......@@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .fs import LocalFS, HDFSClient
......@@ -120,7 +120,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
subdirs, files = client.ls_dir("./")
......@@ -140,7 +140,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
subdirs, files = client.ls_dir("./")
......@@ -160,7 +160,7 @@ class LocalFS(FS):
def mkdirs(self, fs_path):
"""
Create a remote HDFS directory.
Create a local directory.
Args:
fs_path(str): The local directory path.
......@@ -168,7 +168,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.mkdirs("test_mkdirs")
......@@ -189,7 +189,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_rename_src")
......@@ -217,7 +217,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.mkdirs("test_localFS_mkdirs")
......@@ -247,7 +247,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_is_file")
......@@ -269,7 +269,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.mkdirs("test_is_dir")
......@@ -292,7 +292,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
ret = local_fs.is_exist("test_is_exist")
......@@ -311,7 +311,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_touch")
......@@ -332,13 +332,11 @@ class LocalFS(FS):
src_path(str): Name of the file or directory, that's needed to be moved.
dst_path(str): Name of the file or directory to which to move to.
overwrite(bool): Whether to re-write `dst_path` if that exists. Default is False.
test_exists(bool): Check the existence of `src_path` and `dst_path` .
When `test_exists` is set true, if `src_path` doesn't exist or `dst_path` exists, program will throw an Excetption.
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
client.touch("test_mv_src")
......@@ -369,7 +367,7 @@ class LocalFS(FS):
Examples:
.. code-block:: python
from paddle.distributed.fleet.utils.fs import LocalFS
from paddle.distributed.fleet.utils import LocalFS
client = LocalFS()
subdirs = client.list_dirs("./")
......@@ -432,7 +430,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -493,7 +491,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -526,7 +524,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -587,7 +585,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -629,7 +627,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -661,7 +659,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -695,7 +693,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -740,7 +738,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -784,7 +782,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -830,7 +828,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -893,7 +891,7 @@ class HDFSClient(FS):
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......@@ -919,12 +917,14 @@ class HDFSClient(FS):
Args:
fs_path(str): The HDFS file path.
exist_ok(bool): When `fs_path` exists, if `exist_ok` is set false,
program will throw an Exception. Default is true.
Examples:
.. code-block:: text
from paddle.distributed.fleet.utils.fs import HDFSClient
from paddle.distributed.fleet.utils import HDFSClient
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
......
......@@ -28,7 +28,6 @@ import numpy as np
import ctr_dataset_reader
from test_dist_fleet_base import runtime_main, FleetDistRunnerBase
from paddle.distributed.fleet.base.util_factory import fleet_util
paddle.enable_static()
......@@ -180,13 +179,13 @@ class TestDistCTR2x2(FleetDistRunnerBase):
fetch_list=[self.avg_cost.name])
loss_val = np.mean(loss_val)
# TODO(randomly fail)
# reduce_output = fleet_util.all_reduce(
# reduce_output = fleet.util.all_reduce(
# np.array(loss_val), mode="sum")
# loss_all_trainer = fleet_util.all_gather(float(loss_val))
# loss_all_trainer = fleet.util.all_gather(float(loss_val))
# loss_val = float(reduce_output) / len(loss_all_trainer)
message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
loss_val)
fleet_util.print_on_rank(message, 0)
fleet.util.print_on_rank(message, 0)
pass_time = time.time() - pass_start
except fluid.core.EOFException:
......
......@@ -29,7 +29,6 @@ import numpy as np
import ctr_dataset_reader
from test_dist_fleet_base import runtime_main, FleetDistRunnerBase
from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader
from paddle.distributed.fleet.base.util_factory import fleet_util
# Fix seed for test
fluid.default_startup_program().random_seed = 1
......@@ -76,13 +75,13 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
loss_val = exe.run(program=fleet.main_program,
fetch_list=[self.avg_cost.name])
loss_val = np.mean(loss_val)
reduce_output = fleet_util.all_reduce(
reduce_output = fleet.util.all_reduce(
np.array(loss_val), mode="sum")
loss_all_trainer = fleet_util.all_gather(float(loss_val))
loss_all_trainer = fleet.util.all_gather(float(loss_val))
loss_val = float(reduce_output) / len(loss_all_trainer)
message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
loss_val)
fleet_util.print_on_rank(message, 0)
fleet.util.print_on_rank(message, 0)
pass_time = time.time() - pass_start
except fluid.core.EOFException:
......
......@@ -29,7 +29,6 @@ import numpy as np
import ctr_dataset_reader
from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase
from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader
from paddle.distributed.fleet.base.util_factory import fleet_util
paddle.enable_static()
......@@ -182,7 +181,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
thread_num = int(os.getenv("CPU_NUM", 2))
batch_size = 128
filelist = fleet_util.get_file_shard(train_file_list)
filelist = fleet.util.get_file_shard(train_file_list)
print("filelist: {}".format(filelist))
# config dataset
......
......@@ -32,7 +32,6 @@ import os
import signal
from functools import reduce
from test_dist_fleet_base import runtime_main, FleetDistRunnerBase
from paddle.distributed.fleet.base.util_factory import fleet_util
paddle.enable_static()
......@@ -198,7 +197,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
def net(self, args, batch_size=4, lr=0.01):
avg_cost, _, predict, self.reader = \
train_network(batch_size=batch_size, is_distributed=False,
is_sparse=True, is_self_contained_lr=False, is_pyreader=(args.reader == "pyreader"))
is_sparse=True, is_self_contained_lr=False, is_pyreader=(args.reader == "pyreader"))
self.avg_cost = avg_cost
self.predict = predict
......@@ -238,7 +237,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase):
loss_val = np.mean(loss_val)
message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
loss_val)
fleet_util.print_on_rank(message, 0)
fleet.util.print_on_rank(message, 0)
pass_time = time.time() - pass_start
except fluid.core.EOFException:
......
......@@ -34,8 +34,7 @@ import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
import paddle.distributed.fleet as fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
__all__ = ['FleetDistRunnerBase', 'TestFleetBase', 'runtime_main']
......@@ -97,7 +96,7 @@ class FleetDistRunnerBase(object):
self.dump_fields_path = os.getenv("dump_fields_path", "")
debug = int(os.getenv("Debug", "0"))
# TODO(update strategy to support dump params)
if False: #debug:
if False: # debug:
self.strategy.set_debug_opt({
"dump_param": self.dump_param,
"dump_fields": self.dump_fields,
......@@ -372,8 +371,6 @@ def runtime_main(test_class):
strategy = model.build_strategy(args)
avg_cost = model.net(args)
model.build_optimizer(avg_cost, strategy)
fleet_util._set_strategy(strategy)
fleet_util._set_role_maker(role)
if args.role == "pserver":
model.run_pserver(args)
else:
......
......@@ -34,8 +34,7 @@ import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
import paddle.distributed.fleet as fleet
__all__ = ['FleetDistHeterRunnerBase', 'TestFleetHeterBase', 'runtime_main']
......@@ -376,8 +375,6 @@ def runtime_main(test_class):
strategy = model.build_strategy(args)
avg_cost = model.net(args)
model.build_optimizer(avg_cost, strategy)
fleet_util._set_strategy(strategy)
fleet_util._set_role_maker(role)
if args.role == "pserver" or args.role == "heter_trainer":
model.run_pserver(args)
......
......@@ -19,7 +19,6 @@ import os
import math
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
import paddle
......
......@@ -107,7 +107,7 @@ class TestFleetBase(unittest.TestCase):
def test_util(self):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
self.assertEqual(fleet.util(), None)
self.assertNotEqual(fleet.util, None)
def test_barrier_worker(self):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
......
......@@ -436,12 +436,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer.minimize(avg_cost)
comm_world = "server"
fleet.util().barrier(comm_world)
fleet.util.barrier(comm_world)
gather = fleet.util().all_gather(1, comm_world)
gather = fleet.util.all_gather(1, comm_world)
self.assertEqual(gather[0], 1)
all_reduce = fleet.util().all_reduce(1, "sum", comm_world)
all_reduce = fleet.util.all_reduce(1, "sum", comm_world)
self.assertEqual(1, all_reduce)
self.clean(tmp)
......@@ -752,12 +752,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
optimizer.minimize(avg_cost)
comm_world = "server"
fleet.util().barrier(comm_world)
fleet.util.barrier(comm_world)
gather = fleet.util().all_gather(1, comm_world)
gather = fleet.util.all_gather(1, comm_world)
self.assertEqual(gather[0], 1)
all_reduce = fleet.util().all_reduce(1, "sum", comm_world)
all_reduce = fleet.util.all_reduce(1, "sum", comm_world)
self.assertEqual(1, all_reduce)
self.clean(tmp)
......
......@@ -22,7 +22,6 @@ import tempfile
import os
import sys
from paddle.dataset.common import download, DATA_HOME
from paddle.distributed.fleet.base.util_factory import fleet_util
import paddle.distributed.fleet.base.role_maker as role_maker
......@@ -59,8 +58,7 @@ class TestFleetUtil(unittest.TestCase):
import paddle.distributed.fleet.base.role_maker as role_maker
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
default_util = fleet.util()
self.assertEqual(default_util, None)
self.assertNotEqual(fleet.util, None)
def test_set_user_defined_util(self):
import paddle.distributed.fleet as fleet
......@@ -76,17 +74,19 @@ class TestFleetUtil(unittest.TestCase):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
my_util = UserDefinedUtil()
fleet.set_util(my_util)
user_id = fleet.util().get_user_id()
fleet.util = my_util
user_id = fleet.util.get_user_id()
self.assertEqual(user_id, 10)
def test_fs(self):
from paddle.distributed.fleet.utils.fs import LocalFS
import paddle.distributed.fleet as fleet
from paddle.distributed.fleet.utils import LocalFS
fs = LocalFS()
dirs, files = fs.ls_dir("test_tmp")
dirs, files = fs.ls_dir("./")
self.assertFalse(fs.need_upload_download())
fleet_util._set_file_system(fs)
fleet.util._set_file_system(fs)
def download_files(self):
path = download(self.proto_data_url, self.module_name,
......@@ -98,7 +98,8 @@ class TestFleetUtil(unittest.TestCase):
return unzip_folder
def test_get_file_shard(self):
self.assertRaises(Exception, fleet_util.get_file_shard, "files")
import paddle.distributed.fleet as fleet
self.assertRaises(Exception, fleet.util.get_file_shard, "files")
try:
import netifaces
except:
......@@ -112,18 +113,20 @@ class TestFleetUtil(unittest.TestCase):
role=role_maker.Role.WORKER,
worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"],
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
fleet_util._set_role_maker(role)
files = fleet_util.get_file_shard(["1", "2", "3"])
fleet.init(role)
files = fleet.util.get_file_shard(["1", "2", "3"])
self.assertTrue(len(files) == 2 and "1" in files and "2" in files)
def test_program_type_trans(self):
import paddle.distributed.fleet as fleet
data_dir = self.download_files()
program_dir = os.path.join(data_dir, self.pruned_dir)
text_program = "pruned_main_program.pbtxt"
binary_program = "pruned_main_program.bin"
text_to_binary = fleet_util._program_type_trans(program_dir,
text_to_binary = fleet.util._program_type_trans(program_dir,
text_program, True)
binary_to_text = fleet_util._program_type_trans(program_dir,
binary_to_text = fleet.util._program_type_trans(program_dir,
binary_program, False)
self.assertTrue(
os.path.exists(os.path.join(program_dir, text_to_binary)))
......@@ -131,6 +134,7 @@ class TestFleetUtil(unittest.TestCase):
os.path.exists(os.path.join(program_dir, binary_to_text)))
def test_prams_check(self):
import paddle.distributed.fleet as fleet
data_dir = self.download_files()
class config:
......@@ -160,11 +164,11 @@ class TestFleetUtil(unittest.TestCase):
# test saved var's shape
conf.dump_program_filename = "pruned_main_program.save_var_shape_not_match"
self.assertRaises(Exception, fleet_util._params_check)
self.assertRaises(Exception, fleet.util._params_check)
# test program.proto without feed_op and fetch_op
conf.dump_program_filename = "pruned_main_program.no_feed_fetch"
results = fleet_util._params_check(conf)
results = fleet.util._params_check(conf)
self.assertTrue(len(results) == 1)
np.testing.assert_array_almost_equal(
results[0], np.array(
......@@ -172,11 +176,11 @@ class TestFleetUtil(unittest.TestCase):
# test feed_var's shape
conf.dump_program_filename = "pruned_main_program.feed_var_shape_not_match"
self.assertRaises(Exception, fleet_util._params_check)
self.assertRaises(Exception, fleet.util._params_check)
# test correct case with feed_vars_filelist
conf.dump_program_filename = "pruned_main_program.pbtxt"
results = fleet_util._params_check(conf)
results = fleet.util._params_check(conf)
self.assertTrue(len(results) == 1)
np.testing.assert_array_almost_equal(
results[0], np.array(
......@@ -186,13 +190,14 @@ class TestFleetUtil(unittest.TestCase):
conf.feed_config.feeded_vars_filelist = None
# test feed var with lod_level >= 2
conf.dump_program_filename = "pruned_main_program.feed_lod2"
self.assertRaises(Exception, fleet_util._params_check)
self.assertRaises(Exception, fleet.util._params_check)
conf.dump_program_filename = "pruned_main_program.pbtxt"
results = fleet_util._params_check(conf)
results = fleet.util._params_check(conf)
self.assertTrue(len(results) == 1)
def test_proto_check(self):
import paddle.distributed.fleet as fleet
data_dir = self.download_files()
class config:
......@@ -210,7 +215,7 @@ class TestFleetUtil(unittest.TestCase):
"pruned_main_program.save_var_shape_not_match"))
conf.is_text_pruned_program = True
conf.draw = False
res = fleet_util._proto_check(conf)
res = fleet.util._proto_check(conf)
self.assertFalse(res)
# test match
......@@ -222,10 +227,11 @@ class TestFleetUtil(unittest.TestCase):
else:
conf.draw = True
conf.draw_out_name = "pruned_check"
res = fleet_util._proto_check(conf)
res = fleet.util._proto_check(conf)
self.assertTrue(res)
def test_visualize(self):
import paddle.distributed.fleet as fleet
if sys.platform == 'win32' or sys.platform == 'sys.platform':
pass
else:
......@@ -234,10 +240,10 @@ class TestFleetUtil(unittest.TestCase):
data_dir,
os.path.join(self.train_dir, "join_main_program.pbtxt"))
is_text = True
program = fleet_util._load_program(program_path, is_text)
program = fleet.util._load_program(program_path, is_text)
output_dir = os.path.join(data_dir, self.train_dir)
output_filename = "draw_prog"
fleet_util._visualize_graphviz(program, output_dir, output_filename)
fleet.util._visualize_graphviz(program, output_dir, output_filename)
self.assertTrue(
os.path.exists(
os.path.join(output_dir, output_filename + ".dot")))
......
......@@ -49,7 +49,7 @@ class TestDygraphDataLoaderSingalHandler(unittest.TestCase):
test_process.start()
set_child_signal_handler(id(self), test_process.pid)
time.sleep(3)
time.sleep(5)
except core.EnforceNotMet as ex:
self.assertIn("FatalError", cpt.get_exception_message(ex))
exception = ex
......
......@@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase):
def test_simple_fc_with_new_strategy(self):
# use_cuda, use_reduce
self._compare_reduce_and_allreduce(simple_fc_net, True)
self._compare_reduce_and_allreduce(simple_fc_net, False)
# NOTE: the computation result of nccl_reduce is non-deterministic,
# related issue: https://github.com/NVIDIA/nccl/issues/157
self._compare_reduce_and_allreduce(simple_fc_net, True, 1e-5, 1e-2)
self._compare_reduce_and_allreduce(simple_fc_net, False, 1e-5, 1e-2)
def check_simple_fc_parallel_accuracy(self, use_cuda):
if use_cuda and not core.is_compiled_with_cuda():
......@@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase):
# NOTE: the computation result of nccl_reduce is non-deterministic,
# related issue: https://github.com/NVIDIA/nccl/issues/157
self._compare_reduce_and_allreduce(fc_with_batchnorm, True, 1e-5, 1e-2)
self._compare_reduce_and_allreduce(fc_with_batchnorm, False)
self._compare_reduce_and_allreduce(fc_with_batchnorm, False, 1e-5, 1e-2)
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册