diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto index 524a112ff197499e5dcf0937c23ceafa5784ee1c..e0b20bbba91b9a0af34f5d8e6e8fabab3b24785d 100644 --- a/paddle/fluid/framework/distributed_strategy.proto +++ b/paddle/fluid/framework/distributed_strategy.proto @@ -33,7 +33,7 @@ message ShardingConfig { optional bool hybrid_dp = 2 [ default = false ]; optional int32 sharding_group_size = 3 [ default = 8 ]; optional bool as_outer_parallelism = 4 [ default = false ]; - optional int32 inner_parallelism_size = 5 [ default = 8 ]; + optional int32 parallelism = 5 [ default = 1 ]; optional bool use_pipeline = 6 [ default = false ]; } @@ -47,6 +47,8 @@ message AMPConfig { repeated string custom_white_list = 7; repeated string custom_black_list = 8; repeated string custom_black_varnames = 9; + optional bool use_pure_fp16 = 10 [ default = false ]; + optional bool use_fp16_guard = 11 [ default = true ]; } message LocalSGDConfig { @@ -145,7 +147,7 @@ message DistributedStrategy { optional int32 fuse_grad_size_in_MB = 19 [ default = 32 ]; optional float fuse_grad_size_in_TFLOPS = 20 [ default = 50 ]; optional bool cudnn_exhaustive_search = 21 [ default = true ]; - optional int32 conv_workspace_size_limit = 22 [ default = 4000 ]; + optional int32 conv_workspace_size_limit = 22 [ default = 512 ]; optional bool cudnn_batchnorm_spatial_persistent = 23 [ default = true ]; optional bool adaptive_localsgd = 24 [ default = false ]; optional bool fp16_allreduce = 25 [ default = false ]; diff --git a/paddle/fluid/framework/section_worker.cc b/paddle/fluid/framework/section_worker.cc index 54682e3e1cf1c2bad3a4c7904866009105e45502..5cb0497ece7a0fcf712f0e6a577073587932dec7 100644 --- a/paddle/fluid/framework/section_worker.cc +++ b/paddle/fluid/framework/section_worker.cc @@ -11,25 +11,16 @@ limitations under the License. */ #if defined(PADDLE_WITH_NCCL) #include +#include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/executor_gc_helper.h" -#include "paddle/fluid/framework/garbage_collector.h" -#include "paddle/fluid/framework/program_desc.h" - -#include "google/protobuf/io/zero_copy_stream_impl.h" -#include "google/protobuf/message.h" -#include "google/protobuf/text_format.h" -#include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/fleet/box_wrapper.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/framework/trainer_desc.pb.h" -#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/lodtensor_printer.h" namespace paddle { namespace framework { +class TrainerDesc; + uint64_t SectionWorker::batch_id_(0); void SectionWorker::Initialize(const TrainerDesc& desc) {