“7dbf4dcfe2987c35c2c4675cd7ae1b6006979176”上不存在“tools/perf/tests/builtin-test.c”
build_strategy.h 2.4 KB
Newer Older
Y
yuyang18 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

Y
yuyang18 已提交
17 18
#include <string>

Y
yuyang18 已提交
19 20 21 22 23
namespace paddle {
namespace framework {
namespace details {

// Plain option struct. Every member carries an in-class default, so a
// default-constructed BuildStrategy is fully initialized.
struct BuildStrategy {
  // ParallelExecutor supports two ReduceStrategy modes, kAllReduce and
  // kReduce, on both CPU and GPU. With kAllReduce, each thread optimizes all
  // of the parameters separately. With kReduce, the optimization of the
  // parameters is distributed across the threads.
  // For example, if a model has 100 parameters and runs with four threads,
  // kAllReduce makes every thread optimize all 100 parameters, whereas
  // kReduce makes every thread optimize 25 parameters.
  // Note that when kReduce is used for CPU training, all the parameters are
  // shared between the threads, which saves memory.
  // FIXME(zcd): The results of the two modes (kAllReduce and kReduce) may not
  // be equal on GPU, because summing in a different order can give a
  // different result; for example, `a+b+c+d` may differ from `c+a+b+d`.
  // On GPU, both kAllReduce and kReduce are implemented with NCCL, so their
  // results may not be equal.
  // On CPU, to fix the order of summation so that kAllReduce and kReduce
  // produce identical results, add `FLAGS_cpu_deterministic=true` to the
  // environment.
  enum class ReduceStrategy { kAllReduce = 0, kReduce = 1 };

  // Selects how the gradient is scaled.
  // NOTE(review): the exact meaning of each value is defined by the code that
  // consumes this option, which is not visible in this header; the names
  // suggest "coefficient derived from the device count", "fixed 1", and
  // "user supplied" -- confirm against the consumer.
  enum class GradientScaleStrategy {
    kCoeffNumDevice = 0,
    kOne = 1,
    kCustomized = 2,
  };

  // Reduce mode; defaults to kAllReduce.
  ReduceStrategy reduce_{ReduceStrategy::kAllReduce};

  // Gradient scaling mode; defaults to kCoeffNumDevice.
  GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice};

  // Empty by default.
  // NOTE(review): presumably the path a Graphviz dump of the graph is written
  // to when non-empty -- confirm against the consumer.
  std::string debug_graphviz_path_{""};

  // Off by default.
  // NOTE(review): presumably toggles a data-balancing step -- confirm against
  // the consumer.
  bool enable_data_balance_{false};
};

}  // namespace details
}  // namespace framework
}  // namespace paddle