runtime_environment.h 3.1 KB
Newer Older
X
xiexionghang 已提交
1 2 3 4 5 6 7
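/*
 * Author: xiexionghang
 * Runtime environment: hides the differences between running under MPI and running locally.
 * To stay compatible with the underlying implementation of every environment, the calling
 * constraints of the Env interface are at least as strict as the combined constraints of
 * all environments, i.e. sum(limit(env[n])).
 * Example: under MPI the write interfaces may only be called from a single thread, so this
 * restriction is assumed to hold for every Env by default.
 */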
#pragma once
X
xiexionghang 已提交
8
#include <cstdint>
#include <ctime>
#include <string>
#include <utility>

#include <yaml-cpp/yaml.h>

#include "communicate/ps_env.h"
#include "paddle/fluid/framework/archive.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/train/custom_trainer/feed/common/registerer.h"
X
xiexionghang 已提交
13 14 15 16 17

namespace paddle {
namespace custom_trainer {
namespace feed {

X
xiexionghang 已提交
18 19 20 21 22 23 24 25
// Log levels accepted by RuntimeEnvironment::log().
// The numeric values are spelled out explicitly and run contiguously from 0.
enum class EnvironmentLogLevel {
    FATAL = 0,
    ERROR = 1,
    NOTICE = 2,
    DEBUG = 3,
};

// Selects which nodes emit a message passed to RuntimeEnvironment::log().
enum class EnvironmentLogType {
    MASTER_LOG      = 0,        // only the master node emits output
    ALL_LOG         = 1         // every node emits output
};

// Role a node can take in the runtime environment.
// Keep the values contiguous and increasing, with ALL as the last enumerator.
enum class EnvironmentRole {
    WORKER          = 0,        // training worker
    PSERVER         = 1,        // parameter server

    ALL             = 2         // all roles; must stay at the end of the enum
};

X
xiexionghang 已提交
38 39
class RuntimeEnvironment {
public:
X
xiexionghang 已提交
40 41
    RuntimeEnvironment();
    virtual ~RuntimeEnvironment();
X
xiexionghang 已提交
42
    // 配置初始化
X
xiexionghang 已提交
43
    virtual int initialize(YAML::Node config) = 0;
X
xiexionghang 已提交
44
    // 设置role
X
xiexionghang 已提交
45 46 47
    virtual int add_role(EnvironmentRole role) = 0;
    // 判断role
    virtual bool is_role(EnvironmentRole role) = 0;
X
xiexionghang 已提交
48
    // 环境初始化,会在所有依赖模块initialize后调用
X
xiexionghang 已提交
49 50
    virtual int wireup() = 0;
    
X
xiexionghang 已提交
51 52
    // 多线程可调用接口  Start
    // 当前环境rank_idx
X
xiexionghang 已提交
53
    virtual uint32_t rank_id(EnvironmentRole role) = 0;
X
xiexionghang 已提交
54
    // 运行环境节点数
X
xiexionghang 已提交
55
    virtual uint32_t node_num(EnvironmentRole role) = 0;
X
xiexionghang 已提交
56
    // 环境内主节点
X
xiexionghang 已提交
57
    virtual bool is_master_node(EnvironmentRole role);
X
xiexionghang 已提交
58 59
    //For PS
    virtual paddle::ps::PSEnvironment* ps_environment() = 0;
X
xiexionghang 已提交
60
    
X
xiexionghang 已提交
61
    // 环境定制化log
X
xiexionghang 已提交
62
    template<class... ARGS>
X
xiexionghang 已提交
63 64 65
    void log(EnvironmentRole role, EnvironmentLogType type, 
        EnvironmentLogLevel level, const char* fmt, ARGS && ... args) {
        print_log(role, type, level, paddle::string::format_string(fmt, args...));
X
xiexionghang 已提交
66
    }
X
xiexionghang 已提交
67
    // 多线程可调用接口      End
X
xiexionghang 已提交
68 69


X
xiexionghang 已提交
70 71
    // 接口只允许在主线程调用   Start
    // barrier 指定role的节点
X
xiexionghang 已提交
72
    virtual void barrier(EnvironmentRole role) = 0;
X
xiexionghang 已提交
73
    // bcast 广播
X
xiexionghang 已提交
74
    virtual void bcast(paddle::framework::BinaryArchive& ar, int root_id, EnvironmentRole role) = 0;
X
xiexionghang 已提交
75
    // all_reduce sum element 规约元素
Y
yaopenghui 已提交
76
    virtual double all_reduce_ele(double x) = 0;
X
xiexionghang 已提交
77
    // all_reduce sum array 规约数组
Y
yaopenghui 已提交
78
    virtual void all_reduce_arr(double* x, int n) = 0;
X
xiexionghang 已提交
79
    // 接口只允许在主线程调用   End
X
xiexionghang 已提交
80
protected:
X
xiexionghang 已提交
81 82
    virtual void print_log(EnvironmentRole role, EnvironmentLogType type, 
        EnvironmentLogLevel level,  const std::string& log_str) = 0;
X
xiexionghang 已提交
83
};
X
xiexionghang 已提交
84
// Invokes the REGIST_REGISTERER macro for RuntimeEnvironment. The macro is defined
// outside this file (presumably in common/registerer.h — confirm there what the
// registration provides).
REGIST_REGISTERER(RuntimeEnvironment);
X
xiexionghang 已提交
85

X
xiexionghang 已提交
86
// Renders `time` as a string according to `format`. Declared here only; the
// definition lives outside this header.
// NOTE(review): `format` is presumably a strftime-style pattern — confirm against the definition.
std::string format_timestamp(time_t time, const char* format);

// Convenience overload accepting the pattern as std::string; it simply
// delegates to the const char* overload above.
inline std::string format_timestamp(time_t time, const std::string& format) {
    return format_timestamp(time, format.c_str());
}
X
xiexionghang 已提交
90 91 92 93

}  // namespace feed
}  // namespace custom_trainer
}  // namespace paddle