runtime_environment.h 4.2 KB
Newer Older
X
xiexionghang 已提交
1 2 3 4 5 6 7
/*
 *Author: xiexionghang
 *运行环境,屏蔽MPI or Local环境的运行差异
 *为了兼容不同环境的底层实现,Env的接口调用条件严格于sum(limit(env[n]))
 *如:MPI环境下,写接口只允许单线程调用,那么默认对所有Env保证此调用限制
 */
#pragma once
X
xiexionghang 已提交
8
#include <yaml-cpp/yaml.h>
X
xiexionghang 已提交
9
#include "communicate/ps_env.h"
X
xiexionghang 已提交
10
#include "paddle/fluid/framework/archive.h"
X
xiexionghang 已提交
11
#include "paddle/fluid/string/string_helper.h"
X
xiexionghang 已提交
12
#include "paddle/fluid/train/custom_trainer/feed/common/registerer.h"
X
xiexionghang 已提交
13 14 15 16 17

namespace paddle {
namespace custom_trainer {
namespace feed {

X
xiexionghang 已提交
18 19 20 21 22 23 24 25
// Log level passed as the `level` argument of RuntimeEnvironment::log.
// The numeric values are explicit: FATAL is the smallest, DEBUG the largest.
enum class EnvironmentLogLevel {
    FATAL = 0,
    ERROR = 1,
    NOTICE = 2,
    DEBUG = 3,
};

// Selects which nodes emit a message passed to RuntimeEnvironment::log.
enum class EnvironmentLogType {
    MASTER_LOG      = 0,        // only the master node writes the message out
    ALL_LOG         = 1         // every node writes the message out
};

X
xiexionghang 已提交
30
// 保持该枚举值的连续递增,且ALL在尾部
X
xiexionghang 已提交
31 32 33 34 35
// Role a node plays in the job.
// Keep the values consecutive and increasing, with ALL as the last enumerator.
enum class EnvironmentRole {
    WORKER          = 0,        // training worker
    PSERVER         = 1,        // parameter server

    ALL             = 2         // every role; must stay at the end of the enum
};

X
xiexionghang 已提交
38 39 40 41 42
// Reduce的操作类型
// Operation selector taken by RuntimeEnvironment::all_reduce and
// RuntimeEnvironment::all_reduce_in_place. Only summation is declared.
enum class ReduceOperator {
    SUM = 0,  // summation
};

X
xiexionghang 已提交
43 44
class RuntimeEnvironment {
public:
X
xiexionghang 已提交
45 46
    RuntimeEnvironment();
    virtual ~RuntimeEnvironment();
X
xiexionghang 已提交
47
    // 配置初始化
X
xiexionghang 已提交
48
    virtual int initialize(YAML::Node config) = 0;
49 50 51 52 53 54 55 56 57

    // job 信息
    virtual std::string job_id() {
        return _job_id;
    }
    virtual std::string job_name() {
        return _job_name;
    }

X
xiexionghang 已提交
58
    // 设置role
X
xiexionghang 已提交
59 60 61
    virtual int add_role(EnvironmentRole role) = 0;
    // 判断role
    virtual bool is_role(EnvironmentRole role) = 0;
X
xiexionghang 已提交
62
    // 环境初始化,会在所有依赖模块initialize后调用
X
xiexionghang 已提交
63 64
    virtual int wireup() = 0;
    
X
xiexionghang 已提交
65 66
    // 多线程可调用接口  Start
    // 当前环境rank_idx
X
xiexionghang 已提交
67
    virtual uint32_t rank_id(EnvironmentRole role) = 0;
X
xiexionghang 已提交
68
    // 运行环境节点数
X
xiexionghang 已提交
69
    virtual uint32_t node_num(EnvironmentRole role) = 0;
X
xiexionghang 已提交
70
    // 环境内主节点
X
xiexionghang 已提交
71
    virtual bool is_master_node(EnvironmentRole role);
X
xiexionghang 已提交
72 73
    //For PS
    virtual paddle::ps::PSEnvironment* ps_environment() = 0;
X
xiexionghang 已提交
74
    
X
xiexionghang 已提交
75
    // 环境定制化log
X
xiexionghang 已提交
76
    template<class... ARGS>
X
xiexionghang 已提交
77 78 79
    void log(EnvironmentRole role, EnvironmentLogType type, 
        EnvironmentLogLevel level, const char* fmt, ARGS && ... args) {
        print_log(role, type, level, paddle::string::format_string(fmt, args...));
X
xiexionghang 已提交
80
    }
X
xiexionghang 已提交
81
    // 多线程可调用接口      End
X
xiexionghang 已提交
82 83


X
xiexionghang 已提交
84 85
    // 接口只允许在主线程调用   Start
    // barrier 指定role的节点
X
xiexionghang 已提交
86
    virtual void barrier(EnvironmentRole role) = 0;
X
xiexionghang 已提交
87
    // bcast 广播
X
xiexionghang 已提交
88
    virtual void bcast(paddle::framework::BinaryArchive& ar, int root_id, EnvironmentRole role) = 0;
X
xiexionghang 已提交
89 90 91 92 93 94 95 96 97
    // 全局reduce操作, 返回reduce结果
    virtual double all_reduce(double x, ReduceOperator op, EnvironmentRole role) {
        double result = x;
        all_reduce_in_place(&result, 1, op, role);
        return result;
    }
    // 全局reduce,就地执行
    virtual void all_reduce_in_place(double* x, int n, 
            ReduceOperator op, EnvironmentRole role) = 0;
X
xiexionghang 已提交
98
    // 接口只允许在主线程调用   End
X
xiexionghang 已提交
99
protected:
X
xiexionghang 已提交
100 101
    virtual void print_log(EnvironmentRole role, EnvironmentLogType type, 
        EnvironmentLogLevel level,  const std::string& log_str) = 0;
102

X
xiexionghang 已提交
103
    std::string _debug_verion;
104 105
    std::string _job_id = "default_job_id";
    std::string _job_name = "default_job_name";
X
xiexionghang 已提交
106
};
X
xiexionghang 已提交
107
// Invokes the REGIST_REGISTERER macro for RuntimeEnvironment.
// NOTE(review): the macro is presumably provided by common/registerer.h and
// sets up registration of RuntimeEnvironment implementations — confirm there.
REGIST_REGISTERER(RuntimeEnvironment);
X
xiexionghang 已提交
108

109 110 111 112 113 114 115 116 117
// Convenience prefixes for RuntimeEnvironment::log with the WORKER role.
// Each macro expands to an unterminated call
//     environment->log(<role>, <log type>, <log level>,
// so the use site supplies the format string, any format arguments and the
// closing parenthesis, e.g.  ENVLOG_WORKER_ALL_NOTICE "step %d", step);
// A pointer-like object named `environment` must be in scope at the use
// site, and the enum names are unqualified, so they must be visible there.
// The third argument is an EnvironmentLogLevel: NOTICE and ERROR are
// enumerators of EnvironmentLogLevel, not of EnvironmentLogType.
#define ENVLOG_WORKER_ALL_NOTICE \
environment->log(EnvironmentRole::WORKER, EnvironmentLogType::ALL_LOG, EnvironmentLogLevel::NOTICE,
#define ENVLOG_WORKER_MASTER_NOTICE \
environment->log(EnvironmentRole::WORKER, EnvironmentLogType::MASTER_LOG, EnvironmentLogLevel::NOTICE,
#define ENVLOG_WORKER_ALL_ERROR \
environment->log(EnvironmentRole::WORKER, EnvironmentLogType::ALL_LOG, EnvironmentLogLevel::ERROR,
#define ENVLOG_WORKER_MASTER_ERROR \
environment->log(EnvironmentRole::WORKER, EnvironmentLogType::MASTER_LOG, EnvironmentLogLevel::ERROR,

X
xiexionghang 已提交
118
// Renders `time` as a string according to `format`.
// Only declared here; the definition is not part of this header as shown.
// NOTE(review): the accepted format syntax is not visible from this header —
// presumably strftime-style; confirm against the definition.
std::string format_timestamp(time_t time, const char* format);

// Overload taking the format as a std::string; passes format.c_str() to the
// const char* overload and returns its result.
inline std::string format_timestamp(time_t time, const std::string& format) {
    return format_timestamp(time, format.c_str());
}
X
xiexionghang 已提交
122 123 124 125

}  // namespace feed
}  // namespace custom_trainer
}  // namespace paddle