trainer_desc.proto 7.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

syntax = "proto2";
option optimize_for = LITE_RUNTIME;
import "data_feed.proto";
import "framework.proto";
package paddle.framework;

// Top-level description of a trainer: which trainer/device-worker classes to
// instantiate, threading/IO configuration, and nested per-worker parameters.
message TrainerDesc {
  // class name for create trainer desc
  // the matchness of trainer name and device worker name
  // will be checked in python API
  optional string class_name = 1;
  // class name for creating device worker
  optional string device_worker_name = 2;
  // thread number
  optional int32 thread_num = 3;
  // if we need to binding cpu
  optional bool binding_cpu = 4 [ default = false ];
  // input data file list
  repeated string filelist = 5;
  optional bool debug = 6 [ default = false ];
  optional FetchConfig fetch_config = 7;
  optional bool use_cvm = 8 [ default = false ];
  optional bool dump_slot = 9 [ default = false ];
  optional float scale_datanorm = 10 [ default = -1 ];
  optional int32 mpi_rank = 11 [ default = -1 ];
  // where and what to dump, plus the converter used when dumping
  optional string dump_fields_path = 12;
  repeated string dump_fields = 13;
  optional string dump_converter = 14;
  repeated string dump_param = 15;
  optional int32 mpi_size = 16 [ default = -1 ];
  optional int32 dump_file_num = 17 [ default = 16 ];
  // variables to check for NaN values
  repeated string check_nan_var_names = 18;
  optional CopyTableConfig copy_table_config = 19;
  // adjust ins weight
  optional AdjustInsWeightConfig adjust_ins_weight_config = 20;
  optional bool no_cvm = 21 [ default = false ];
  optional bool thread_barrier = 22;
  repeated string loss_names = 23;
  // random-dump controls for the dump_fields mechanism
  optional bool enable_random_dump = 24 [ default = false ];
  optional bool random_with_lineid = 25 [ default = false ];
  optional int32 dump_interval = 26 [ default = 10000 ];
  repeated int32 worker_places = 27;

  // XPU-specific send/recv variable lists and index range
  repeated string xpu_send_list = 28;
  repeated string xpu_recv_list = 29;
  optional int32 xpu_start_idx = 30;
  optional int32 xpu_end_idx = 31;

  optional bool use_ps_gpu = 32 [ default = false ];
  optional string user_define_dump_filename = 33;
  optional bool scale_sparse_gradient_with_batch_size = 34 [ default = true ];

  repeated int32 trainers = 35;
  optional int32 trainer_id = 36;

  // add for gpu
  optional string fleet_desc = 37;

  // device worker parameters
  optional HogwildWorkerParameter hogwild_param = 101;
  optional DownpourWorkerParameter downpour_param = 103;
  optional PullDenseWorkerParameter pull_dense_param = 102;
  optional SectionWorkerParameter section_param = 104;
  optional HeterSectionWorkerParameter heter_section_param = 105;
  // datafeed desc
  optional DataFeedDesc data_desc = 201;
}

// Parameters for the Hogwild device worker.
message HogwildWorkerParameter {
  // operators to skip when the worker runs its program
  repeated string skip_ops = 1;
  // statistics variable names
  repeated string stat_var_names = 2;
}
// Parameters for the Downpour (parameter-server style) device worker.
message DownpourWorkerParameter {
  // parameter-server table configurations
  repeated TableParameter sparse_table = 1;
  repeated TableParameter dense_table = 2;
  // operators to skip when the worker runs its program
  repeated string skip_ops = 3;
  repeated ProgramConfig program_config = 4;
  // whether to push sparse/dense gradients to the server
  optional bool push_sparse = 5 [ default = true ];
  optional bool push_dense = 6 [ default = true ];
  // statistics variable names
  repeated string stat_var_names = 7;
}

// Parameters for the pipeline-parallel section device worker.
message SectionWorkerParameter {
  optional SectionConfig section_config = 1;
  optional int32 queue_size = 2 [ default = 1 ];
  optional int64 sync_steps = 3 [ default = 1 ];
  optional int32 start_cpu_core_id = 4 [ default = 1 ];
  // parameters that need synchronization between sections
  repeated string param_need_sync = 5;
  optional int32 num_microbatches = 6;
  optional int32 num_pipeline_stages = 7 [ default = 1 ];
  optional int32 pipeline_stage = 8 [ default = 1 ];
  optional int32 schedule_mode = 9 [ default = 0 ];
}

// Parameters for the heterogeneous section device worker.
// Mirrors SectionWorkerParameter fields 1-8 (no schedule_mode here).
message HeterSectionWorkerParameter {
  optional SectionConfig section_config = 1;
  optional int32 queue_size = 2 [ default = 1 ];
  optional int64 sync_steps = 3 [ default = 1 ];
  optional int32 start_cpu_core_id = 4 [ default = 1 ];
  // parameters that need synchronization between sections
  repeated string param_need_sync = 5;
  optional int32 num_microbatches = 6;
  optional int32 num_pipeline_stages = 7 [ default = 1 ];
  optional int32 pipeline_stage = 8 [ default = 1 ];
}

// Configuration of one pipeline section: the program it runs and where.
message SectionConfig {
  // Device placement for this section. Values are unprefixed for
  // historical/compatibility reasons; do not add colliding names.
  enum Place {
    CPUPlace = 0;
    CUDAPlace = 1;
    CUDAPinnedPlace = 2;
  }

  // FIXME: How to use proto::ProgramDesc
  // required string program_desc_str = 1;
  optional proto.ProgramDesc program_desc = 1;
  optional Place place = 2;
  optional int32 concurrency = 3 [ default = 1 ];
  repeated string section_in_var_names = 4;
  repeated string section_out_var_names = 5;
  optional int32 place_id = 6 [ default = -1 ];
}

// Controls how fetched variables are reported during training.
message FetchConfig {
  enum Method { PRINT = 0; }
  // variables to fetch, and the format string used when printing each one
  repeated string fetch_var_names = 1;
  repeated string fetch_var_str_format = 2;
  // report every print_period steps
  optional int32 print_period = 3 [ default = 100 ];
  optional Method method = 4 [ default = PRINT ];
}

// Configuration for adjusting instance weights by nid slot.
message AdjustInsWeightConfig {
  optional bool need_adjust = 1 [ default = false ];
  optional string nid_slot = 2 [ default = "" ];
  optional float nid_adjw_threshold = 3 [ default = 0.0 ];
  optional float nid_adjw_ratio = 4 [ default = 0.0 ];
  optional string ins_weight_slot = 5 [ default = "" ];
}

// One table-dependency entry: a key table id and its dependent table ids.
message TableDependencyMap {
  required int32 key = 1;
  repeated int32 values = 2;
}

// Configuration for periodically copying parameter-server tables
// (sparse/dense) and variables from source to destination.
message CopyTableConfig {
  optional bool need_copy = 1 [ default = false ];
  // copy every batch_num batches
  optional int32 batch_num = 2 [ default = 100 ];
  // src/dest lists are paired by index
  repeated int32 src_sparse_tables = 3;
  repeated int32 dest_sparse_tables = 4;
  repeated int32 src_dense_tables = 5;
  repeated int32 dest_dense_tables = 6;
  repeated string src_var_list = 7;
  repeated string dest_var_list = 8;
  // when dest dense table has no grad, should pull explicitly
  optional bool dense_pull_after_copy = 9 [ default = false ];
  // copy feasigns or copy the whole table
  optional bool sparse_copy_by_feasign = 10 [ default = true ];
  // table dependency for pull/push
  optional bool enable_dependency = 11 [ default = false ];
  // NOTE: "denpendency" is a typo, kept as-is because renaming a field
  // breaks generated accessors and JSON/text-format compatibility.
  repeated TableDependencyMap table_denpendency_map = 12;
}

// A single (key, value) condition-table pair used by ProgramConfig.
message CondTableMap {
  required int32 key = 1;
  required int32 value = 2;
}
// Per-program table-id configuration for push/pull with the parameter server.
message ProgramConfig {
  required string program_id = 1;
  repeated int32 push_sparse_table_id = 2;
  repeated int32 push_dense_table_id = 3;
  repeated int32 pull_sparse_table_id = 4;
  repeated int32 pull_dense_table_id = 5;
  // NOTE: numbered 10, leaving 6-9 free for future push/pull table lists.
  repeated CondTableMap partial_pushdense_condtable_map = 10;
}

// Parameters for the background dense-table pull worker.
message PullDenseWorkerParameter {
  // dense table only and specialized usage
  optional int32 threshold = 1 [ default = 1 ];
  optional int32 device_num = 2;
  // sleep interval between pulls, in milliseconds
  optional int32 sleep_time_ms = 3 [ default = 2 ];
  repeated TableParameter dense_table = 4;
}

// Describes one parameter-server table; some fields apply only to dense
// tables and some only to sparse tables (see comments below).
message TableParameter {
  // dense table only
  optional uint64 table_id = 1;
  repeated string dense_value_name = 2;
  repeated string dense_grad_name = 3;
  // NOTE: field number 4 is unused; do not reuse it for a new field
  // without confirming it was never shipped with another meaning.
  repeated int32 push_dense_wait_times = 5;
  // sparse table only
  repeated string sparse_key_name = 6;
  repeated string sparse_value_name = 7;
  repeated string sparse_grad_name = 8;
  repeated int32 push_sparse_wait_times = 9;
  // sparse table only and specialized usage
  optional int32 emb_dim = 10;
  optional int32 fea_dim = 11;
  optional string label_var_name = 12;
  // if table will pull sparse to local first
  optional bool is_local = 13 [ default = false ];
  // if table will pull sparse asynchronously in worker
  optional bool is_async = 14 [ default = false ];
  optional string async_wait_op_name = 15;
}