/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto2";

package paddle;

// Settings that control how data files are loaded by file-loading threads.
message FileGroupConf {
  // NOTE(review): presumably the capacity of the queue that buffers loaded
  // data; the unit is not stated in this file -- confirm with the consumer.
  optional uint32 queue_capacity = 1 [ default = 1 ];

  // Number of files loaded by each file-loading thread.
  optional int32 load_file_count = 2 [ default = 1 ];

  // Number of threads used to load files.
  // A value of 5~10 is appropriate when loading files through Hadoop VFS.
  optional int32 load_thread_num = 3 [ default = 1 ];
}

// Configuration of a data provider.
message DataConfig {
  // NOTE(review): presumably selects which data provider implementation is
  // used -- confirm against the code that reads this field.
  // This field is `required`: a message without it fails initialization
  // checks, so it must stay set by every writer.
  required string type = 1;

  // Name of a text file that contains a list of file names, one per line.
  optional string files = 3;

  // Feature dimension of one frame.
  optional int32 feat_dim = 4;
  // Feature slot dimensions.
  repeated int32 slot_dims = 5;
  // Maximum number of neighbouring frames.
  optional int32 context_len = 6;
  // Buffer capacity, expressed as a number of samples.
  optional uint64 buffer_capacity = 7;

  // Portion of the data used in training.
  // If set to a value other than -1, only part of the training data is used.
  optional int64 train_sample_num = 8 [ default = -1 ];

  // Number of documents processed at a time.
  optional int32 file_load_num = 9 [ default = -1 ];
  optional bool async_load_data = 12 [ default = false ];

  // Field numbers 10, 11 and 13 belonged to deprecated fields; they are
  // reserved so that they can never be reused with a different meaning.
  reserved 10, 11, 13;

  // Whether this data is for test.
  optional bool for_test = 14 [ default = false ];
  optional FileGroupConf file_group_conf = 15;
  repeated int32 float_slot_dims = 16;

  // Field numbers 17, 18 and 19 belonged to deprecated fields; they are
  // reserved so that they can never be reused with a different meaning.
  reserved 17 to 19;

  // A list of values used to create additional one-dimensional float value
  // slots. These one-dimensional slots can be used as the weight input for
  // cost layers.
  // Currently this is only supported by ProtoDataProvider.
  repeated double constant_slots = 20;

  // For PyDataProvider.
  // Specify the load-data script module name, object name and user args.
  optional string load_data_module = 21;
  optional string load_data_object = 22;
  optional string load_data_args = 23;

  // For MultiDataProvider: the sub data providers.
  repeated DataConfig sub_data_configs = 24;

  // The ratio of each sub data provider.
  // E.g. if sub data provider A's ratio is 1, B's ratio is 9 and batch_size
  // is 100, then each mini-batch is composed of 10 instances from A and 90
  // instances from B.
  optional int32 data_ratio = 25;

  // Controls what happens when one of the sub data providers runs out of
  // data:
  // (1) if it is "main data", the current pass is finished;
  // (2) if it is not "main data", it is reset and getNextBatch is tried
  //     again.
  optional bool is_main_data = 26 [ default = true ];

  // The usage ratio of instances. Setting it to 1.0 means that all instances
  // are used.
  optional double usage_ratio = 27 [ default = 1.0 ];
}