// Schema definitions for the `storage` package.
syntax = "proto3";

package storage;

// Data type tag, referenced by the `dtype` fields of `Entry` and `Record`.
enum DataType {
  // Single-entry types.
  // NOTE: kInt32 is the zero value, so in proto3 it is also the default of
  // any unset `DataType` field.
  kInt32 = 0;
  kInt64 = 1;
  kFloat = 2;
  kDouble = 3;
  kString = 4;
  kBool = 5;

  // Multi-entry (array) types.
  kInt64s = 6;
  kFloats = 7;
  kDoubles = 8;
  kStrings = 9;
  kInt32s = 10;
  kBools = 11;

  // Unknown type.
  kUnknown = 12;
}

// A piece of data (a single element or an array) whose type is `dtype`.
message Entry {
  // Data type of this entry. If all the entries in a record share the same
  // data type, ignore this field and store the type in `dtype` of `Record`
  // instead.
  DataType dtype = 1;

  // Single-element values.
  int32 i32 = 2;
  int64 i64 = 3;
  string s = 4;
  float f = 5;
  double d = 6;
  bool b = 7;

  // Array values.
  repeated int64 i64s = 8;
  repeated float fs = 9;
  repeated double ds = 10;
  repeated int32 i32s = 11;
  repeated string ss = 12;
  repeated bool bs = 13;
}

/*
The Record proto is designed to represent any data structure for any component.
For example, to store a record of a Scalar component:

Record {
  // training error is 0.1, testing error is 0.2
  data = [0.1, 0.2],
  timestamp = xxxx,
  // step id
  id = xxxx
}

To store a record of an Image component:

Record {
  // RGB pixel weights of an image
  data = [[0.1, 0.2, ...], [...], [...]],
  timestamp = xxxx,
  // image shape
  meta = [100, 200]
}

For other complex structures that have more fields than `timestamp`, `id` and
`meta`, the additional fields can be stored in the `data` field, because it can
hold a list of values of different basic data types.

A component handler in the logic layer should know how to write or load a
record for the corresponding component.
*/
// One stored record of a component; see the field comments for details.
message Record {
  // One record might have multiple fields; each specific component should
  // know how to parse its own records.
  repeated Entry data = 1;

  // NOTE: `timestamp`, `id` and `dtype` might be unnecessary, since all the
  // meta info can be stored in the `data` field.
  int64 timestamp = 2;
  int64 id = 3;
  DataType dtype = 4;

  // Shape or some other meta information for this record. If all the records
  // share the same meta, just store one copy of it in `Storage`; otherwise
  // create a unique copy for each `Record`.
  Entry meta = 5;
}

/*
A Tablet stores the records of a component whose type is `component` and which
is identified by `tag`.
The records will be saved in a file whose name contains `tag`. During the
running period, `total_records` will be accumulated, and `num_samples`
indicates the size of the sample set that the reservoir sampling algorithm
will collect.
*/
// Holds the records of one component, together with its type, tag, captions
// and sampling counters.
message Tablet {
  // The kinds of components that are supported.
  enum Type {
    kScalar = 0;
    kHistogram = 1;
    kGraph = 2;
  }

  // Type of the component; different components should have different
  // storage formats.
  Type component = 1;

  // Total count of records; each Write operation should increase this value.
  int64 total_records = 2;

  // Number of instances to sample; this should be a constant value.
  int32 num_samples = 3;

  // The records held by this tablet.
  repeated Record records = 4;

  // Meta information stored once here if all the records share it.
  Entry meta = 5;

  // The unique identification for this `Tablet`.
  string tag = 6;

  // One tablet might have multiple captions; for example, a scalar component
  // might have two plots labeled "train" and "test".
  repeated string captions = 7;
}

/*
The Storage stores all the records.
*/
// Top-level container that stores all the records, grouped into tablets.
message Storage {
  // Maps each tag to its Tablet. Should be thread safe if the keys are fixed
  // after initialization.
  // TODO: to be deleted in the new storage interface.
  map<string, Tablet> tablets = 1;

  // NOTE(review): presumably the tags of the tablets belonging to this
  // storage -- confirm against the storage implementation.
  repeated string tags = 4;

  // NOTE(review): presumably the directory where the storage is persisted --
  // confirm against the storage implementation.
  string dir = 2;

  // NOTE(review): semantics and unit are not stated in this file -- confirm.
  int64 timestamp = 3;
}