/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "index_tfile.h"
17
#include "index.h"
dengyihao's avatar
dengyihao 已提交
18
#include "index_fst.h"
19
#include "index_fst_counting_writer.h"
dengyihao's avatar
dengyihao 已提交
20
#include "index_util.h"
dengyihao's avatar
dengyihao 已提交
21
#include "taosdef.h"
dengyihao's avatar
dengyihao 已提交
22
#include "tcoding.h"
dengyihao's avatar
dengyihao 已提交
23
#include "tcompare.h"
dengyihao's avatar
dengyihao 已提交
24

dengyihao's avatar
dengyihao 已提交
25 26
const static uint64_t tfileMagicNumber = 0xdb4775248b80fb57ull;

dengyihao's avatar
dengyihao 已提交
27 28 29 30 31 32 33
// State of one full scan over a reader's FST (filled in by tfileFstIteratorCreate).
typedef struct TFileFstIter {
  FstStreamBuilder* fb;   // search builder returned by fstSearch()
  StreamWithState*  st;   // stream obtained from fb via streamBuilderIntoStream()
  AutomationCtx*    ctx;  // automaton context created with AUTOMATION_ALWAYS
  TFileReader*      rdr;  // reader whose FST is being iterated
} TFileFstIter;

dengyihao's avatar
dengyihao 已提交
34 35
// Serialized size of one table-id list: the count field (same type as sz) followed by
// sz uint64_t ids. The argument is parenthesized so expressions such as `n + 1` expand correctly.
#define TF_TABLE_TATOAL_SIZE(sz) (sizeof(sz) + (sz) * sizeof(uint64_t))

dengyihao's avatar
dengyihao 已提交
36
static int  tfileUidCompare(const void* a, const void* b);
dengyihao's avatar
dengyihao 已提交
37
static int  tfileStrCompare(const void* a, const void* b);
dengyihao's avatar
dengyihao 已提交
38 39
static int  tfileValueCompare(const void* a, const void* b, const void* param);
static void tfileSerialTableIdsToBuf(char* buf, SArray* tableIds);
dengyihao's avatar
dengyihao 已提交
40

dengyihao's avatar
dengyihao 已提交
41
static int tfileWriteHeader(TFileWriter* writer);
dengyihao's avatar
dengyihao 已提交
42
static int tfileWriteFstOffset(TFileWriter* tw, int32_t offset);
dengyihao's avatar
dengyihao 已提交
43
static int tfileWriteData(TFileWriter* write, TFileValue* tval);
dengyihao's avatar
dengyihao 已提交
44
static int tfileWriteFooter(TFileWriter* write);
dengyihao's avatar
dengyihao 已提交
45

dengyihao's avatar
dengyihao 已提交
46
// handle file corrupt later
dengyihao's avatar
dengyihao 已提交
47 48
static int tfileReaderLoadHeader(TFileReader* reader);
static int tfileReaderLoadFst(TFileReader* reader);
dengyihao's avatar
dengyihao 已提交
49
static int tfileReaderVerify(TFileReader* reader);
dengyihao's avatar
dengyihao 已提交
50
static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray* result);
dengyihao's avatar
dengyihao 已提交
51

dengyihao's avatar
dengyihao 已提交
52 53 54 55 56 57 58
static SArray* tfileGetFileList(const char* path);
static int     tfileRmExpireFile(SArray* result);
static void    tfileDestroyFileName(void* elem);
static int     tfileCompare(const void* a, const void* b);
static int     tfileParseFileName(const char* filename, uint64_t* suid, char* col, int* version);
static void    tfileGenFileName(char* filename, uint64_t suid, const char* col, int version);
static void    tfileGenFileFullName(char* fullname, const char* path, uint64_t suid, const char* col, int32_t version);
dengyihao's avatar
dengyihao 已提交
59

dengyihao's avatar
dengyihao 已提交
60 61
TFileCache* tfileCacheCreate(const char* path) {
  TFileCache* tcache = calloc(1, sizeof(TFileCache));
dengyihao's avatar
dengyihao 已提交
62 63 64
  if (tcache == NULL) {
    return NULL;
  }
65 66 67 68

  tcache->tableCache = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
  tcache->capacity = 64;

dengyihao's avatar
dengyihao 已提交
69
  SArray* files = tfileGetFileList(path);
dengyihao's avatar
dengyihao 已提交
70
  for (size_t i = 0; i < taosArrayGetSize(files); i++) {
dengyihao's avatar
dengyihao 已提交
71
    char* file = taosArrayGetP(files, i);
dengyihao's avatar
dengyihao 已提交
72

dengyihao's avatar
dengyihao 已提交
73
    WriterCtx* wc = writerCtxCreate(TFile, file, true, 1024 * 1024 * 64);
74
    if (wc == NULL) {
dengyihao's avatar
dengyihao 已提交
75
      indexError("failed to open index:%s", file);
76
      goto End;
dengyihao's avatar
dengyihao 已提交
77
    }
dengyihao's avatar
dengyihao 已提交
78

dengyihao's avatar
dengyihao 已提交
79
    TFileReader* reader = tfileReaderCreate(wc);
dengyihao's avatar
dengyihao 已提交
80 81 82 83
    if (reader == NULL) {
      indexInfo("skip invalid file: %s", file);
      continue;
    }
dengyihao's avatar
dengyihao 已提交
84
    TFileHeader* header = &reader->header;
dengyihao's avatar
dengyihao 已提交
85
    ICacheKey    key = {.suid = header->suid, .colName = header->colName, .nColName = strlen(header->colName)};
dengyihao's avatar
dengyihao 已提交
86

dengyihao's avatar
dengyihao 已提交
87
    char    buf[128] = {0};
dengyihao's avatar
dengyihao 已提交
88 89 90
    int32_t sz = indexSerialCacheKey(&key, buf);
    assert(sz < sizeof(buf));
    taosHashPut(tcache->tableCache, buf, sz, &reader, sizeof(void*));
dengyihao's avatar
dengyihao 已提交
91
    tfileReaderRef(reader);
dengyihao's avatar
dengyihao 已提交
92
  }
dengyihao's avatar
dengyihao 已提交
93
  taosArrayDestroyEx(files, tfileDestroyFileName);
dengyihao's avatar
dengyihao 已提交
94
  return tcache;
dengyihao's avatar
dengyihao 已提交
95
End:
96
  tfileCacheDestroy(tcache);
dengyihao's avatar
dengyihao 已提交
97
  taosArrayDestroyEx(files, tfileDestroyFileName);
98
  return NULL;
dengyihao's avatar
dengyihao 已提交
99
}
dengyihao's avatar
dengyihao 已提交
100
/*
 * Release a cache created by tfileCacheCreate(): drop the cache's reference on every
 * stored reader, then free the hash table and the cache itself. NULL is a no-op.
 */
void tfileCacheDestroy(TFileCache* tcache) {
  if (tcache == NULL) {
    return;
  }

  // walk the table and give back the reference held for each entry
  for (TFileReader** it = taosHashIterate(tcache->tableCache, NULL); it != NULL;
       it = taosHashIterate(tcache->tableCache, it)) {
    TFileReader* rd = *it;
    indexInfo("drop table cache suid: %" PRIu64 ", colName: %s, colType: %d", rd->header.suid, rd->header.colName,
              rd->header.colType);
    tfileReaderUnRef(rd);
  }

  taosHashCleanup(tcache->tableCache);
  free(tcache);
}

dengyihao's avatar
dengyihao 已提交
119 120 121 122 123
/*
 * Look up the reader registered for `key`.
 *
 * Returns the reader with one extra reference taken for the caller, or NULL when the
 * key is not in the cache.
 */
TFileReader* tfileCacheGet(TFileCache* tcache, ICacheKey* key) {
  char    keyBuf[128] = {0};
  int32_t keyLen = indexSerialCacheKey(key, keyBuf);
  assert(keyLen < sizeof(keyBuf));

  TFileReader** slot = taosHashGet(tcache->tableCache, keyBuf, keyLen);
  if (slot != NULL) {
    tfileReaderRef(*slot);
    return *slot;
  }
  return NULL;
}
dengyihao's avatar
dengyihao 已提交
131 132 133
/*
 * Register `reader` under `key`, replacing any reader already stored there.
 *
 * A previously stored reader is removed from the table, flagged with remove = true
 * and has the cache's reference dropped. The cache then takes a reference on the
 * new reader.
 */
void tfileCachePut(TFileCache* tcache, ICacheKey* key, TFileReader* reader) {
  char    keyBuf[128] = {0};
  int32_t keyLen = indexSerialCacheKey(key, keyBuf);

  // retire the reader of the previous index version, if there is one
  TFileReader** slot = taosHashGet(tcache->tableCache, keyBuf, keyLen);
  if (slot != NULL) {
    TFileReader* previous = *slot;
    taosHashRemove(tcache->tableCache, keyBuf, keyLen);
    previous->remove = true;
    tfileReaderUnRef(previous);
  }

  taosHashPut(tcache->tableCache, keyBuf, keyLen, &reader, sizeof(void*));
  tfileReaderRef(reader);
}
dengyihao's avatar
dengyihao 已提交
147 148
/*
 * Create a reader on top of an already opened WriterCtx.
 *
 * The file is verified (tfileReaderVerify), then its header and FST are loaded.
 * The reader owns `ctx`: tfileReaderDestroy() destroys it, and so does every failure
 * path here, including allocation failure, so callers never have to release `ctx`
 * after passing it in.
 *
 * Returns the reader, or NULL on failure.
 */
TFileReader* tfileReaderCreate(WriterCtx* ctx) {
  TFileReader* reader = calloc(1, sizeof(TFileReader));
  if (reader == NULL) {
    // keep ownership consistent with the other failure paths, which destroy ctx
    writerCtxDestroy(ctx, false);
    return NULL;
  }

  reader->ctx = ctx;

  if (0 != tfileReaderVerify(reader)) {
    indexError("invalid tfile, suid: %" PRIu64 ", colName: %s", reader->header.suid, reader->header.colName);
    tfileReaderDestroy(reader);
    return NULL;
  }

  if (0 != tfileReaderLoadHeader(reader)) {
    indexError("failed to load index header, suid: %" PRIu64 ", colName: %s", reader->header.suid,
               reader->header.colName);
    tfileReaderDestroy(reader);
    return NULL;
  }

  if (0 != tfileReaderLoadFst(reader)) {
    indexError("failed to load index fst, suid: %" PRIu64 ", colName: %s, errno: %d", reader->header.suid,
               reader->header.colName, errno);
    tfileReaderDestroy(reader);
    return NULL;
  }

  return reader;
}
dengyihao's avatar
dengyihao 已提交
177
/*
 * Free a reader: its FST, its WriterCtx (passing along the reader's `remove` flag)
 * and the reader structure. NULL is a no-op.
 */
void tfileReaderDestroy(TFileReader* reader) {
  if (reader != NULL) {
    fstDestroy(reader->fst);
    writerCtxDestroy(reader->ctx, reader->remove);
    free(reader);
  }
}

dengyihao's avatar
dengyihao 已提交
187
/*
 * Run `query` against one tfile reader and append matching table ids to `result`.
 *
 * Only QUERY_TERM is implemented; every other query type leaves `result` untouched.
 * The function always drops one reference on `reader` before returning.
 *
 * Returns the result of tfileReaderLoadTableIds() when the term is found in the FST,
 * otherwise -1.
 */
int tfileReaderSearch(TFileReader* reader, SIndexTermQuery* query, SArray* result) {
  SIndexTerm*     term = query->term;
  EIndexQueryType qtype = query->qType;
  int             rc = -1;

  if (qtype == QUERY_TERM) {
    uint64_t offset;
    FstSlice key = fstSliceCreate(term->colVal, term->nColVal);
    if (!fstGet(reader->fst, &key, &offset)) {
      indexInfo("index: %" PRIu64 ", col: %s, colVal: %s, not found table info in tindex", term->suid, term->colName,
                term->colVal);
    } else {
      indexInfo("index: %" PRIu64 ", col: %s, colVal: %s, found table info in tindex", term->suid, term->colName,
                term->colVal);
      rc = tfileReaderLoadTableIds(reader, offset, result);
    }
    fstSliceDestroy(&key);
  }
  // QUERY_PREFIX and the remaining query types are not handled yet

  tfileReaderUnRef(reader);
  return rc;
}

dengyihao's avatar
dengyihao 已提交
215 216
TFileWriter* tfileWriterOpen(char* path, uint64_t suid, int32_t version, const char* colName, uint8_t colType) {
  char fullname[256] = {0};
dengyihao's avatar
dengyihao 已提交
217
  tfileGenFileFullName(fullname, path, suid, colName, version);
dengyihao's avatar
dengyihao 已提交
218
  // indexInfo("open write file name %s", fullname);
dengyihao's avatar
dengyihao 已提交
219
  WriterCtx* wcx = writerCtxCreate(TFile, fullname, false, 1024 * 1024 * 64);
dengyihao's avatar
dengyihao 已提交
220 221 222
  if (wcx == NULL) {
    return NULL;
  }
dengyihao's avatar
dengyihao 已提交
223 224 225 226 227 228 229 230 231

  TFileHeader tfh = {0};
  tfh.suid = suid;
  tfh.version = version;
  memcpy(tfh.colName, colName, strlen(colName));
  tfh.colType = colType;

  return tfileWriterCreate(wcx, &tfh);
}
dengyihao's avatar
dengyihao 已提交
232 233
TFileReader* tfileReaderOpen(char* path, uint64_t suid, int32_t version, const char* colName) {
  char fullname[256] = {0};
dengyihao's avatar
dengyihao 已提交
234 235
  tfileGenFileFullName(fullname, path, suid, colName, version);

dengyihao's avatar
dengyihao 已提交
236
  WriterCtx* wc = writerCtxCreate(TFile, fullname, true, 1024 * 1024 * 1024);
dengyihao's avatar
dengyihao 已提交
237
  indexInfo("open read file name:%s, size: %d", wc->file.buf, wc->file.size);
dengyihao's avatar
dengyihao 已提交
238 239 240
  if (wc == NULL) {
    return NULL;
  }
dengyihao's avatar
dengyihao 已提交
241

dengyihao's avatar
dengyihao 已提交
242
  TFileReader* reader = tfileReaderCreate(wc);
dengyihao's avatar
dengyihao 已提交
243 244
  return reader;
}
dengyihao's avatar
dengyihao 已提交
245 246
/*
 * Allocate a writer bound to `ctx`, copy `header` into it and write the header to
 * the file.
 *
 * Returns the writer, or NULL when allocation fails; `ctx` is not released here.
 * NOTE(review): the result of tfileWriteHeader() is not examined.
 */
TFileWriter* tfileWriterCreate(WriterCtx* ctx, TFileHeader* header) {
  TFileWriter* writer = calloc(1, sizeof(TFileWriter));
  if (writer != NULL) {
    writer->ctx = ctx;
    writer->header = *header;
    tfileWriteHeader(writer);
    return writer;
  }

  indexError("index: %" PRIu64 " failed to alloc TFilerWriter", header->suid);
  return NULL;
}
dengyihao's avatar
dengyihao 已提交
256

dengyihao's avatar
dengyihao 已提交
257
/*
 * Write a complete tfile body from `data`, an SArray of TFileValue pointers.
 *
 * Steps:
 *   1. when `order` is false, sort the values by column value;
 *   2. sort and de-duplicate every table-id list and compute where the FST starts;
 *   3. write the FST offset, then every serialized table-id list, recording each
 *      list's file offset in its TFileValue;
 *   4. build the FST mapping column value -> list offset, then write the footer.
 *
 * Returns 0 on success, -1 on failure.
 * NOTE(review): when fstBuilderCreate() fails the writer is closed (and freed) before
 * returning, whereas the other failure paths leave `tw` open; callers should confirm
 * which ownership convention they rely on.
 */
int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
  SArray* values = (SArray*)data;

  // sort by coltype and write to tindex
  if (order == false) {
    __compar_fn_t fn;

    int8_t colType = tw->header.colType;
    if (colType == TSDB_DATA_TYPE_BINARY || colType == TSDB_DATA_TYPE_NCHAR) {
      fn = tfileStrCompare;
    } else {
      fn = getComparFunc(colType, 0);
    }
    taosArraySortPWithExt(values, tfileValueCompare, &fn);
  }

  int32_t sz = taosArrayGetSize(values);
  int32_t fstOffset = tw->offset;

  // first pass: normalize the id lists and add up their serialized sizes
  for (int32_t i = 0; i < sz; i++) {
    TFileValue* v = taosArrayGetP(values, i);
    taosArraySort(v->tableId, tfileUidCompare);
    taosArrayRemoveDuplicate(v->tableId, tfileUidCompare, NULL);
    int32_t tbsz = taosArrayGetSize(v->tableId);
    fstOffset += TF_TABLE_TATOAL_SIZE(tbsz);
  }
  if (tfileWriteFstOffset(tw, fstOffset) != 0) {
    indexError("failed to write fst offset: %d", fstOffset);
    return -1;
  }

  // second pass: write every id list and remember where it landed
  for (int32_t i = 0; i < sz; i++) {
    TFileValue* v = taosArrayGetP(values, i);

    int32_t tbsz = taosArrayGetSize(v->tableId);
    int32_t ttsz = TF_TABLE_TATOAL_SIZE(tbsz);

    char* buf = calloc(1, ttsz * sizeof(char));
    if (buf == NULL) {
      indexError("failed to alloc %d bytes for table ids of: %s", ttsz, v->colVal);
      return -1;
    }
    tfileSerialTableIdsToBuf(buf, v->tableId);
    tw->ctx->write(tw->ctx, buf, ttsz);
    v->offset = tw->offset;
    tw->offset += ttsz;
    free(buf);
  }

  tw->fb = fstBuilderCreate(tw->ctx, 0);
  if (tw->fb == NULL) {
    tfileWriterClose(tw);
    return -1;
  }

  // write data
  for (int32_t i = 0; i < sz; i++) {
    // TODO, fst batch write later
    TFileValue* v = taosArrayGetP(values, i);
    if (tfileWriteData(tw, v) != 0) {
      indexError("failed to write data: %s, offset: %d len: %d", v->colVal, v->offset,
                 (int)taosArrayGetSize(v->tableId));
    }
  }
  fstBuilderFinish(tw->fb);
  fstBuilderDestroy(tw->fb);
  tw->fb = NULL;

  tfileWriteFooter(tw);
  return 0;
}
dengyihao's avatar
dengyihao 已提交
330
/*
 * Close a writer: destroy its WriterCtx (second argument false) and free the
 * writer. NULL is a no-op.
 */
void tfileWriterClose(TFileWriter* tw) {
  if (tw != NULL) {
    writerCtxDestroy(tw->ctx, false);
    free(tw);
  }
}
dengyihao's avatar
dengyihao 已提交
337
/*
 * Destroy a writer. Performs the same steps as tfileWriterClose(): the WriterCtx is
 * destroyed (second argument false) and the writer is freed. NULL is a no-op.
 */
void tfileWriterDestroy(TFileWriter* tw) {
  if (tw != NULL) {
    writerCtxDestroy(tw->ctx, false);
    free(tw);
  }
}
dengyihao's avatar
dengyihao 已提交
344

dengyihao's avatar
dengyihao 已提交
345
IndexTFile* indexTFileCreate(const char* path) {
dengyihao's avatar
dengyihao 已提交
346
  TFileCache* cache = tfileCacheCreate(path);
dengyihao's avatar
dengyihao 已提交
347 348 349
  if (cache == NULL) {
    return NULL;
  }
dengyihao's avatar
dengyihao 已提交
350

dengyihao's avatar
dengyihao 已提交
351
  IndexTFile* tfile = calloc(1, sizeof(IndexTFile));
dengyihao's avatar
dengyihao 已提交
352 353 354 355
  if (tfile == NULL) {
    tfileCacheDestroy(cache);
    return NULL;
  }
356

dengyihao's avatar
dengyihao 已提交
357
  tfile->cache = cache;
dengyihao's avatar
dengyihao 已提交
358 359
  return tfile;
}
360
/* Destroy a tfile index object together with its cache. NULL is a no-op. */
void indexTFileDestroy(IndexTFile* tfile) {
  if (tfile != NULL) {
    tfileCacheDestroy(tfile->cache);
    free(tfile);
  }
}
dengyihao's avatar
dengyihao 已提交
367

dengyihao's avatar
dengyihao 已提交
368
/*
 * Search the tfile index for `query` and append matching table ids to `result`.
 *
 * Returns -1 when `tfile` is NULL, 0 when no reader is cached for the term's
 * (suid, column), otherwise the result of tfileReaderSearch().
 */
int indexTFileSearch(void* tfile, SIndexTermQuery* query, SArray* result) {
  if (tfile == NULL) {
    return -1;
  }

  IndexTFile* index = tfile;
  SIndexTerm* term = query->term;
  ICacheKey key = {.suid = term->suid, .colType = term->colType, .colName = term->colName, .nColName = term->nColName};

  TFileReader* reader = tfileCacheGet(index->cache, &key);
  return (reader == NULL) ? 0 : tfileReaderSearch(reader, query, result);
}
dengyihao's avatar
dengyihao 已提交
385
/*
 * Placeholder for inserting a single term into the tfile index.
 * Currently performs no work and always returns 0.
 */
int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) {
  return 0;
}
dengyihao's avatar
dengyihao 已提交
391 392 393 394 395 396 397 398 399
/*
 * Advance the iterator to the next FST entry.
 *
 * The previous value is released with iterateValueDestroy(iv, false). The next key
 * is copied into a freshly allocated NUL-terminated string and its table ids are
 * loaded into iv->val.
 *
 * Returns true when a new value is available; false at the end of the stream, on
 * allocation failure or when the table ids cannot be loaded. No memory allocated by
 * this call is left behind on the false paths.
 */
static bool tfileIteratorNext(Iterate* iiter) {
  IterateValue* iv = &iiter->val;
  iterateValueDestroy(iv, false);

  TFileFstIter*          tIter = iiter->iter;
  StreamWithStateResult* rt = streamWithStateNextWith(tIter->st, NULL);
  if (rt == NULL) {
    return false;
  }

  int32_t sz = 0;
  char*   ch = (char*)fstSliceData(&rt->data, &sz);
  char*   colVal = calloc(1, sz + 1);
  if (colVal == NULL) {
    swsResultDestroy(rt);
    return false;
  }
  memcpy(colVal, ch, sz);

  uint64_t offset = (uint64_t)(rt->out.out);
  swsResultDestroy(rt);

  // set up iterate value
  if (tfileReaderLoadTableIds(tIter->rdr, offset, iv->val) != 0) {
    free(colVal);  // not yet owned by iv, so it must be released here
    return false;
  }

  iv->colVal = colVal;
  return true;
}

421
/* Return a pointer to the iterator's current value slot. */
static IterateValue* tifileIterateGetValue(Iterate* iter) {
  IterateValue* current = &iter->val;
  return current;
}
dengyihao's avatar
dengyihao 已提交
422 423

/*
 * Allocate the FST scan state for `reader`: an AUTOMATION_ALWAYS automaton context,
 * a search builder on the reader's FST and the stream derived from that builder.
 *
 * Returns the state, or NULL when the allocation fails.
 * NOTE(review): the results of automCtxCreate/fstSearch/streamBuilderIntoStream are
 * stored without being checked.
 */
static TFileFstIter* tfileFstIteratorCreate(TFileReader* reader) {
  TFileFstIter* it = calloc(1, sizeof(TFileFstIter));
  if (it != NULL) {
    it->ctx = automCtxCreate(NULL, AUTOMATION_ALWAYS);
    it->fb = fstSearch(reader->fst, it->ctx);
    it->st = streamBuilderIntoStream(it->fb);
    it->rdr = reader;
  }
  return it;
}

/*
 * Create an iterator over every entry of `reader`'s FST.
 *
 * The iterator's value starts with an empty uint64_t id array and a NULL colVal.
 *
 * Returns the iterator, or NULL when `reader` is NULL or an allocation fails.
 */
Iterate* tfileIteratorCreate(TFileReader* reader) {
  if (reader == NULL) {
    return NULL;
  }

  Iterate* iter = calloc(1, sizeof(Iterate));
  if (iter == NULL) {
    return NULL;
  }

  iter->iter = tfileFstIteratorCreate(reader);
  if (iter->iter == NULL) {
    free(iter);
    return NULL;
  }

  iter->next = tfileIteratorNext;
  iter->getValue = tifileIterateGetValue;
  iter->val.val = taosArrayInit(1, sizeof(uint64_t));
  iter->val.colVal = NULL;
  return iter;
}
/*
 * Destroy an iterator created by tfileIteratorCreate(): its current value, the FST
 * stream, the search builder, the automaton context and the iterator itself.
 * NULL is a no-op.
 */
void tfileIteratorDestroy(Iterate* iter) {
  if (iter == NULL) {
    return;
  }

  iterateValueDestroy(&iter->val, true);

  // tear down the FST scan state in the order stream -> builder -> automaton
  TFileFstIter* scan = iter->iter;
  streamWithStateDestroy(scan->st);
  fstStreamBuilderDestroy(scan->fb);
  automCtxDestroy(scan->ctx);
  free(scan);

  free(iter);
}

dengyihao's avatar
dengyihao 已提交
470
/*
 * Fetch the cached reader for (suid, colName). The lookup key is built with colType
 * TSDB_DATA_TYPE_BINARY.
 *
 * Returns the result of tfileCacheGet(), or NULL when `tf` is NULL.
 */
TFileReader* tfileGetReaderByCol(IndexTFile* tf, uint64_t suid, char* colName) {
  if (tf != NULL) {
    ICacheKey key = {.suid = suid, .colType = TSDB_DATA_TYPE_BINARY, .colName = colName, .nColName = strlen(colName)};
    return tfileCacheGet(tf->cache, &key);
  }
  return NULL;
}
dengyihao's avatar
dengyihao 已提交
477

dengyihao's avatar
dengyihao 已提交
478 479 480 481 482
/*
 * Three-way comparison of two uint64_t values, suitable for sort/dedup callbacks.
 *
 * Returns -1, 0 or 1. The values are compared directly rather than subtracted:
 * a uint64_t difference narrowed to int loses its sign and can even become 0 for
 * unequal inputs (e.g. values that differ by a multiple of 2^32).
 */
static int tfileUidCompare(const void* a, const void* b) {
  uint64_t l = *(const uint64_t*)a;
  uint64_t r = *(const uint64_t*)b;
  return (l > r) - (l < r);
}
dengyihao's avatar
dengyihao 已提交
483 484
/*
 * Compare two NUL-terminated strings and normalize the strcmp() result.
 *
 * Returns -1, 0 or 1.
 */
static int tfileStrCompare(const void* a, const void* b) {
  int ret = strcmp((const char*)a, (const char*)b);
  return (ret > 0) - (ret < 0);
}

dengyihao's avatar
dengyihao 已提交
491 492 493 494 495 496 497 498
/*
 * Compare two TFileValue objects by column value.
 *
 * `param` points to the __compar_fn_t to apply to the two colVal fields; its result
 * is returned unchanged.
 */
static int tfileValueCompare(const void* a, const void* b, const void* param) {
  const TFileValue* lhs = (const TFileValue*)a;
  const TFileValue* rhs = (const TFileValue*)b;
  __compar_fn_t     cmp = *(const __compar_fn_t*)param;

  return cmp(lhs->colVal, rhs->colVal);
}
dengyihao's avatar
dengyihao 已提交
499 500 501

TFileValue* tfileValueCreate(char* val) {
  TFileValue* tf = calloc(1, sizeof(TFileValue));
dengyihao's avatar
dengyihao 已提交
502 503 504
  if (tf == NULL) {
    return NULL;
  }
505
  tf->colVal = tstrdup(val);
dengyihao's avatar
dengyihao 已提交
506 507 508 509
  tf->tableId = taosArrayInit(32, sizeof(uint64_t));
  return tf;
}
/*
 * Append table id `val` to `tf`.
 *
 * Returns 0 after the push, or -1 when `tf` is NULL.
 */
int tfileValuePush(TFileValue* tf, uint64_t val) {
  if (tf != NULL) {
    taosArrayPush(tf->tableId, &val);
    return 0;
  }
  return -1;
}
/*
 * Free a TFileValue: its table-id array, its column value string and the structure.
 * NULL is a no-op, matching the other destroy functions in this file.
 */
void tfileValueDestroy(TFileValue* tf) {
  if (tf == NULL) {
    return;
  }
  taosArrayDestroy(tf->tableId);
  free(tf->colVal);
  free(tf);
}
dengyihao's avatar
dengyihao 已提交
521 522 523 524 525
/*
 * Serialize a table-id list into `buf`: the element count as int32_t followed by
 * each id as uint64_t, using SERIALIZE_VAR_TO_BUF for every field. The caller
 * provides a buffer large enough for the whole list.
 */
static void tfileSerialTableIdsToBuf(char* buf, SArray* ids) {
  int count = taosArrayGetSize(ids);
  SERIALIZE_VAR_TO_BUF(buf, count, int32_t);

  for (size_t idx = 0; idx < count; idx++) {
    uint64_t* id = taosArrayGet(ids, idx);
    SERIALIZE_VAR_TO_BUF(buf, *id, uint64_t);
  }
}

/*
 * Record and write the position at which the FST will start.
 *
 * The stored value is `offset` plus the size of the fstOffset field itself. It is
 * saved in the in-memory header, written to the file, and the writer's running
 * offset is advanced by the number of bytes written.
 *
 * Returns 0 on success, -1 when the write is short.
 */
static int tfileWriteFstOffset(TFileWriter* tw, int32_t offset) {
  WriterCtx* ctx = tw->ctx;
  int32_t    fstOffset = offset + sizeof(tw->header.fstOffset);
  tw->header.fstOffset = fstOffset;

  if (ctx->write(ctx, (char*)&fstOffset, sizeof(fstOffset)) != sizeof(fstOffset)) {
    return -1;
  }

  indexInfo("tfile write fst offset: %d", ctx->size(ctx));
  tw->offset += sizeof(fstOffset);
  return 0;
}
/*
 * Write the first TFILE_HEADER_NO_FST bytes of the writer's header to the file and
 * set the writer's running offset to the number of bytes written.
 *
 * Returns 0 on success, -1 when the write is short.
 */
static int tfileWriteHeader(TFileWriter* writer) {
  WriterCtx* ctx = writer->ctx;

  char buf[TFILE_HEADER_NO_FST] = {0};
  memcpy(buf, (char*)&writer->header, sizeof(buf));

  indexInfo("tfile pre write header size: %d", ctx->size(ctx));
  int nwrite = ctx->write(ctx, buf, sizeof(buf));
  if (sizeof(buf) != nwrite) {
    return -1;
  }
  indexInfo("tfile after write header size: %d", ctx->size(ctx));

  writer->offset = nwrite;
  return 0;
}
/*
 * Insert one (column value -> table-id list offset) pair into the FST builder.
 *
 * Only BINARY and NCHAR columns are inserted; other column types are accepted and
 * ignored for now.
 *
 * Returns 0 on success or for ignored types, -1 when fstBuilderInsert() fails.
 */
static int tfileWriteData(TFileWriter* write, TFileValue* tval) {
  uint8_t colType = write->header.colType;
  if (colType != TSDB_DATA_TYPE_BINARY && colType != TSDB_DATA_TYPE_NCHAR) {
    // handle other type later
    return 0;
  }

  FstSlice key = fstSliceCreate((uint8_t*)(tval->colVal), (size_t)strlen(tval->colVal));
  int      rc = fstBuilderInsert(write->fb, key, tval->offset) ? 0 : -1;
  fstSliceDestroy(&key);
  return rc;
}
dengyihao's avatar
dengyihao 已提交
573 574 575 576 577
/*
 * Append the footer (the encoded tfileMagicNumber) to the file.
 *
 * The number of bytes to write is the fixed size of the magic number. It must not
 * be derived with strlen(): the encoded buffer is binary data, and a zero byte in
 * it would silently shorten the footer.
 *
 * Returns the number of bytes written.
 */
static int tfileWriteFooter(TFileWriter* write) {
  char  buf[sizeof(tfileMagicNumber)] = {0};
  void* pBuf = (void*)buf;
  taosEncodeFixedU64((void**)(void*)&pBuf, tfileMagicNumber);
  int nwrite = write->ctx->write(write->ctx, buf, sizeof(buf));

  indexInfo("tfile write footer size: %d", write->ctx->size(write->ctx));
  assert(nwrite == sizeof(tfileMagicNumber));
  return nwrite;
}
dengyihao's avatar
dengyihao 已提交
583
/*
 * Read TFILE_HEADER_SIZE bytes from the start of the file into reader->header.
 *
 * Returns 0 on success, -1 when the read fails; the header is left untouched in
 * that case so the caller's error path does not see partially filled data.
 */
static int tfileReaderLoadHeader(TFileReader* reader) {
  // TODO simple tfile header later
  char buf[TFILE_HEADER_SIZE] = {0};

  int64_t nread = reader->ctx->readFrom(reader->ctx, buf, sizeof(buf), 0);
  if (nread == -1) {
    indexError("actual Read: %d, to read: %d, errno: %d, filefd: %d, filename: %s", (int)(nread), (int)sizeof(buf),
               errno, reader->ctx->file.fd, reader->ctx->file.buf);
    return -1;
  }

  indexInfo("actual Read: %d, to read: %d, filefd: %d, filename: %s", (int)(nread), (int)sizeof(buf),
            reader->ctx->file.fd, reader->ctx->file.buf);
  memcpy(&reader->header, buf, sizeof(buf));

  return 0;
}
dengyihao's avatar
dengyihao 已提交
600
/*
 * Load the FST section of the file into memory and build reader->fst from it.
 *
 * The FST occupies the bytes between header.fstOffset and the trailing magic
 * number. A non-positive section size or a failed/oversized read is treated as a
 * corrupt file and reported as an error.
 *
 * Returns 0 on success, -1 on failure.
 */
static int tfileReaderLoadFst(TFileReader* reader) {
  WriterCtx* ctx = reader->ctx;
  int        size = ctx->size(ctx);

  // current load fst into memory, refactor it later
  int fstSize = size - reader->header.fstOffset - (int)sizeof(tfileMagicNumber);
  if (fstSize <= 0) {
    indexError("invalid fst size: %d, fst offset: %d, file size: %d", fstSize, reader->header.fstOffset, size);
    return -1;
  }

  char* buf = calloc(1, fstSize);
  if (buf == NULL) {
    return -1;
  }

  int64_t ts = taosGetTimestampUs();
  int32_t nread = ctx->readFrom(ctx, buf, fstSize, reader->header.fstOffset);
  int64_t cost = taosGetTimestampUs() - ts;
  indexInfo("nread = %d, and fst offset=%d, size: %d, filename: %s, size: %d, time cost: %" PRId64 "us", nread,
            reader->header.fstOffset, fstSize, ctx->file.buf, ctx->file.size, cost);

  // we assume fst size less than FST_MAX_SIZE
  if (nread <= 0 || nread > fstSize) {
    free(buf);
    return -1;
  }

  FstSlice st = fstSliceCreate((uint8_t*)buf, nread);
  reader->fst = fstCreate(&st);
  free(buf);
  fstSliceDestroy(&st);

  return reader->fst != NULL ? 0 : -1;
}
dengyihao's avatar
dengyihao 已提交
626
/*
 * Read the table-id list stored at `offset` and append every id to `result`.
 *
 * On-disk layout: an int32_t count followed by that many uint64_t ids. The file is
 * read in 1024-byte blocks; an id that straddles a block boundary is reassembled
 * from the tail of the current block and the head of the next one.
 *
 * Returns 0 on success, -1 when a read does not deliver the bytes that are needed
 * (ids appended before the failure remain in `result`).
 */
static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray* result) {
  // TODO(yihao): opt later
  WriterCtx* ctx = reader->ctx;
  char       block[1024] = {0};
  int32_t    nread = ctx->readFrom(ctx, block, sizeof(block), offset);
  // compare as signed: a negative nread converted to size_t would look huge
  if (nread < (int32_t)sizeof(int32_t)) {
    return -1;
  }

  char*   p = block;
  int32_t nid = 0;
  memcpy(&nid, p, sizeof(nid));
  p += sizeof(nid);

  while (nid > 0) {
    int32_t left = block + sizeof(block) - p;
    if (left >= (int32_t)sizeof(uint64_t)) {
      taosArrayPush(result, (uint64_t*)p);
      p += sizeof(uint64_t);
    } else {
      // the id is split across two blocks: keep the tail, fetch the next block
      char buf[sizeof(uint64_t)] = {0};
      memcpy(buf, p, left);

      int32_t need = (int32_t)sizeof(uint64_t) - left;
      memset(block, 0, sizeof(block));
      offset += sizeof(block);
      nread = ctx->readFrom(ctx, block, sizeof(block), offset);
      if (nread < need) {
        return -1;
      }
      memcpy(buf + left, block, need);

      taosArrayPush(result, (uint64_t*)buf);
      p = block + need;
    }
    nid -= 1;
  }
  return 0;
}
dengyihao's avatar
dengyihao 已提交
658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
// Sanity-check an index file: it must be larger than the in-memory header struct and must end
// with the tfile magic number. Header contents are NOT inspected here; deeper corruption
// checks have to happen elsewhere.
//
// Returns 0 when the trailing magic number matches tfileMagicNumber, -1 otherwise
// (including a negative or too-small size and a short read).
static int tfileReaderVerify(TFileReader* reader) {
  WriterCtx* ctx = reader->ctx;

  uint64_t tMagicNumber = 0;

  char buf[sizeof(tMagicNumber) + 1] = {0};
  int  size = ctx->size(ctx);

  // `size` is signed: reject negative values before comparing against sizeof(), otherwise the
  // implicit conversion to size_t turns them into huge values that pass the checks below.
  if (size < 0 || (size_t)size < sizeof(tMagicNumber) || (size_t)size <= sizeof(reader->header)) {
    return -1;
  }

  // the magic number occupies the last sizeof(tMagicNumber) bytes of the file
  int nread = ctx->readFrom(ctx, buf, sizeof(tMagicNumber), size - (int)sizeof(tMagicNumber));
  if (nread != (int)sizeof(tMagicNumber)) {
    return -1;
  }

  taosDecodeFixedU64(buf, &tMagicNumber);
  return tMagicNumber == tfileMagicNumber ? 0 : -1;
}

dengyihao's avatar
dengyihao 已提交
677
// Take one reference on `reader` via T_REF_INC. A NULL reader is ignored.
void tfileReaderRef(TFileReader* reader) {
  if (reader != NULL) {
    int refCount = T_REF_INC(reader);
    UNUSED(refCount);
  }
}

dengyihao's avatar
dengyihao 已提交
685
// Drop one reference on `reader` via T_REF_DEC; when the count reaches zero the reader is
// destroyed with tfileReaderDestroy. A NULL reader is ignored.
void tfileReaderUnRef(TFileReader* reader) {
  if (reader != NULL && T_REF_DEC(reader) == 0) {
    tfileReaderDestroy(reader);
  }
}
dengyihao's avatar
dengyihao 已提交
695

dengyihao's avatar
dengyihao 已提交
696
static SArray* tfileGetFileList(const char* path) {
dengyihao's avatar
dengyihao 已提交
697 698 699
  char     buf[128] = {0};
  uint64_t suid;
  uint32_t version;
dengyihao's avatar
dengyihao 已提交
700
  SArray*  files = taosArrayInit(4, sizeof(void*));
dengyihao's avatar
dengyihao 已提交
701

dengyihao's avatar
dengyihao 已提交
702
  DIR* dir = opendir(path);
dengyihao's avatar
dengyihao 已提交
703 704 705
  if (NULL == dir) {
    return NULL;
  }
dengyihao's avatar
dengyihao 已提交
706 707
  struct dirent* entry;
  while ((entry = readdir(dir)) != NULL) {
dengyihao's avatar
dengyihao 已提交
708
    char* file = entry->d_name;
dengyihao's avatar
dengyihao 已提交
709 710 711
    if (0 != tfileParseFileName(file, &suid, buf, &version)) {
      continue;
    }
dengyihao's avatar
dengyihao 已提交
712 713 714 715

    size_t len = strlen(path) + 1 + strlen(file) + 1;
    char*  buf = calloc(1, len);
    sprintf(buf, "%s/%s", path, file);
dengyihao's avatar
dengyihao 已提交
716
    taosArrayPush(files, &buf);
dengyihao's avatar
dengyihao 已提交
717 718
  }
  closedir(dir);
dengyihao's avatar
dengyihao 已提交
719 720 721 722 723

  taosArraySort(files, tfileCompare);
  tfileRmExpireFile(files);

  return files;
dengyihao's avatar
dengyihao 已提交
724
}
dengyihao's avatar
dengyihao 已提交
725 726 727 728
// Placeholder: currently leaves `result` untouched and always returns 0.
static int tfileRmExpireFile(SArray* result) {
  // TODO(yihao): remove expire tindex after restart
  (void)result;
  return 0;
}
dengyihao's avatar
dengyihao 已提交
729 730 731 732 733
// Element destructor for an array of char*: `elem` points at the slot holding the string
// pointer, and the string itself is released with free().
static void tfileDestroyFileName(void* elem) {
  char** slot = (char**)elem;
  free(*slot);
}
// Comparator for arrays of char*: `a` and `b` each point at a slot holding a string pointer;
// the strings are ordered with strcmp.
static int tfileCompare(const void* a, const void* b) {
  const char* const* lhs = (const char* const*)a;
  const char* const* rhs = (const char* const*)b;
  return strcmp(*lhs, *rhs);
}
dengyihao's avatar
dengyihao 已提交
738 739 740

// Parse a file name of the form "<suid>-<col>-<version>.tindex".
//
// filename: name to parse (no directory part)
// suid:     receives the leading unsigned 64-bit number
// col:      receives the text between the first and second '-'; the conversion is unbounded,
//           so the caller must supply a buffer at least strlen(filename) + 1 bytes long
// version:  receives the integer after the second '-'
//
// Returns 0 only when the whole name matches, including the ".tindex" suffix; -1 otherwise.
// Output arguments may be partially written when -1 is returned.
static int tfileParseFileName(const char* filename, uint64_t* suid, char* col, int* version) {
  // sscanf's return value only counts conversions, so on its own it cannot tell whether the
  // trailing ".tindex" literal matched. %n is stored only if scanning got past that literal.
  int consumed = -1;
  if (3 != sscanf(filename, "%" SCNu64 "-%[^-]-%d.tindex%n", suid, col, version, &consumed)) {
    return -1;
  }
  if (consumed < 0 || filename[consumed] != '\0') {
    // suffix missing or followed by extra characters
    return -1;
  }
  // read suid & colid & version  success
  return 0;
}
dengyihao's avatar
dengyihao 已提交
746 747 748 749 750 751 752 753 754 755
// Format the tfile name "<suid>-<col>-<version>.tindex" into `filename`.
// The write is unbounded: `filename` must be large enough for the formatted name plus NUL.
static void tfileGenFileName(char* filename, uint64_t suid, const char* col, int version) {
  (void)sprintf(filename, "%" PRIu64 "-%s-%d.tindex", suid, col, version);
}
// Build "<path>/<suid>-<col>-<version>.tindex" into `fullname`.
// The file-name part is produced by tfileGenFileName directly after the "<path>/" prefix, so
// no fixed-size intermediate buffer is involved. The write is still unbounded: `fullname`
// must be large enough for the complete path plus NUL.
static void tfileGenFileFullName(char* fullname, const char* path, uint64_t suid, const char* col, int32_t version) {
  int prefix = sprintf(fullname, "%s/", path);
  if (prefix < 0) {
    // formatting failed; leave an empty string instead of an indeterminate buffer
    fullname[0] = '\0';
    return;
  }
  tfileGenFileName(fullname + prefix, suid, col, version);
}