tsdbReaderWriter.c 40.0 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tsdb.h"
H
Hongze Cheng 已提交
17 18 19

#define TSDB_DEFAULT_PAGE_SIZE 4096

H
Hongze Cheng 已提交
20
// =============== PAGE-WISE FILE ===============
H
Hongze Cheng 已提交
21 22 23 24 25 26
#define PAGE_CONTENT_SIZE(PAGE) ((PAGE) - sizeof(TSCKSUM))
#define LOGIC_TO_FILE_OFFSET(OFFSET, PAGE) \
  ((OFFSET) / PAGE_CONTENT_SIZE(PAGE) * (PAGE) + (OFFSET) % PAGE_CONTENT_SIZE(PAGE))
#define FILE_TO_LOGIC_OFFSET(OFFSET, PAGE) ((OFFSET) / (PAGE)*PAGE_CONTENT_SIZE(PAGE) + (OFFSET) % (PAGE))
#define PAGE_OFFSET(PGNO, PAGE)            (((PGNO)-1) * (PAGE))
#define OFFSET_PGNO(OFFSET, PAGE)          ((OFFSET) / (PAGE) + 1)
H
Hongze Cheng 已提交
27 28

static int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) {
H
Hongze Cheng 已提交
29 30
  int32_t  code = 0;
  STsdbFD *pFD;
H
refact  
Hongze Cheng 已提交
31

H
Hongze Cheng 已提交
32 33
  *ppFD = NULL;

H
Hongze Cheng 已提交
34
  pFD = (STsdbFD *)taosMemoryCalloc(1, sizeof(*pFD) + strlen(path) + 1);
H
Hongze Cheng 已提交
35 36 37 38
  if (pFD == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _exit;
  }
H
Hongze Cheng 已提交
39

H
Hongze Cheng 已提交
40 41 42 43 44
  pFD->path = (char *)&pFD[1];
  strcpy(pFD->path, path);
  pFD->szPage = szPage;
  pFD->flag = flag;
  pFD->pFD = taosOpenFile(path, flag);
H
Hongze Cheng 已提交
45
  if (pFD->pFD == NULL) {
H
Hongze Cheng 已提交
46
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
47
    goto _exit;
H
Hongze Cheng 已提交
48
  }
H
Hongze Cheng 已提交
49
  pFD->szPage = szPage;
H
Hongze Cheng 已提交
50 51
  pFD->pgno = 0;
  pFD->nBuf = 0;
H
Hongze Cheng 已提交
52
  pFD->pBuf = taosMemoryMalloc(szPage);
H
Hongze Cheng 已提交
53
  if (pFD->pBuf == NULL) {
H
Hongze Cheng 已提交
54
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
55
    taosMemoryFree(pFD);
H
Hongze Cheng 已提交
56
    goto _exit;
H
Hongze Cheng 已提交
57
  }
H
Hongze Cheng 已提交
58
  *ppFD = pFD;
H
Hongze Cheng 已提交
59 60 61

_exit:
  return code;
H
Hongze Cheng 已提交
62
}
H
Hongze Cheng 已提交
63

H
Hongze Cheng 已提交
64 65
static void tsdbCloseFile(STsdbFD **ppFD) {
  STsdbFD *pFD = *ppFD;
H
Hongze Cheng 已提交
66 67
  taosMemoryFree(pFD->pBuf);
  taosCloseFile(&pFD->pFD);
H
Hongze Cheng 已提交
68 69
  taosMemoryFree(pFD);
  *ppFD = NULL;
H
Hongze Cheng 已提交
70 71
}

H
Hongze Cheng 已提交
72
static int32_t tsdbFsyncFile(STsdbFD *pFD) {
H
Hongze Cheng 已提交
73
  int32_t code = 0;
H
Hongze Cheng 已提交
74

H
Hongze Cheng 已提交
75 76 77
  if (taosFsyncFile(pFD->pFD) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _exit;
H
Hongze Cheng 已提交
78 79
  }

H
Hongze Cheng 已提交
80
_exit:
H
Hongze Cheng 已提交
81 82 83
  return code;
}

H
Hongze Cheng 已提交
84
static int32_t tsdbWriteFile(STsdbFD *pFD, uint8_t *pBuf, int32_t nBuf, int64_t *offset) {
H
Hongze Cheng 已提交
85
  int32_t code = 0;
H
Hongze Cheng 已提交
86

H
Hongze Cheng 已提交
87 88 89 90
  int32_t n = 0;
  while (n < nBuf) {
    int32_t remain = pFD->szPage - pFD->nBuf - sizeof(TSCKSUM);
    int32_t size = TMIN(remain, nBuf - n);
H
Hongze Cheng 已提交
91

H
Hongze Cheng 已提交
92 93 94
    memcpy(pFD->pBuf + pFD->nBuf, pBuf + n, size);
    n += size;
    pFD->nBuf += size;
H
Hongze Cheng 已提交
95

H
Hongze Cheng 已提交
96 97
    if (pFD->nBuf + sizeof(TSCKSUM) == pFD->szPage) {
      taosCalcChecksumAppend(0, pFD->pBuf, pFD->szPage);
H
Hongze Cheng 已提交
98

H
Hongze Cheng 已提交
99 100 101 102 103
      int64_t n = taosWriteFile(pFD->pFD, pFD->pBuf, pFD->szPage);
      if (n < 0) {
        code = TAOS_SYSTEM_ERROR(errno);
        goto _exit;
      }
H
Hongze Cheng 已提交
104

H
Hongze Cheng 已提交
105
      pFD->nBuf = 0;
H
Hongze Cheng 已提交
106
    }
H
Hongze Cheng 已提交
107
  }
H
Hongze Cheng 已提交
108 109

_exit:
H
Hongze Cheng 已提交
110 111 112
  return code;
}

H
Hongze Cheng 已提交
113
static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) {
H
Hongze Cheng 已提交
114
  int32_t code = 0;
H
more  
Hongze Cheng 已提交
115

H
Hongze Cheng 已提交
116 117 118
  // seek
  int64_t offset = PAGE_OFFSET(pgno, pFD->szPage);
  int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET);
H
Hongze Cheng 已提交
119
  if (n < 0) {
H
Hongze Cheng 已提交
120
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
121
    goto _exit;
H
Hongze Cheng 已提交
122 123
  }

H
Hongze Cheng 已提交
124
  // read
H
Hongze Cheng 已提交
125
  n = taosReadFile(pFD->pFD, pFD->pBuf, pFD->szPage);
H
Hongze Cheng 已提交
126
  if (n < 0) {
H
Hongze Cheng 已提交
127
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
128 129
    goto _exit;
  } else if (n < pFD->szPage) {
H
Hongze Cheng 已提交
130
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
131
    goto _exit;
H
Hongze Cheng 已提交
132 133
  }

H
Hongze Cheng 已提交
134
  // check
H
Hongze Cheng 已提交
135
  if (!taosCheckChecksumWhole(pFD->pBuf, pFD->szPage)) {
H
Hongze Cheng 已提交
136
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
137
    goto _exit;
H
Hongze Cheng 已提交
138 139
  }

H
Hongze Cheng 已提交
140
  pFD->pgno = pgno;
H
Hongze Cheng 已提交
141

H
Hongze Cheng 已提交
142 143 144
_exit:
  return code;
}
H
Hongze Cheng 已提交
145

H
Hongze Cheng 已提交
146
static int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) {
H
Hongze Cheng 已提交
147
  int32_t code = 0;
H
Hongze Cheng 已提交
148
  int64_t n;
H
Hongze Cheng 已提交
149 150
  int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage);
  int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage);
H
Hongze Cheng 已提交
151
  int32_t szPgCont = PAGE_CONTENT_SIZE(pFD->szPage);
H
Hongze Cheng 已提交
152

H
Hongze Cheng 已提交
153
  ASSERT(pgno);
H
Hongze Cheng 已提交
154
  if (pFD->pgno == pgno) {
H
Hongze Cheng 已提交
155 156
    int64_t bOff = fOffset % pFD->szPage;
    int64_t nRead = TMIN(szPgCont - bOff, size);
H
Hongze Cheng 已提交
157 158 159 160

    ASSERT(bOff < szPgCont);

    memcpy(pBuf, pFD->pBuf + bOff, nRead);
H
Hongze Cheng 已提交
161
    n = nRead;
H
Hongze Cheng 已提交
162
    pgno++;
H
Hongze Cheng 已提交
163 164
  }

H
Hongze Cheng 已提交
165
  while (n < size) {
H
Hongze Cheng 已提交
166 167
    code = tsdbReadFilePage(pFD, pgno);
    if (code) goto _exit;
H
Hongze Cheng 已提交
168

H
Hongze Cheng 已提交
169
    int64_t nRead = TMIN(szPgCont, size - n);
H
Hongze Cheng 已提交
170
    memcpy(pBuf + n, pFD->pBuf, nRead);
H
Hongze Cheng 已提交
171

H
Hongze Cheng 已提交
172
    n += nRead;
H
Hongze Cheng 已提交
173
    pgno++;
H
Hongze Cheng 已提交
174 175 176
  }

_exit:
H
Hongze Cheng 已提交
177
  return code;
H
Hongze Cheng 已提交
178 179
}

H
Hongze Cheng 已提交
180 181 182 183 184 185
static int32_t tsdbLSeekFile(STsdbFD *pFD, int64_t offset) {
  int32_t code = 0;
  ASSERT(0);
  return code;
}

H
Hongze Cheng 已提交
186 187
// SDataFWriter ====================================================
int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet) {
H
Hongze Cheng 已提交
188
  int32_t       code = 0;
H
Hongze Cheng 已提交
189 190
  int32_t       flag;
  int64_t       n;
H
Hongze Cheng 已提交
191
  int32_t       szPage = TSDB_DEFAULT_PAGE_SIZE;
H
Hongze Cheng 已提交
192
  SDataFWriter *pWriter = NULL;
H
Hongze Cheng 已提交
193
  char          fname[TSDB_FILENAME_LEN];
H
Hongze Cheng 已提交
194
  char          hdr[TSDB_FHDR_SIZE] = {0};
H
Hongze Cheng 已提交
195 196

  // alloc
H
Hongze Cheng 已提交
197 198
  pWriter = taosMemoryCalloc(1, sizeof(*pWriter));
  if (pWriter == NULL) {
H
Hongze Cheng 已提交
199 200 201
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
H
Hongze Cheng 已提交
202
  pWriter->pTsdb = pTsdb;
H
Hongze Cheng 已提交
203 204 205 206 207 208
  pWriter->wSet = (SDFileSet){.diskId = pSet->diskId,
                              .fid = pSet->fid,
                              .pHeadF = &pWriter->fHead,
                              .pDataF = &pWriter->fData,
                              .pSmaF = &pWriter->fSma,
                              .nSstF = pSet->nSstF};
H
Hongze Cheng 已提交
209 210 211 212 213 214 215
  pWriter->fHead = *pSet->pHeadF;
  pWriter->fData = *pSet->pDataF;
  pWriter->fSma = *pSet->pSmaF;
  for (int8_t iSst = 0; iSst < pSet->nSstF; iSst++) {
    pWriter->wSet.aSstF[iSst] = &pWriter->fSst[iSst];
    pWriter->fSst[iSst] = *pSet->aSstF[iSst];
  }
H
Hongze Cheng 已提交
216 217

  // head
H
Hongze Cheng 已提交
218
  flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
H
Hongze Cheng 已提交
219
  tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname);
H
Hongze Cheng 已提交
220 221
  code = tsdbOpenFile(fname, szPage, flag, &pWriter->pHeadFD);
  if (code) goto _err;
H
Hongze Cheng 已提交
222

H
Hongze Cheng 已提交
223 224
  code = tsdbWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
225

H
Hongze Cheng 已提交
226 227 228 229
  ASSERT(n == TSDB_FHDR_SIZE);

  pWriter->fHead.size += TSDB_FHDR_SIZE;

H
Hongze Cheng 已提交
230
  // data
H
Hongze Cheng 已提交
231
  if (pWriter->fData.size == 0) {
H
Hongze Cheng 已提交
232
    flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
H
Hongze Cheng 已提交
233
  } else {
H
Hongze Cheng 已提交
234
    flag = TD_FILE_READ | TD_FILE_WRITE;
H
Hongze Cheng 已提交
235 236
  }
  tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname);
H
Hongze Cheng 已提交
237 238
  code = tsdbOpenFile(fname, szPage, flag, &pWriter->pDataFD);
  if (code) goto _err;
H
Hongze Cheng 已提交
239
  if (pWriter->fData.size == 0) {
H
Hongze Cheng 已提交
240 241
    code = tsdbWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE, NULL);
    if (code) goto _err;
H
Hongze Cheng 已提交
242 243
    pWriter->fData.size += TSDB_FHDR_SIZE;
  } else {
H
Hongze Cheng 已提交
244 245
    // code = tsdbLSeekFile(pWriter->pDataFD, 0, SEEK_END);
    // if (code) goto _err;
H
Hongze Cheng 已提交
246
  }
H
Hongze Cheng 已提交
247 248

  // sma
H
Hongze Cheng 已提交
249
  if (pWriter->fSma.size == 0) {
H
Hongze Cheng 已提交
250
    flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
H
Hongze Cheng 已提交
251
  } else {
H
Hongze Cheng 已提交
252
    flag = TD_FILE_READ | TD_FILE_WRITE;
H
Hongze Cheng 已提交
253 254
  }
  tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname);
H
Hongze Cheng 已提交
255 256
  code = tsdbOpenFile(fname, szPage, flag, &pWriter->pSmaFD);
  if (code) goto _err;
H
Hongze Cheng 已提交
257
  if (pWriter->fSma.size == 0) {
H
Hongze Cheng 已提交
258 259
    code = tsdbWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE, NULL);
    if (code) goto _err;
H
Hongze Cheng 已提交
260

H
Hongze Cheng 已提交
261 262
    pWriter->fSma.size += TSDB_FHDR_SIZE;
  } else {
H
Hongze Cheng 已提交
263 264
    code = tsdbLSeekFile(pWriter->pSmaFD, 0);
    if (code) goto _err;
H
Hongze Cheng 已提交
265 266
  }

H
Hongze Cheng 已提交
267 268
  // sst
  ASSERT(pWriter->fSst[pSet->nSstF - 1].size == 0);
H
Hongze Cheng 已提交
269
  flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
H
Hongze Cheng 已提交
270
  tsdbSstFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSst[pSet->nSstF - 1], fname);
H
Hongze Cheng 已提交
271 272 273 274
  code = tsdbOpenFile(fname, szPage, flag, &pWriter->pLastFD);
  if (code) goto _err;
  code = tsdbWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
275 276 277
  pWriter->fSst[pWriter->wSet.nSstF - 1].size += TSDB_FHDR_SIZE;

  *ppWriter = pWriter;
H
Hongze Cheng 已提交
278 279 280
  return code;

_err:
H
Hongze Cheng 已提交
281 282
  tsdbError("vgId:%d, tsdb data file writer open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppWriter = NULL;
H
Hongze Cheng 已提交
283 284 285
  return code;
}

H
Hongze Cheng 已提交
286
int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync) {
H
Hongze Cheng 已提交
287
  int32_t code = 0;
H
Hongze Cheng 已提交
288
  STsdb  *pTsdb = NULL;
H
Hongze Cheng 已提交
289

H
Hongze Cheng 已提交
290 291 292 293
  if (*ppWriter == NULL) goto _exit;

  pTsdb = (*ppWriter)->pTsdb;
  if (sync) {
H
Hongze Cheng 已提交
294
    if (tsdbFsyncFile((*ppWriter)->pHeadFD) < 0) {
H
Hongze Cheng 已提交
295 296 297 298
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

H
Hongze Cheng 已提交
299
    if (tsdbFsyncFile((*ppWriter)->pDataFD) < 0) {
H
Hongze Cheng 已提交
300 301 302 303
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

H
Hongze Cheng 已提交
304
    if (tsdbFsyncFile((*ppWriter)->pSmaFD) < 0) {
H
Hongze Cheng 已提交
305 306 307 308
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

H
Hongze Cheng 已提交
309
    if (tsdbFsyncFile((*ppWriter)->pLastFD) < 0) {
H
Hongze Cheng 已提交
310 311 312 313 314
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
  }

H
Hongze Cheng 已提交
315 316 317 318
  tsdbCloseFile(&(*ppWriter)->pHeadFD);
  tsdbCloseFile(&(*ppWriter)->pDataFD);
  tsdbCloseFile(&(*ppWriter)->pSmaFD);
  tsdbCloseFile(&(*ppWriter)->pLastFD);
H
Hongze Cheng 已提交
319

H
Hongze Cheng 已提交
320 321
  for (int32_t iBuf = 0; iBuf < sizeof((*ppWriter)->aBuf) / sizeof(uint8_t *); iBuf++) {
    tFree((*ppWriter)->aBuf[iBuf]);
H
Hongze Cheng 已提交
322
  }
H
Hongze Cheng 已提交
323
  taosMemoryFree(*ppWriter);
H
Hongze Cheng 已提交
324
_exit:
H
Hongze Cheng 已提交
325
  *ppWriter = NULL;
H
Hongze Cheng 已提交
326 327 328
  return code;

_err:
H
Hongze Cheng 已提交
329
  tsdbError("vgId:%d, data file writer close failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
330 331 332
  return code;
}

H
Hongze Cheng 已提交
333 334 335 336
int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) {
  int32_t code = 0;
  int64_t n;
  char    hdr[TSDB_FHDR_SIZE];
H
Hongze Cheng 已提交
337

H
Hongze Cheng 已提交
338 339 340
  // head ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutHeadFile(hdr, &pWriter->fHead);
H
Hongze Cheng 已提交
341

H
Hongze Cheng 已提交
342 343
  code = tsdbLSeekFile(pWriter->pHeadFD, 0);
  if (code) goto _err;
H
Hongze Cheng 已提交
344

H
Hongze Cheng 已提交
345 346
  code = tsdbWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
347 348 349 350 351

  // data ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutDataFile(hdr, &pWriter->fData);

H
Hongze Cheng 已提交
352 353
  code = tsdbLSeekFile(pWriter->pDataFD, 0);
  if (code) goto _err;
H
Hongze Cheng 已提交
354

H
Hongze Cheng 已提交
355 356
  code = tsdbWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
357

H
Hongze Cheng 已提交
358 359 360
  // sma ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutSmaFile(hdr, &pWriter->fSma);
H
Hongze Cheng 已提交
361

H
Hongze Cheng 已提交
362 363
  code = tsdbLSeekFile(pWriter->pSmaFD, 0);
  if (code) goto _err;
H
Hongze Cheng 已提交
364

H
Hongze Cheng 已提交
365 366
  code = tsdbWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
367

H
Hongze Cheng 已提交
368 369 370 371
  // sst ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutSstFile(hdr, &pWriter->fSst[pWriter->wSet.nSstF - 1]);

H
Hongze Cheng 已提交
372 373
  code = tsdbLSeekFile(pWriter->pLastFD, 0);
  if (code) goto _err;
H
Hongze Cheng 已提交
374

H
Hongze Cheng 已提交
375 376
  code = tsdbWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
377 378 379 380

  return code;

_err:
H
Hongze Cheng 已提交
381
  tsdbError("vgId:%d, update DFileSet header failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
382 383 384
  return code;
}

H
Hongze Cheng 已提交
385 386 387 388 389
int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) {
  int32_t    code = 0;
  SHeadFile *pHeadFile = &pWriter->fHead;
  int64_t    size = 0;
  int64_t    n;
H
Hongze Cheng 已提交
390

H
Hongze Cheng 已提交
391 392 393
  // check
  if (taosArrayGetSize(aBlockIdx) == 0) {
    pHeadFile->offset = pHeadFile->size;
H
Hongze Cheng 已提交
394 395
    goto _exit;
  }
H
Hongze Cheng 已提交
396

H
Hongze Cheng 已提交
397 398 399 400 401
  // prepare
  for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) {
    size += tPutBlockIdx(NULL, taosArrayGet(aBlockIdx, iBlockIdx));
  }

H
Hongze Cheng 已提交
402
  // alloc
H
Hongze Cheng 已提交
403
  code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
404 405
  if (code) goto _err;

H
Hongze Cheng 已提交
406 407 408 409
  // build
  n = 0;
  for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) {
    n += tPutBlockIdx(pWriter->aBuf[0] + n, taosArrayGet(aBlockIdx, iBlockIdx));
H
Hongze Cheng 已提交
410
  }
H
Hongze Cheng 已提交
411
  ASSERT(n == size);
H
Hongze Cheng 已提交
412 413

  // write
H
Hongze Cheng 已提交
414 415
  code = tsdbWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
416

H
Hongze Cheng 已提交
417 418 419
  // update
  pHeadFile->offset = pHeadFile->size;
  pHeadFile->size += size;
H
Hongze Cheng 已提交
420

H
Hongze Cheng 已提交
421
_exit:
H
Hongze Cheng 已提交
422 423
  tsdbTrace("vgId:%d write block idx, offset:%" PRId64 " size:%" PRId64 " nBlockIdx:%d", TD_VID(pWriter->pTsdb->pVnode),
            pHeadFile->offset, size, taosArrayGetSize(aBlockIdx));
H
Hongze Cheng 已提交
424 425 426
  return code;

_err:
H
Hongze Cheng 已提交
427
  tsdbError("vgId:%d, write block idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
428 429 430
  return code;
}

H
Hongze Cheng 已提交
431 432 433 434 435 436 437
int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, SBlockIdx *pBlockIdx) {
  int32_t    code = 0;
  SHeadFile *pHeadFile = &pWriter->fHead;
  int64_t    size;
  int64_t    n;

  ASSERT(mBlock->nItem > 0);
H
Hongze Cheng 已提交
438

H
Hongze Cheng 已提交
439
  // alloc
H
Hongze Cheng 已提交
440
  size = tPutMapData(NULL, mBlock);
H
Hongze Cheng 已提交
441
  code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
442 443
  if (code) goto _err;

H
Hongze Cheng 已提交
444
  // build
H
Hongze Cheng 已提交
445
  n = tPutMapData(pWriter->aBuf[0] + n, mBlock);
H
Hongze Cheng 已提交
446 447

  // write
H
Hongze Cheng 已提交
448 449
  code = tsdbWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
450

H
Hongze Cheng 已提交
451 452 453 454
  // update
  pBlockIdx->offset = pHeadFile->size;
  pBlockIdx->size = size;
  pHeadFile->size += size;
H
Hongze Cheng 已提交
455

H
Hongze Cheng 已提交
456 457 458 459
  tsdbTrace("vgId:%d, write block, file ID:%d commit ID:%d suid:%" PRId64 " uid:%" PRId64 " offset:%" PRId64
            " size:%" PRId64 " nItem:%d",
            TD_VID(pWriter->pTsdb->pVnode), pWriter->wSet.fid, pHeadFile->commitID, pBlockIdx->suid, pBlockIdx->uid,
            pBlockIdx->offset, pBlockIdx->size, mBlock->nItem);
H
Hongze Cheng 已提交
460 461 462
  return code;

_err:
H
Hongze Cheng 已提交
463
  tsdbError("vgId:%d, write block failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
464 465 466
  return code;
}

H
Hongze Cheng 已提交
467
int32_t tsdbWriteSstBlk(SDataFWriter *pWriter, SArray *aSstBlk) {
H
Hongze Cheng 已提交
468
  int32_t   code = 0;
H
Hongze Cheng 已提交
469
  SSstFile *pSstFile = &pWriter->fSst[pWriter->wSet.nSstF - 1];
H
Hongze Cheng 已提交
470
  int64_t   size = 0;
H
Hongze Cheng 已提交
471
  int64_t   n;
H
Hongze Cheng 已提交
472

H
Hongze Cheng 已提交
473 474 475 476 477
  // check
  if (taosArrayGetSize(aSstBlk) == 0) {
    pSstFile->offset = pSstFile->size;
    goto _exit;
  }
H
Hongze Cheng 已提交
478

H
Hongze Cheng 已提交
479 480 481 482
  // size
  for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSstBlk); iBlockL++) {
    size += tPutSstBlk(NULL, taosArrayGet(aSstBlk, iBlockL));
  }
H
Hongze Cheng 已提交
483 484

  // alloc
H
Hongze Cheng 已提交
485
  code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
486 487
  if (code) goto _err;

H
Hongze Cheng 已提交
488 489 490 491
  // encode
  n = 0;
  for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSstBlk); iBlockL++) {
    n += tPutSstBlk(pWriter->aBuf[0] + n, taosArrayGet(aSstBlk, iBlockL));
H
Hongze Cheng 已提交
492
  }
H
Hongze Cheng 已提交
493 494

  // write
H
Hongze Cheng 已提交
495 496
  code = tsdbWriteFile(pWriter->pLastFD, pWriter->aBuf[0], size, NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
497

H
Hongze Cheng 已提交
498 499 500
  // update
  pSstFile->offset = pSstFile->size;
  pSstFile->size += size;
H
Hongze Cheng 已提交
501

H
Hongze Cheng 已提交
502 503 504
_exit:
  tsdbTrace("vgId:%d tsdb write blockl, loffset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode),
            pSstFile->offset, size);
H
Hongze Cheng 已提交
505 506 507
  return code;

_err:
H
Hongze Cheng 已提交
508
  tsdbError("vgId:%d tsdb write blockl failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
509 510 511
  return code;
}

H
Hongze Cheng 已提交
512
static int32_t tsdbWriteBlockSma(SDataFWriter *pWriter, SBlockData *pBlockData, SSmaInfo *pSmaInfo) {
H
Hongze Cheng 已提交
513 514
  int32_t code = 0;

H
Hongze Cheng 已提交
515 516
  pSmaInfo->offset = 0;
  pSmaInfo->size = 0;
H
Hongze Cheng 已提交
517

H
Hongze Cheng 已提交
518 519 520
  // encode
  for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) {
    SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData);
H
Hongze Cheng 已提交
521

H
Hongze Cheng 已提交
522
    if ((!pColData->smaOn) || IS_VAR_DATA_TYPE(pColData->type)) continue;
H
Hongze Cheng 已提交
523

H
Hongze Cheng 已提交
524 525
    SColumnDataAgg sma;
    tsdbCalcColDataSMA(pColData, &sma);
H
Hongze Cheng 已提交
526

H
Hongze Cheng 已提交
527 528 529 530
    code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size + tPutColumnDataAgg(NULL, &sma));
    if (code) goto _err;
    pSmaInfo->size += tPutColumnDataAgg(pWriter->aBuf[0] + pSmaInfo->size, &sma);
  }
H
Hongze Cheng 已提交
531

H
Hongze Cheng 已提交
532 533
  // write
  if (pSmaInfo->size) {
H
Hongze Cheng 已提交
534
    code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size);
H
Hongze Cheng 已提交
535
    if (code) goto _err;
H
Hongze Cheng 已提交
536

H
Hongze Cheng 已提交
537 538
    code = tsdbWriteFile(pWriter->pSmaFD, pWriter->aBuf[0], pSmaInfo->size, NULL);
    if (code) goto _err;
H
Hongze Cheng 已提交
539

H
Hongze Cheng 已提交
540
    pSmaInfo->offset = pWriter->fSma.size;
H
Hongze Cheng 已提交
541
    // pWriter->fSma.size += size;
H
Hongze Cheng 已提交
542 543 544 545 546
  }

  return code;

_err:
H
Hongze Cheng 已提交
547
  tsdbError("vgId:%d tsdb write block sma failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
548 549 550
  return code;
}

H
Hongze Cheng 已提交
551 552
int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo,
                           int8_t cmprAlg, int8_t toLast) {
H
Hongze Cheng 已提交
553 554
  int32_t code = 0;

H
Hongze Cheng 已提交
555
  ASSERT(pBlockData->nRow > 0);
H
Hongze Cheng 已提交
556

H
Hongze Cheng 已提交
557 558 559
  pBlkInfo->offset = toLast ? pWriter->fSst[pWriter->wSet.nSstF - 1].size : pWriter->fData.size;
  pBlkInfo->szBlock = 0;
  pBlkInfo->szKey = 0;
H
Hongze Cheng 已提交
560

H
Hongze Cheng 已提交
561 562 563
  int32_t aBufN[4] = {0};
  code = tCmprBlockData(pBlockData, cmprAlg, NULL, NULL, pWriter->aBuf, aBufN);
  if (code) goto _err;
H
Hongze Cheng 已提交
564

H
Hongze Cheng 已提交
565
  // write =================
H
Hongze Cheng 已提交
566
  STsdbFD *pFD = toLast ? pWriter->pLastFD : pWriter->pDataFD;
H
Hongze Cheng 已提交
567

H
Hongze Cheng 已提交
568 569
  pBlkInfo->szKey = aBufN[3] + aBufN[2];
  pBlkInfo->szBlock = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3];
H
Hongze Cheng 已提交
570

H
Hongze Cheng 已提交
571 572
  code = tsdbWriteFile(pFD, pWriter->aBuf[3], aBufN[3], NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
573

H
Hongze Cheng 已提交
574 575
  code = tsdbWriteFile(pFD, pWriter->aBuf[2], aBufN[2], NULL);
  if (code) goto _err;
H
Hongze Cheng 已提交
576

H
Hongze Cheng 已提交
577
  if (aBufN[1]) {
H
Hongze Cheng 已提交
578 579
    code = tsdbWriteFile(pFD, pWriter->aBuf[1], aBufN[1], NULL);
    if (code) goto _err;
H
Hongze Cheng 已提交
580
  }
H
Hongze Cheng 已提交
581

H
Hongze Cheng 已提交
582
  if (aBufN[0]) {
H
Hongze Cheng 已提交
583 584
    code = tsdbWriteFile(pFD, pWriter->aBuf[0], aBufN[0], NULL);
    if (code) goto _err;
H
Hongze Cheng 已提交
585
  }
H
Hongze Cheng 已提交
586

H
Hongze Cheng 已提交
587 588 589 590 591 592
  // update info
  if (toLast) {
    pWriter->fSst[pWriter->wSet.nSstF - 1].size += pBlkInfo->szBlock;
  } else {
    pWriter->fData.size += pBlkInfo->szBlock;
  }
H
Hongze Cheng 已提交
593

H
Hongze Cheng 已提交
594 595 596 597 598
  // ================= SMA ====================
  if (pSmaInfo) {
    code = tsdbWriteBlockSma(pWriter, pBlockData, pSmaInfo);
    if (code) goto _err;
  }
H
Hongze Cheng 已提交
599

H
Hongze Cheng 已提交
600 601 602 603
_exit:
  tsdbTrace("vgId:%d tsdb write block data, suid:%" PRId64 " uid:%" PRId64 " nRow:%d, offset:%" PRId64 " size:%d",
            TD_VID(pWriter->pTsdb->pVnode), pBlockData->suid, pBlockData->uid, pBlockData->nRow, pBlkInfo->offset,
            pBlkInfo->szBlock);
H
Hongze Cheng 已提交
604 605 606
  return code;

_err:
H
Hongze Cheng 已提交
607
  tsdbError("vgId:%d tsdb write block data failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
608 609
  return code;
}
H
Hongze Cheng 已提交
610

H
Hongze Cheng 已提交
611 612 613 614 615 616 617 618
int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) {
  int32_t   code = 0;
  int64_t   n;
  int64_t   size;
  TdFilePtr pOutFD = NULL;  // TODO
  TdFilePtr PInFD = NULL;   // TODO
  char      fNameFrom[TSDB_FILENAME_LEN];
  char      fNameTo[TSDB_FILENAME_LEN];
H
Hongze Cheng 已提交
619

H
Hongze Cheng 已提交
620 621 622
  // head
  tsdbHeadFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pHeadF, fNameFrom);
  tsdbHeadFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pHeadF, fNameTo);
H
Hongze Cheng 已提交
623

H
Hongze Cheng 已提交
624 625 626
  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
627 628 629
    goto _err;
  }

H
Hongze Cheng 已提交
630 631
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
632 633 634 635
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
636
  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pHeadF->size);
H
Hongze Cheng 已提交
637 638 639 640
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
641 642
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);
H
Hongze Cheng 已提交
643 644

  // data
H
Hongze Cheng 已提交
645 646
  tsdbDataFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pDataF, fNameFrom);
  tsdbDataFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pDataF, fNameTo);
H
Hongze Cheng 已提交
647

H
Hongze Cheng 已提交
648 649
  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
H
Hongze Cheng 已提交
650 651 652 653
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
654 655
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
656 657 658
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
659 660

  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pDataF->size);
H
Hongze Cheng 已提交
661 662 663 664
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
665 666
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);
H
Hongze Cheng 已提交
667

H
Hongze Cheng 已提交
668 669 670
  // sst
  tsdbSstFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->aSstF[0], fNameFrom);
  tsdbSstFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->aSstF[0], fNameTo);
H
Hongze Cheng 已提交
671

H
Hongze Cheng 已提交
672 673 674 675
  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
676 677
  }

H
Hongze Cheng 已提交
678 679
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
680 681 682 683
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
684 685
  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->aSstF[0]->size);
  if (n < 0) {
H
Hongze Cheng 已提交
686 687 688
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
689 690
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);
H
Hongze Cheng 已提交
691

H
Hongze Cheng 已提交
692 693 694 695 696 697
  // sma
  tsdbSmaFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pSmaF, fNameFrom);
  tsdbSmaFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pSmaF, fNameTo);

  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
H
Hongze Cheng 已提交
698 699 700 701
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
702 703
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
704 705 706 707
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
708 709 710 711
  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pSmaF->size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
712
  }
H
Hongze Cheng 已提交
713 714 715
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);

H
Hongze Cheng 已提交
716 717 718
  return code;

_err:
H
Hongze Cheng 已提交
719
  tsdbError("vgId:%d, tsdb DFileSet copy failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
720 721 722
  return code;
}

H
Hongze Cheng 已提交
723 724 725 726
// SDataFReader ====================================================
int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet) {
  int32_t       code = 0;
  SDataFReader *pReader;
H
Hongze Cheng 已提交
727
  int32_t       szPage = TSDB_DEFAULT_PAGE_SIZE;
H
Hongze Cheng 已提交
728
  char          fname[TSDB_FILENAME_LEN];
H
Hongze Cheng 已提交
729

H
Hongze Cheng 已提交
730 731 732 733
  // alloc
  pReader = (SDataFReader *)taosMemoryCalloc(1, sizeof(*pReader));
  if (pReader == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
734 735
    goto _err;
  }
H
Hongze Cheng 已提交
736 737
  pReader->pTsdb = pTsdb;
  pReader->pSet = pSet;
H
Hongze Cheng 已提交
738

H
Hongze Cheng 已提交
739 740
  // head
  tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname);
H
Hongze Cheng 已提交
741 742
  code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->pHeadFD);
  if (code) goto _err;
H
Hongze Cheng 已提交
743

H
Hongze Cheng 已提交
744 745
  // data
  tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
H
Hongze Cheng 已提交
746 747
  code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->pDataFD);
  if (code) goto _err;
H
Hongze Cheng 已提交
748

H
Hongze Cheng 已提交
749 750
  // sma
  tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname);
H
Hongze Cheng 已提交
751 752
  code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->pSmaFD);
  if (code) goto _err;
H
Hongze Cheng 已提交
753

H
Hongze Cheng 已提交
754 755 756
  // sst
  for (int32_t iSst = 0; iSst < pSet->nSstF; iSst++) {
    tsdbSstFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSstF[iSst], fname);
H
Hongze Cheng 已提交
757
    code = tsdbOpenFile(fname, szPage, TD_FILE_READ, &pReader->aSstFD[iSst]);
H
Hongze Cheng 已提交
758
    if (code) goto _err;
H
Hongze Cheng 已提交
759
  }
H
Hongze Cheng 已提交
760

H
Hongze Cheng 已提交
761 762 763 764 765 766 767 768 769 770 771
  *ppReader = pReader;
  return code;

_err:
  tsdbError("vgId:%d, tsdb data file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppReader = NULL;
  return code;
}

int32_t tsdbDataFReaderClose(SDataFReader **ppReader) {
  int32_t code = 0;
H
Hongze Cheng 已提交
772
  if (*ppReader == NULL) return code;
H
Hongze Cheng 已提交
773 774

  // head
H
Hongze Cheng 已提交
775
  tsdbCloseFile(&(*ppReader)->pHeadFD);
H
Hongze Cheng 已提交
776

H
Hongze Cheng 已提交
777
  // data
H
Hongze Cheng 已提交
778
  tsdbCloseFile(&(*ppReader)->pDataFD);
H
Hongze Cheng 已提交
779

H
Hongze Cheng 已提交
780
  // sma
H
Hongze Cheng 已提交
781
  tsdbCloseFile(&(*ppReader)->pSmaFD);
H
Hongze Cheng 已提交
782

H
Hongze Cheng 已提交
783 784
  // sst
  for (int32_t iSst = 0; iSst < (*ppReader)->pSet->nSstF; iSst++) {
H
Hongze Cheng 已提交
785
    tsdbCloseFile(&(*ppReader)->aSstFD[iSst]);
H
Hongze Cheng 已提交
786 787 788 789
  }

  for (int32_t iBuf = 0; iBuf < sizeof((*ppReader)->aBuf) / sizeof(uint8_t *); iBuf++) {
    tFree((*ppReader)->aBuf[iBuf]);
H
Hongze Cheng 已提交
790
  }
H
Hongze Cheng 已提交
791 792
  taosMemoryFree(*ppReader);
  *ppReader = NULL;
H
Hongze Cheng 已提交
793 794 795
  return code;

_err:
H
Hongze Cheng 已提交
796
  tsdbError("vgId:%d, data file reader close failed since %s", TD_VID((*ppReader)->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
797 798 799
  return code;
}

H
Hongze Cheng 已提交
800
int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx) {
H
Hongze Cheng 已提交
801 802 803 804
  int32_t    code = 0;
  SHeadFile *pHeadFile = pReader->pSet->pHeadF;
  int64_t    offset = pHeadFile->offset;
  int64_t    size = pHeadFile->size - offset;
H
Hongze Cheng 已提交
805

H
Hongze Cheng 已提交
806
  taosArrayClear(aBlockIdx);
H
Hongze Cheng 已提交
807
  if (size == 0) return code;
H
Hongze Cheng 已提交
808 809

  // alloc
H
Hongze Cheng 已提交
810
  code = tRealloc(&pReader->aBuf[0], size);
H
Hongze Cheng 已提交
811 812
  if (code) goto _err;

H
Hongze Cheng 已提交
813
  // read
H
Hongze Cheng 已提交
814 815
  code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size);
  if (code) goto _err;
H
Hongze Cheng 已提交
816 817

  // decode
H
Hongze Cheng 已提交
818
  int64_t n = 0;
H
Hongze Cheng 已提交
819
  while (n < size) {
H
Hongze Cheng 已提交
820 821 822 823 824 825 826
    SBlockIdx blockIdx;
    n += tGetBlockIdx(pReader->aBuf[0] + n, &blockIdx);

    if (taosArrayPush(aBlockIdx, &blockIdx) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }
H
Hongze Cheng 已提交
827
  }
H
Hongze Cheng 已提交
828
  ASSERT(n == size);
H
Hongze Cheng 已提交
829 830 831 832

  return code;

_err:
H
Hongze Cheng 已提交
833
  tsdbError("vgId:%d, read block idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
834 835 836
  return code;
}

H
Hongze Cheng 已提交
837
int32_t tsdbReadSstBlk(SDataFReader *pReader, int32_t iSst, SArray *aSstBlk) {
H
Hongze Cheng 已提交
838 839 840 841
  int32_t   code = 0;
  SSstFile *pSstFile = pReader->pSet->aSstF[iSst];
  int64_t   offset = pSstFile->offset;
  int64_t   size = pSstFile->size - offset;
H
Hongze Cheng 已提交
842

H
Hongze Cheng 已提交
843
  taosArrayClear(aSstBlk);
H
Hongze Cheng 已提交
844
  if (size == 0) return code;
H
Hongze Cheng 已提交
845 846

  // alloc
H
Hongze Cheng 已提交
847
  code = tRealloc(&pReader->aBuf[0], size);
H
Hongze Cheng 已提交
848 849
  if (code) goto _err;

H
Hongze Cheng 已提交
850
  // read
H
Hongze Cheng 已提交
851 852
  code = tsdbReadFile(pReader->aSstFD[iSst], offset, pReader->aBuf[0], size);
  if (code) goto _err;
H
Hongze Cheng 已提交
853

H
Hongze Cheng 已提交
854
  // decode
H
Hongze Cheng 已提交
855
  int64_t n = 0;
H
Hongze Cheng 已提交
856 857 858
  while (n < size) {
    SSstBlk sstBlk;
    n += tGetSstBlk(pReader->aBuf[0] + n, &sstBlk);
H
Hongze Cheng 已提交
859

H
Hongze Cheng 已提交
860
    if (taosArrayPush(aSstBlk, &sstBlk) == NULL) {
H
Hongze Cheng 已提交
861 862 863 864
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }
  }
H
Hongze Cheng 已提交
865
  ASSERT(n == size);
H
Hongze Cheng 已提交
866

H
Hongze Cheng 已提交
867 868 869
  return code;

_err:
H
Hongze Cheng 已提交
870
  tsdbError("vgId:%d read sst blk failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
871 872 873
  return code;
}

H
Hongze Cheng 已提交
874 875 876 877
int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBlock) {
  int32_t code = 0;
  int64_t offset = pBlockIdx->offset;
  int64_t size = pBlockIdx->size;
H
Hongze Cheng 已提交
878 879

  // alloc
H
Hongze Cheng 已提交
880
  code = tRealloc(&pReader->aBuf[0], size);
H
Hongze Cheng 已提交
881 882
  if (code) goto _err;

H
Hongze Cheng 已提交
883
  // read
H
Hongze Cheng 已提交
884 885
  code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size);
  if (code) goto _err;
H
Hongze Cheng 已提交
886

H
Hongze Cheng 已提交
887
  // decode
H
Hongze Cheng 已提交
888 889
  int64_t n = tGetMapData(pReader->aBuf[0], mBlock);
  if (n < 0) {
H
Hongze Cheng 已提交
890 891 892
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
H
Hongze Cheng 已提交
893
  ASSERT(n == size);
H
Hongze Cheng 已提交
894

H
Hongze Cheng 已提交
895
  return code;
H
Hongze Cheng 已提交
896

H
Hongze Cheng 已提交
897 898 899
_err:
  tsdbError("vgId:%d, read block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
  return code;
H
Hongze Cheng 已提交
900
}
H
Hongze Cheng 已提交
901

H
Hongze Cheng 已提交
902 903 904
int32_t tsdbReadBlockSma(SDataFReader *pReader, SDataBlk *pDataBlk, SArray *aColumnDataAgg) {
  int32_t   code = 0;
  SSmaInfo *pSmaInfo = &pDataBlk->smaInfo;
H
Hongze Cheng 已提交
905

H
Hongze Cheng 已提交
906
  ASSERT(pSmaInfo->size > 0);
H
Hongze Cheng 已提交
907

H
Hongze Cheng 已提交
908
  taosArrayClear(aColumnDataAgg);
H
Hongze Cheng 已提交
909

H
Hongze Cheng 已提交
910
  // alloc
H
Hongze Cheng 已提交
911
  code = tRealloc(&pReader->aBuf[0], pSmaInfo->size);
H
Hongze Cheng 已提交
912
  if (code) goto _err;
H
Hongze Cheng 已提交
913

H
Hongze Cheng 已提交
914
  // read
H
Hongze Cheng 已提交
915
  code = tsdbReadFile(pReader->pSmaFD, pSmaInfo->offset, pReader->aBuf[0], pSmaInfo->size);
H
Hongze Cheng 已提交
916
  if (code) goto _err;
H
Hongze Cheng 已提交
917

H
Hongze Cheng 已提交
918
  // decode
H
Hongze Cheng 已提交
919
  int32_t n = 0;
H
Hongze Cheng 已提交
920 921 922
  while (n < pSmaInfo->size) {
    SColumnDataAgg sma;
    n += tGetColumnDataAgg(pReader->aBuf[0] + n, &sma);
H
Hongze Cheng 已提交
923

H
Hongze Cheng 已提交
924 925
    if (taosArrayPush(aColumnDataAgg, &sma) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
926 927
      goto _err;
    }
H
Hongze Cheng 已提交
928
  }
H
Hongze Cheng 已提交
929
  ASSERT(n == pSmaInfo->size);
H
Hongze Cheng 已提交
930
  return code;
H
Hongze Cheng 已提交
931

H
Hongze Cheng 已提交
932
_err:
H
Hongze Cheng 已提交
933
  tsdbError("vgId:%d tsdb read block sma failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
934 935
  return code;
}
H
Hongze Cheng 已提交
936

H
Hongze Cheng 已提交
937 938
static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo, int8_t fromLast,
                                     SBlockData *pBlockData) {
H
Hongze Cheng 已提交
939
  int32_t code = 0;
H
Hongze Cheng 已提交
940

H
Hongze Cheng 已提交
941 942
  tBlockDataClear(pBlockData);

H
Hongze Cheng 已提交
943
  STsdbFD *pFD = fromLast ? pReader->aSstFD[0] : pReader->pDataFD;  // (todo)
H
Hongze Cheng 已提交
944 945

  // todo: realloc pReader->aBuf[0]
H
Hongze Cheng 已提交
946 947

  // uid + version + tskey
H
Hongze Cheng 已提交
948
  tsdbReadFile(pFD, pBlkInfo->offset, pReader->aBuf[0], pBlkInfo->szKey);  // todo
H
Hongze Cheng 已提交
949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967
  SDiskDataHdr hdr;
  uint8_t     *p = pReader->aBuf[0] + tGetDiskDataHdr(pReader->aBuf[0], &hdr);

  ASSERT(hdr.delimiter == TSDB_FILE_DLMT);
  ASSERT(pBlockData->suid == hdr.suid);
  ASSERT(pBlockData->uid == hdr.uid);

  pBlockData->nRow = hdr.nRow;

  // uid
  if (hdr.uid == 0) {
    ASSERT(hdr.szUid);
    code = tsdbDecmprData(p, hdr.szUid, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aUid,
                          sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]);
    if (code) goto _err;
  } else {
    ASSERT(!hdr.szUid);
  }
  p += hdr.szUid;
H
Hongze Cheng 已提交
968

H
Hongze Cheng 已提交
969 970 971 972 973
  // version
  code = tsdbDecmprData(p, hdr.szVer, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aVersion,
                        sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]);
  if (code) goto _err;
  p += hdr.szVer;
H
Hongze Cheng 已提交
974

H
Hongze Cheng 已提交
975 976 977
  // TSKEY
  code = tsdbDecmprData(p, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY,
                        sizeof(TSKEY) * hdr.nRow, &pReader->aBuf[1]);
H
Hongze Cheng 已提交
978
  if (code) goto _err;
H
Hongze Cheng 已提交
979
  p += hdr.szKey;
H
Hongze Cheng 已提交
980

H
Hongze Cheng 已提交
981
  ASSERT(p - pReader->aBuf[0] == pBlkInfo->szKey - sizeof(TSCKSUM));
H
Hongze Cheng 已提交
982

H
Hongze Cheng 已提交
983 984
  // read and decode columns
  if (taosArrayGetSize(pBlockData->aIdx) == 0) goto _exit;
H
Hongze Cheng 已提交
985

H
Hongze Cheng 已提交
986 987
  if (hdr.szBlkCol > 0) {
    int64_t offset = pBlkInfo->offset + pBlkInfo->szKey;
H
Hongze Cheng 已提交
988
    tsdbReadFile(pFD, offset, pReader->aBuf[0], hdr.szBlkCol + sizeof(TSCKSUM));
H
Hongze Cheng 已提交
989 990
  }

H
Hongze Cheng 已提交
991 992 993
  SBlockCol  blockCol = {.cid = 0};
  SBlockCol *pBlockCol = &blockCol;
  int32_t    n = 0;
H
Hongze Cheng 已提交
994

H
Hongze Cheng 已提交
995 996
  for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) {
    SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData);
H
Hongze Cheng 已提交
997

H
Hongze Cheng 已提交
998 999 1000 1001 1002 1003 1004
    while (pBlockCol && pBlockCol->cid < pColData->cid) {
      if (n < hdr.szBlkCol) {
        n += tGetBlockCol(pReader->aBuf[0] + n, pBlockCol);
      } else {
        ASSERT(n == hdr.szBlkCol);
        pBlockCol = NULL;
      }
H
Hongze Cheng 已提交
1005
    }
H
Hongze Cheng 已提交
1006

H
Hongze Cheng 已提交
1007 1008 1009 1010 1011 1012 1013 1014 1015
    if (pBlockCol == NULL || pBlockCol->cid > pColData->cid) {
      // add a lot of NONE
      for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) {
        code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type));
        if (code) goto _err;
      }
    } else {
      ASSERT(pBlockCol->type == pColData->type);
      ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE);
H
Hongze Cheng 已提交
1016

H
Hongze Cheng 已提交
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
      if (pBlockCol->flag == HAS_NULL) {
        // add a lot of NULL
        for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) {
          code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type));
          if (code) goto _err;
        }
      } else {
        // decode from binary
        int64_t offset = pBlkInfo->offset + pBlkInfo->szKey + hdr.szBlkCol + sizeof(TSCKSUM) + pBlockCol->offset;
        int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM);

H
Hongze Cheng 已提交
1028
        tsdbReadFile(pFD, offset, pReader->aBuf[1], size);
H
Hongze Cheng 已提交
1029 1030 1031 1032 1033

        code = tsdbDecmprColData(pReader->aBuf[1], pBlockCol, hdr.cmprAlg, hdr.nRow, pColData, &pReader->aBuf[2]);
        if (code) goto _err;
      }
    }
H
Hongze Cheng 已提交
1034
  }
H
Hongze Cheng 已提交
1035

H
Hongze Cheng 已提交
1036
_exit:
H
Hongze Cheng 已提交
1037 1038
  return code;

H
Hongze Cheng 已提交
1039
_err:
H
Hongze Cheng 已提交
1040
  tsdbError("vgId:%d tsdb read block data impl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1041
  return code;
H
Hongze Cheng 已提交
1042
}
H
Hongze Cheng 已提交
1043

H
Hongze Cheng 已提交
1044 1045
int32_t tsdbReadDataBlock(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData) {
  int32_t code = 0;
H
Hongze Cheng 已提交
1046

H
Hongze Cheng 已提交
1047 1048
  code = tsdbReadBlockDataImpl(pReader, &pDataBlk->aSubBlock[0], 0, pBlockData);
  if (code) goto _err;
H
Hongze Cheng 已提交
1049

H
Hongze Cheng 已提交
1050 1051 1052
  if (pDataBlk->nSubBlock > 1) {
    SBlockData bData1;
    SBlockData bData2;
H
Hongze Cheng 已提交
1053

H
Hongze Cheng 已提交
1054 1055 1056 1057 1058
    // create
    code = tBlockDataCreate(&bData1);
    if (code) goto _err;
    code = tBlockDataCreate(&bData2);
    if (code) goto _err;
H
Hongze Cheng 已提交
1059

H
Hongze Cheng 已提交
1060 1061 1062
    // init
    tBlockDataInitEx(&bData1, pBlockData);
    tBlockDataInitEx(&bData2, pBlockData);
H
Hongze Cheng 已提交
1063

H
Hongze Cheng 已提交
1064 1065 1066 1067 1068 1069 1070
    for (int32_t iSubBlock = 1; iSubBlock < pDataBlk->nSubBlock; iSubBlock++) {
      code = tsdbReadBlockDataImpl(pReader, &pDataBlk->aSubBlock[iSubBlock], 0, &bData1);
      if (code) {
        tBlockDataDestroy(&bData1, 1);
        tBlockDataDestroy(&bData2, 1);
        goto _err;
      }
H
Hongze Cheng 已提交
1071

H
Hongze Cheng 已提交
1072 1073 1074 1075 1076 1077
      code = tBlockDataCopy(pBlockData, &bData2);
      if (code) {
        tBlockDataDestroy(&bData1, 1);
        tBlockDataDestroy(&bData2, 1);
        goto _err;
      }
H
Hongze Cheng 已提交
1078

H
Hongze Cheng 已提交
1079 1080 1081 1082 1083 1084 1085
      code = tBlockDataMerge(&bData1, &bData2, pBlockData);
      if (code) {
        tBlockDataDestroy(&bData1, 1);
        tBlockDataDestroy(&bData2, 1);
        goto _err;
      }
    }
H
Hongze Cheng 已提交
1086

H
Hongze Cheng 已提交
1087 1088
    tBlockDataDestroy(&bData1, 1);
    tBlockDataDestroy(&bData2, 1);
H
Hongze Cheng 已提交
1089 1090
  }

H
Hongze Cheng 已提交
1091
  return code;
H
Hongze Cheng 已提交
1092

H
Hongze Cheng 已提交
1093 1094 1095 1096
_err:
  tsdbError("vgId:%d tsdb read data block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
  return code;
}
H
Hongze Cheng 已提交
1097

H
Hongze Cheng 已提交
1098
int32_t tsdbReadSstBlock(SDataFReader *pReader, int32_t iSst, SSstBlk *pSstBlk, SBlockData *pBlockData) {
H
Hongze Cheng 已提交
1099
  int32_t code = 0;
H
Hongze Cheng 已提交
1100

H
Hongze Cheng 已提交
1101 1102 1103 1104
  // alloc
  code = tRealloc(&pReader->aBuf[0], pSstBlk->bInfo.szBlock);
  if (code) goto _err;

H
Hongze Cheng 已提交
1105
  // read
H
Hongze Cheng 已提交
1106
  code = tsdbReadFile(pReader->aSstFD[iSst], pSstBlk->bInfo.offset, pReader->aBuf[0], pSstBlk->bInfo.szBlock);
H
Hongze Cheng 已提交
1107
  if (code) goto _err;
H
Hongze Cheng 已提交
1108

H
Hongze Cheng 已提交
1109 1110
  // decmpr
  code = tDecmprBlockData(pReader->aBuf[0], pSstBlk->bInfo.szBlock, pBlockData, &pReader->aBuf[1]);
H
Hongze Cheng 已提交
1111
  if (code) goto _err;
H
Hongze Cheng 已提交
1112

H
Hongze Cheng 已提交
1113 1114 1115 1116
  return code;

_err:
  tsdbError("vgId:%d tsdb read sst block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
  return code;
}

// SDelFWriter ====================================================
int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb) {
  int32_t      code = 0;
  char         fname[TSDB_FILENAME_LEN];
  char         hdr[TSDB_FHDR_SIZE] = {0};
  SDelFWriter *pDelFWriter;
  int64_t      n;

  // alloc
  pDelFWriter = (SDelFWriter *)taosMemoryCalloc(1, sizeof(*pDelFWriter));
  if (pDelFWriter == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  pDelFWriter->pTsdb = pTsdb;
  pDelFWriter->fDel = *pFile;

  tsdbDelFileName(pTsdb, pFile, fname);
  pDelFWriter->pWriteH = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE);
  if (pDelFWriter->pWriteH == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  // update header
  n = taosWriteFile(pDelFWriter->pWriteH, &hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  pDelFWriter->fDel.size = TSDB_FHDR_SIZE;
  pDelFWriter->fDel.offset = 0;

  *ppWriter = pDelFWriter;
  return code;

_err:
  tsdbError("vgId:%d, failed to open del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppWriter = NULL;
  return code;
}

int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) {
  int32_t      code = 0;
  SDelFWriter *pWriter = *ppWriter;
  STsdb       *pTsdb = pWriter->pTsdb;

  // sync
  if (sync && taosFsyncFile(pWriter->pWriteH) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  // close
  if (taosCloseFile(&pWriter->pWriteH) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t *); iBuf++) {
    tFree(pWriter->aBuf[iBuf]);
  }
  taosMemoryFree(pWriter);

  *ppWriter = NULL;
  return code;

_err:
  tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  return code;
}

int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx) {
  int32_t code = 0;
  int64_t size;
  int64_t n;

  // prepare
  size = sizeof(uint32_t);
  for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) {
    size += tPutDelData(NULL, taosArrayGet(aDelData, iDelData));
  }
  size += sizeof(TSCKSUM);

  // alloc
  code = tRealloc(&pWriter->aBuf[0], size);
  if (code) goto _err;

  // build
  n = 0;
  n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) {
    n += tPutDelData(pWriter->aBuf[0] + n, taosArrayGet(aDelData, iDelData));
  }
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);

  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  ASSERT(n == size);

  // update
  pDelIdx->offset = pWriter->fDel.size;
  pDelIdx->size = size;
  pWriter->fDel.size += size;

  return code;

_err:
  tsdbError("vgId:%d, failed to write del data since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
  return code;
}

int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx) {
  int32_t  code = 0;
  int64_t  size;
  int64_t  n;
  SDelIdx *pDelIdx;

  // prepare
  size = sizeof(uint32_t);
  for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
    size += tPutDelIdx(NULL, taosArrayGet(aDelIdx, iDelIdx));
  }
  size += sizeof(TSCKSUM);

  // alloc
  code = tRealloc(&pWriter->aBuf[0], size);
  if (code) goto _err;

  // build
  n = 0;
  n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
    n += tPutDelIdx(pWriter->aBuf[0] + n, taosArrayGet(aDelIdx, iDelIdx));
  }
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);

  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
1269 1270 1271 1272 1273
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
1274 1275 1276
  // update
  pWriter->fDel.offset = pWriter->fDel.size;
  pWriter->fDel.size += size;
H
Hongze Cheng 已提交
1277

H
Hongze Cheng 已提交
1278
  return code;
H
Hongze Cheng 已提交
1279

H
Hongze Cheng 已提交
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297
_err:
  tsdbError("vgId:%d, write del idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
  return code;
}

int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter) {
  int32_t code = 0;
  char    hdr[TSDB_FHDR_SIZE];
  int64_t size = TSDB_FHDR_SIZE;
  int64_t n;

  // build
  memset(hdr, 0, size);
  tPutDelFile(hdr, &pWriter->fDel);
  taosCalcChecksumAppend(0, hdr, size);

  // seek
  if (taosLSeekFile(pWriter->pWriteH, 0, SEEK_SET) < 0) {
H
Hongze Cheng 已提交
1298 1299 1300 1301
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
1302 1303
  // write
  n = taosWriteFile(pWriter->pWriteH, hdr, size);
H
Hongze Cheng 已提交
1304 1305 1306 1307 1308 1309 1310 1311
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  return code;

_err:
H
Hongze Cheng 已提交
1312
  tsdbError("vgId:%d, update del file hdr failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1313
  return code;
H
Hongze Cheng 已提交
1314
}
H
Hongze Cheng 已提交
1315 1316 1317 1318 1319
// SDelFReader ====================================================
struct SDelFReader {
  STsdb    *pTsdb;
  SDelFile  fDel;
  TdFilePtr pReadH;
H
Hongze Cheng 已提交
1320

H
Hongze Cheng 已提交
1321 1322
  uint8_t *aBuf[1];
};
H
Hongze Cheng 已提交
1323

H
Hongze Cheng 已提交
1324 1325 1326 1327 1328
int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb) {
  int32_t      code = 0;
  char         fname[TSDB_FILENAME_LEN];
  SDelFReader *pDelFReader;
  int64_t      n;
H
Hongze Cheng 已提交
1329

H
Hongze Cheng 已提交
1330 1331 1332
  // alloc
  pDelFReader = (SDelFReader *)taosMemoryCalloc(1, sizeof(*pDelFReader));
  if (pDelFReader == NULL) {
H
Hongze Cheng 已提交
1333
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346
    goto _err;
  }

  // open impl
  pDelFReader->pTsdb = pTsdb;
  pDelFReader->fDel = *pFile;

  tsdbDelFileName(pTsdb, pFile, fname);
  pDelFReader->pReadH = taosOpenFile(fname, TD_FILE_READ);
  if (pDelFReader->pReadH == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    taosMemoryFree(pDelFReader);
    goto _err;
H
Hongze Cheng 已提交
1347 1348 1349
  }

_exit:
H
Hongze Cheng 已提交
1350
  *ppReader = pDelFReader;
H
Hongze Cheng 已提交
1351 1352
  return code;

H
Hongze Cheng 已提交
1353 1354 1355 1356
_err:
  tsdbError("vgId:%d, del file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppReader = NULL;
  return code;
H
Hongze Cheng 已提交
1357 1358
}

H
Hongze Cheng 已提交
1359 1360 1361
int32_t tsdbDelFReaderClose(SDelFReader **ppReader) {
  int32_t      code = 0;
  SDelFReader *pReader = *ppReader;
H
Hongze Cheng 已提交
1362

H
Hongze Cheng 已提交
1363 1364 1365 1366 1367 1368 1369 1370 1371
  if (pReader) {
    if (taosCloseFile(&pReader->pReadH) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _exit;
    }
    for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(uint8_t *); iBuf++) {
      tFree(pReader->aBuf[iBuf]);
    }
    taosMemoryFree(pReader);
H
Hongze Cheng 已提交
1372
  }
H
Hongze Cheng 已提交
1373
  *ppReader = NULL;
H
Hongze Cheng 已提交
1374 1375 1376 1377 1378

_exit:
  return code;
}

H
Hongze Cheng 已提交
1379
int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) {
H
Hongze Cheng 已提交
1380
  int32_t code = 0;
H
Hongze Cheng 已提交
1381 1382 1383
  int64_t offset = pDelIdx->offset;
  int64_t size = pDelIdx->size;
  int64_t n;
H
Hongze Cheng 已提交
1384

H
Hongze Cheng 已提交
1385
  taosArrayClear(aDelData);
H
Hongze Cheng 已提交
1386

H
Hongze Cheng 已提交
1387 1388 1389 1390 1391
  // seek
  if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
1392

H
Hongze Cheng 已提交
1393 1394 1395
  // alloc
  code = tRealloc(&pReader->aBuf[0], size);
  if (code) goto _err;
H
Hongze Cheng 已提交
1396

H
Hongze Cheng 已提交
1397 1398 1399 1400 1401 1402 1403 1404 1405
  // read
  n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }
H
Hongze Cheng 已提交
1406

H
Hongze Cheng 已提交
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }

  // // decode
  n = 0;

  uint32_t delimiter;
  n += tGetU32(pReader->aBuf[0] + n, &delimiter);
  while (n < size - sizeof(TSCKSUM)) {
    SDelData delData;
    n += tGetDelData(pReader->aBuf[0] + n, &delData);

    if (taosArrayPush(aDelData, &delData) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
H
Hongze Cheng 已提交
1425 1426 1427
    }
  }

H
Hongze Cheng 已提交
1428 1429 1430 1431 1432 1433
  ASSERT(n == size - sizeof(TSCKSUM));

  return code;

_err:
  tsdbError("vgId:%d, read del data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1434 1435 1436
  return code;
}

H
Hongze Cheng 已提交
1437
int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx) {
H
Hongze Cheng 已提交
1438
  int32_t code = 0;
H
Hongze Cheng 已提交
1439 1440 1441
  int32_t n;
  int64_t offset = pReader->fDel.offset;
  int64_t size = pReader->fDel.size - offset;
H
Hongze Cheng 已提交
1442

H
Hongze Cheng 已提交
1443 1444 1445 1446
  taosArrayClear(aDelIdx);

  // seek
  if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) {
H
Hongze Cheng 已提交
1447
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
1448
    goto _err;
H
Hongze Cheng 已提交
1449 1450
  }

H
Hongze Cheng 已提交
1451 1452 1453 1454 1455 1456
  // alloc
  code = tRealloc(&pReader->aBuf[0], size);
  if (code) goto _err;

  // read
  n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size);
H
Hongze Cheng 已提交
1457 1458
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
1459 1460
    goto _err;
  } else if (n < size) {
H
Hongze Cheng 已提交
1461
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
1462
    goto _err;
H
Hongze Cheng 已提交
1463 1464
  }

H
Hongze Cheng 已提交
1465 1466
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
H
Hongze Cheng 已提交
1467
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
1468
    goto _err;
H
Hongze Cheng 已提交
1469 1470
  }

H
Hongze Cheng 已提交
1471 1472 1473 1474 1475
  // decode
  n = 0;
  uint32_t delimiter;
  n += tGetU32(pReader->aBuf[0] + n, &delimiter);
  ASSERT(delimiter == TSDB_FILE_DLMT);
H
Hongze Cheng 已提交
1476

H
Hongze Cheng 已提交
1477 1478
  while (n < size - sizeof(TSCKSUM)) {
    SDelIdx delIdx;
H
Hongze Cheng 已提交
1479

H
Hongze Cheng 已提交
1480
    n += tGetDelIdx(pReader->aBuf[0] + n, &delIdx);
H
Hongze Cheng 已提交
1481

H
Hongze Cheng 已提交
1482 1483 1484 1485
    if (taosArrayPush(aDelIdx, &delIdx) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }
H
Hongze Cheng 已提交
1486 1487
  }

H
Hongze Cheng 已提交
1488
  ASSERT(n == size - sizeof(TSCKSUM));
H
Hongze Cheng 已提交
1489

H
Hongze Cheng 已提交
1490
  return code;
H
Hongze Cheng 已提交
1491

H
Hongze Cheng 已提交
1492 1493
_err:
  tsdbError("vgId:%d, read del idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1494
  return code;
H
Hongze Cheng 已提交
1495
}
H
Hongze Cheng 已提交
1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529

static int32_t tsdbReadAndCheck(TdFilePtr pFD, int64_t offset, uint8_t **ppOut, int32_t size, int8_t toCheck) {
  int32_t code = 0;

  // alloc
  code = tRealloc(ppOut, size);
  if (code) goto _exit;

  // seek
  int64_t n = taosLSeekFile(pFD, offset, SEEK_SET);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _exit;
  }

  // read
  n = taosReadFile(pFD, *ppOut, size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _exit;
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _exit;
  }

  // check
  if (toCheck && !taosCheckChecksumWhole(*ppOut, size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _exit;
  }

_exit:
  return code;
}