tsdbReaderWriter.c 43.0 KB
Newer Older
H
refact  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tsdb.h"
H
Hongze Cheng 已提交
17
// =============== PAGE-WISE FILE ===============
H
refact  
Hongze Cheng 已提交
18

H
Hongze Cheng 已提交
19
static int32_t tsdbOpenFile(const char *path, int32_t opt, STsdbFD *pFD) {
H
Hongze Cheng 已提交
20
  int32_t code = 0;
H
Hongze Cheng 已提交
21

H
Hongze Cheng 已提交
22 23
  pFD->pFD = taosOpenFile(path, opt);
  if (pFD->pFD == NULL) {
H
Hongze Cheng 已提交
24
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
25
    goto _exit;
H
Hongze Cheng 已提交
26 27
  }

H
Hongze Cheng 已提交
28 29 30 31 32
  pFD->szPage = 4096;
  pFD->pgno = 0;
  pFD->nBuf = 0;
  pFD->pBuf = taosMemoryMalloc(pFD->szPage);
  if (pFD->pBuf == NULL) {
H
Hongze Cheng 已提交
33
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
34
    goto _exit;
H
Hongze Cheng 已提交
35 36 37 38
  }

_exit:
  return code;
H
Hongze Cheng 已提交
39
}
H
Hongze Cheng 已提交
40

H
Hongze Cheng 已提交
41
static void tsdbCloseFile(STsdbFD *pFD) {
H
Hongze Cheng 已提交
42 43
  taosMemoryFree(pFD->pBuf);
  taosCloseFile(&pFD->pFD);
H
Hongze Cheng 已提交
44 45
}

H
Hongze Cheng 已提交
46
static int32_t tsdbSyncFile(STsdbFD *pFD) {
H
Hongze Cheng 已提交
47
  int32_t code = 0;
H
Hongze Cheng 已提交
48

H
Hongze Cheng 已提交
49 50 51
  if (taosFsyncFile(pFD->pFD) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _exit;
H
Hongze Cheng 已提交
52 53
  }

H
Hongze Cheng 已提交
54
_exit:
H
Hongze Cheng 已提交
55 56 57
  return code;
}

H
Hongze Cheng 已提交
58
static int32_t tsdbWriteFile(STsdbFD *pFD, uint8_t *pBuf, int32_t nBuf, int64_t *offset) {
H
Hongze Cheng 已提交
59
  int32_t code = 0;
H
Hongze Cheng 已提交
60

H
Hongze Cheng 已提交
61 62 63 64
  int32_t n = 0;
  while (n < nBuf) {
    int32_t remain = pFD->szPage - pFD->nBuf - sizeof(TSCKSUM);
    int32_t size = TMIN(remain, nBuf - n);
H
Hongze Cheng 已提交
65

H
Hongze Cheng 已提交
66 67 68
    memcpy(pFD->pBuf + pFD->nBuf, pBuf + n, size);
    n += size;
    pFD->nBuf += size;
H
Hongze Cheng 已提交
69

H
Hongze Cheng 已提交
70 71
    if (pFD->nBuf + sizeof(TSCKSUM) == pFD->szPage) {
      taosCalcChecksumAppend(0, pFD->pBuf, pFD->szPage);
H
Hongze Cheng 已提交
72

H
Hongze Cheng 已提交
73 74 75 76 77
      int64_t n = taosWriteFile(pFD->pFD, pFD->pBuf, pFD->szPage);
      if (n < 0) {
        code = TAOS_SYSTEM_ERROR(errno);
        goto _exit;
      }
H
Hongze Cheng 已提交
78

H
Hongze Cheng 已提交
79
      pFD->nBuf = 0;
H
Hongze Cheng 已提交
80
    }
H
Hongze Cheng 已提交
81
  }
H
Hongze Cheng 已提交
82 83

_exit:
H
Hongze Cheng 已提交
84 85 86
  return code;
}

H
Hongze Cheng 已提交
87
static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) {
H
Hongze Cheng 已提交
88
  int32_t code = 0;
H
more  
Hongze Cheng 已提交
89

H
Hongze Cheng 已提交
90 91
  int64_t n = taosLSeekFile(pFD->pFD, pgno * pFD->szPage, SEEK_SET);
  if (n < 0) {
H
Hongze Cheng 已提交
92
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
93
    goto _exit;
H
Hongze Cheng 已提交
94 95
  }

H
Hongze Cheng 已提交
96
  n = taosReadFile(pFD->pFD, pFD->pBuf, pFD->szPage);
H
Hongze Cheng 已提交
97
  if (n < 0) {
H
Hongze Cheng 已提交
98
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
99 100
    goto _exit;
  } else if (n < pFD->szPage) {
H
Hongze Cheng 已提交
101
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
102
    goto _exit;
H
Hongze Cheng 已提交
103 104
  }

H
Hongze Cheng 已提交
105
  if (!taosCheckChecksumWhole(pFD->pBuf, pFD->szPage)) {
H
Hongze Cheng 已提交
106
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
107
    goto _exit;
H
Hongze Cheng 已提交
108 109
  }

H
Hongze Cheng 已提交
110
  pFD->pgno = pgno;
H
Hongze Cheng 已提交
111

H
Hongze Cheng 已提交
112 113 114
_exit:
  return code;
}
H
Hongze Cheng 已提交
115

H
Hongze Cheng 已提交
116
static int64_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t count) {
H
Hongze Cheng 已提交
117
  int32_t code = 0;
H
Hongze Cheng 已提交
118

H
Hongze Cheng 已提交
119 120 121 122 123 124 125
  int64_t pgno = offset / pFD->szPage;
  int64_t n = 0;
  if (pFD->pgno == pgno) {
    int64_t bOff = offset % pFD->szPage;
    int64_t nRead = TMIN(pFD->szPage - bOff - sizeof(TSCKSUM), count);
    memcpy(pBuf + n, pFD->pBuf + bOff, nRead);
    n = nRead;
H
Hongze Cheng 已提交
126 127
  }

H
Hongze Cheng 已提交
128 129 130
  while (n < count) {
    code = tsdbReadFilePage(pFD, pgno);
    if (code) goto _exit;
H
Hongze Cheng 已提交
131

H
Hongze Cheng 已提交
132
    pgno++;
H
Hongze Cheng 已提交
133

H
Hongze Cheng 已提交
134 135 136 137 138 139
    int64_t nRead = TMIN(pFD->szPage - sizeof(TSCKSUM), count - n);
    memcpy(pBuf + n, pFD->pBuf, nRead);
    n += nRead;
  }

_exit:
H
Hongze Cheng 已提交
140
  return code;
H
Hongze Cheng 已提交
141 142
}

H
Hongze Cheng 已提交
143 144
// SDataFWriter ====================================================
int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet) {
H
Hongze Cheng 已提交
145
  int32_t       code = 0;
H
Hongze Cheng 已提交
146 147 148
  int32_t       flag;
  int64_t       n;
  SDataFWriter *pWriter = NULL;
H
Hongze Cheng 已提交
149
  char          fname[TSDB_FILENAME_LEN];
H
Hongze Cheng 已提交
150
  char          hdr[TSDB_FHDR_SIZE] = {0};
H
Hongze Cheng 已提交
151 152

  // alloc
H
Hongze Cheng 已提交
153 154
  pWriter = taosMemoryCalloc(1, sizeof(*pWriter));
  if (pWriter == NULL) {
H
Hongze Cheng 已提交
155 156 157
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
H
Hongze Cheng 已提交
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
  pWriter->pTsdb = pTsdb;
  pWriter->wSet = (SDFileSet){
      .diskId = pSet->diskId,
      .fid = pSet->fid,
      .pHeadF = &pWriter->fHead,
      .pDataF = &pWriter->fData,
      .pSmaF = &pWriter->fSma,
      .nSstF = pSet->nSstF  //
  };
  pWriter->fHead = *pSet->pHeadF;
  pWriter->fData = *pSet->pDataF;
  pWriter->fSma = *pSet->pSmaF;
  for (int8_t iSst = 0; iSst < pSet->nSstF; iSst++) {
    pWriter->wSet.aSstF[iSst] = &pWriter->fSst[iSst];
    pWriter->fSst[iSst] = *pSet->aSstF[iSst];
  }
H
Hongze Cheng 已提交
174 175

  // head
H
Hongze Cheng 已提交
176 177 178 179 180 181 182 183 184 185
  flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
  tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname);
  pWriter->pHeadFD = taosOpenFile(fname, flag);
  if (pWriter->pHeadFD == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
H
Hongze Cheng 已提交
186 187 188 189
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
190 191 192 193
  ASSERT(n == TSDB_FHDR_SIZE);

  pWriter->fHead.size += TSDB_FHDR_SIZE;

H
Hongze Cheng 已提交
194
  // data
H
Hongze Cheng 已提交
195 196 197 198 199 200 201 202
  if (pWriter->fData.size == 0) {
    flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
  } else {
    flag = TD_FILE_WRITE;
  }
  tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname);
  pWriter->pDataFD = taosOpenFile(fname, flag);
  if (pWriter->pDataFD == NULL) {
H
Hongze Cheng 已提交
203 204 205
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
  if (pWriter->fData.size == 0) {
    n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE);
    if (n < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

    pWriter->fData.size += TSDB_FHDR_SIZE;
  } else {
    n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_END);
    if (n < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

    ASSERT(n == pWriter->fData.size);
  }
H
Hongze Cheng 已提交
223 224

  // sma
H
Hongze Cheng 已提交
225 226 227 228 229 230 231 232
  if (pWriter->fSma.size == 0) {
    flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
  } else {
    flag = TD_FILE_WRITE;
  }
  tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname);
  pWriter->pSmaFD = taosOpenFile(fname, flag);
  if (pWriter->pSmaFD == NULL) {
H
Hongze Cheng 已提交
233 234 235
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
236 237 238 239 240 241
  if (pWriter->fSma.size == 0) {
    n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE);
    if (n < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
H
Hongze Cheng 已提交
242

H
Hongze Cheng 已提交
243 244 245 246
    pWriter->fSma.size += TSDB_FHDR_SIZE;
  } else {
    n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_END);
    if (n < 0) {
H
Hongze Cheng 已提交
247 248 249
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
H
Hongze Cheng 已提交
250 251

    ASSERT(n == pWriter->fSma.size);
H
Hongze Cheng 已提交
252 253
  }

H
Hongze Cheng 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
  // sst
  ASSERT(pWriter->fSst[pSet->nSstF - 1].size == 0);
  flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
  tsdbSstFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSst[pSet->nSstF - 1], fname);
  pWriter->pLastFD = taosOpenFile(fname, flag);
  if (pWriter->pLastFD == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
  n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
  pWriter->fSst[pWriter->wSet.nSstF - 1].size += TSDB_FHDR_SIZE;

  *ppWriter = pWriter;
H
Hongze Cheng 已提交
271 272 273
  return code;

_err:
H
Hongze Cheng 已提交
274 275
  tsdbError("vgId:%d, tsdb data file writer open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppWriter = NULL;
H
Hongze Cheng 已提交
276 277 278
  return code;
}

H
Hongze Cheng 已提交
279
int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync) {
H
Hongze Cheng 已提交
280
  int32_t code = 0;
H
Hongze Cheng 已提交
281
  STsdb  *pTsdb = NULL;
H
Hongze Cheng 已提交
282

H
Hongze Cheng 已提交
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
  if (*ppWriter == NULL) goto _exit;

  pTsdb = (*ppWriter)->pTsdb;
  if (sync) {
    if (taosFsyncFile((*ppWriter)->pHeadFD) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

    if (taosFsyncFile((*ppWriter)->pDataFD) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

    if (taosFsyncFile((*ppWriter)->pSmaFD) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }

    if (taosFsyncFile((*ppWriter)->pLastFD) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
  }

  if (taosCloseFile(&(*ppWriter)->pHeadFD) < 0) {
H
Hongze Cheng 已提交
309 310 311 312
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
313
  if (taosCloseFile(&(*ppWriter)->pDataFD) < 0) {
H
Hongze Cheng 已提交
314 315 316 317
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
318
  if (taosCloseFile(&(*ppWriter)->pSmaFD) < 0) {
H
Hongze Cheng 已提交
319 320 321 322
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
323 324 325
  if (taosCloseFile(&(*ppWriter)->pLastFD) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
326 327
  }

H
Hongze Cheng 已提交
328 329
  for (int32_t iBuf = 0; iBuf < sizeof((*ppWriter)->aBuf) / sizeof(uint8_t *); iBuf++) {
    tFree((*ppWriter)->aBuf[iBuf]);
H
Hongze Cheng 已提交
330
  }
H
Hongze Cheng 已提交
331
  taosMemoryFree(*ppWriter);
H
Hongze Cheng 已提交
332
_exit:
H
Hongze Cheng 已提交
333
  *ppWriter = NULL;
H
Hongze Cheng 已提交
334 335 336
  return code;

_err:
H
Hongze Cheng 已提交
337
  tsdbError("vgId:%d, data file writer close failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
338 339 340
  return code;
}

H
Hongze Cheng 已提交
341 342 343 344
int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) {
  int32_t code = 0;
  int64_t n;
  char    hdr[TSDB_FHDR_SIZE];
H
Hongze Cheng 已提交
345

H
Hongze Cheng 已提交
346 347 348 349
  // head ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutHeadFile(hdr, &pWriter->fHead);
  taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE);
H
Hongze Cheng 已提交
350

H
Hongze Cheng 已提交
351 352
  n = taosLSeekFile(pWriter->pHeadFD, 0, SEEK_SET);
  if (n < 0) {
H
Hongze Cheng 已提交
353 354 355 356
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
357
  n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE);
H
Hongze Cheng 已提交
358 359 360
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
361 362 363 364 365 366 367 368 369 370
  }

  // data ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutDataFile(hdr, &pWriter->fData);
  taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE);

  n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_SET);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
371 372 373
    goto _err;
  }

H
Hongze Cheng 已提交
374 375 376
  n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
377 378 379
    goto _err;
  }

H
Hongze Cheng 已提交
380 381 382 383
  // sma ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutSmaFile(hdr, &pWriter->fSma);
  taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE);
H
Hongze Cheng 已提交
384

H
Hongze Cheng 已提交
385 386 387 388 389
  n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_SET);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
390

H
Hongze Cheng 已提交
391 392 393 394
  n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
395 396
  }

H
Hongze Cheng 已提交
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
  // sst ==============
  memset(hdr, 0, TSDB_FHDR_SIZE);
  tPutSstFile(hdr, &pWriter->fSst[pWriter->wSet.nSstF - 1]);
  taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE);

  n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_SET);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
413 414 415 416

  return code;

_err:
H
Hongze Cheng 已提交
417
  tsdbError("vgId:%d, update DFileSet header failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
418 419 420
  return code;
}

H
Hongze Cheng 已提交
421 422 423 424 425
int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) {
  int32_t    code = 0;
  SHeadFile *pHeadFile = &pWriter->fHead;
  int64_t    size = 0;
  int64_t    n;
H
Hongze Cheng 已提交
426

H
Hongze Cheng 已提交
427 428 429
  // check
  if (taosArrayGetSize(aBlockIdx) == 0) {
    pHeadFile->offset = pHeadFile->size;
H
Hongze Cheng 已提交
430 431
    goto _exit;
  }
H
Hongze Cheng 已提交
432

H
Hongze Cheng 已提交
433 434 435 436 437 438 439
  // prepare
  size = sizeof(uint32_t);
  for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) {
    size += tPutBlockIdx(NULL, taosArrayGet(aBlockIdx, iBlockIdx));
  }
  size += sizeof(TSCKSUM);

H
Hongze Cheng 已提交
440
  // alloc
H
Hongze Cheng 已提交
441
  code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
442 443
  if (code) goto _err;

H
Hongze Cheng 已提交
444 445 446 447 448
  // build
  n = 0;
  n = tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) {
    n += tPutBlockIdx(pWriter->aBuf[0] + n, taosArrayGet(aBlockIdx, iBlockIdx));
H
Hongze Cheng 已提交
449
  }
H
Hongze Cheng 已提交
450
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
451

H
Hongze Cheng 已提交
452 453 454 455
  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
456 457 458 459 460
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
461 462 463
  // update
  pHeadFile->offset = pHeadFile->size;
  pHeadFile->size += size;
H
Hongze Cheng 已提交
464

H
Hongze Cheng 已提交
465
_exit:
H
Hongze Cheng 已提交
466 467
  tsdbTrace("vgId:%d write block idx, offset:%" PRId64 " size:%" PRId64 " nBlockIdx:%d", TD_VID(pWriter->pTsdb->pVnode),
            pHeadFile->offset, size, taosArrayGetSize(aBlockIdx));
H
Hongze Cheng 已提交
468 469 470
  return code;

_err:
H
Hongze Cheng 已提交
471
  tsdbError("vgId:%d, write block idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
472 473 474
  return code;
}

H
Hongze Cheng 已提交
475 476 477 478 479 480 481
int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, SBlockIdx *pBlockIdx) {
  int32_t    code = 0;
  SHeadFile *pHeadFile = &pWriter->fHead;
  int64_t    size;
  int64_t    n;

  ASSERT(mBlock->nItem > 0);
H
Hongze Cheng 已提交
482

H
Hongze Cheng 已提交
483
  // alloc
H
Hongze Cheng 已提交
484 485
  size = sizeof(uint32_t) + tPutMapData(NULL, mBlock) + sizeof(TSCKSUM);
  code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
486 487
  if (code) goto _err;

H
Hongze Cheng 已提交
488 489 490 491 492
  // build
  n = 0;
  n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  n += tPutMapData(pWriter->aBuf[0] + n, mBlock);
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
493

H
Hongze Cheng 已提交
494 495 496 497
  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
498 499 500 501 502
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
503 504 505 506
  // update
  pBlockIdx->offset = pHeadFile->size;
  pBlockIdx->size = size;
  pHeadFile->size += size;
H
Hongze Cheng 已提交
507

H
Hongze Cheng 已提交
508 509 510 511
  tsdbTrace("vgId:%d, write block, file ID:%d commit ID:%d suid:%" PRId64 " uid:%" PRId64 " offset:%" PRId64
            " size:%" PRId64 " nItem:%d",
            TD_VID(pWriter->pTsdb->pVnode), pWriter->wSet.fid, pHeadFile->commitID, pBlockIdx->suid, pBlockIdx->uid,
            pBlockIdx->offset, pBlockIdx->size, mBlock->nItem);
H
Hongze Cheng 已提交
512 513 514
  return code;

_err:
H
Hongze Cheng 已提交
515
  tsdbError("vgId:%d, write block failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
516 517 518
  return code;
}

H
Hongze Cheng 已提交
519
int32_t tsdbWriteSstBlk(SDataFWriter *pWriter, SArray *aSstBlk) {
H
Hongze Cheng 已提交
520
  int32_t   code = 0;
H
Hongze Cheng 已提交
521 522 523
  SSstFile *pSstFile = &pWriter->fSst[pWriter->wSet.nSstF - 1];
  int64_t   size;
  int64_t   n;
H
Hongze Cheng 已提交
524

H
Hongze Cheng 已提交
525 526 527 528 529
  // check
  if (taosArrayGetSize(aSstBlk) == 0) {
    pSstFile->offset = pSstFile->size;
    goto _exit;
  }
H
Hongze Cheng 已提交
530

H
Hongze Cheng 已提交
531 532 533 534 535 536
  // size
  size = sizeof(uint32_t);  // TSDB_FILE_DLMT
  for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSstBlk); iBlockL++) {
    size += tPutSstBlk(NULL, taosArrayGet(aSstBlk, iBlockL));
  }
  size += sizeof(TSCKSUM);
H
Hongze Cheng 已提交
537 538

  // alloc
H
Hongze Cheng 已提交
539
  code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
540 541
  if (code) goto _err;

H
Hongze Cheng 已提交
542 543 544 545 546
  // encode
  n = 0;
  n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aSstBlk); iBlockL++) {
    n += tPutSstBlk(pWriter->aBuf[0] + n, taosArrayGet(aSstBlk, iBlockL));
H
Hongze Cheng 已提交
547
  }
H
Hongze Cheng 已提交
548
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
549

H
Hongze Cheng 已提交
550 551 552 553
  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pLastFD, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
554 555 556 557 558
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
559 560 561
  // update
  pSstFile->offset = pSstFile->size;
  pSstFile->size += size;
H
Hongze Cheng 已提交
562

H
Hongze Cheng 已提交
563 564 565
_exit:
  tsdbTrace("vgId:%d tsdb write blockl, loffset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode),
            pSstFile->offset, size);
H
Hongze Cheng 已提交
566 567 568
  return code;

_err:
H
Hongze Cheng 已提交
569
  tsdbError("vgId:%d tsdb write blockl failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
570 571 572
  return code;
}

H
Hongze Cheng 已提交
573
static int32_t tsdbWriteBlockSma(SDataFWriter *pWriter, SBlockData *pBlockData, SSmaInfo *pSmaInfo) {
H
Hongze Cheng 已提交
574 575
  int32_t code = 0;

H
Hongze Cheng 已提交
576 577
  pSmaInfo->offset = 0;
  pSmaInfo->size = 0;
H
Hongze Cheng 已提交
578

H
Hongze Cheng 已提交
579 580 581
  // encode
  for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) {
    SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData);
H
Hongze Cheng 已提交
582

H
Hongze Cheng 已提交
583
    if ((!pColData->smaOn) || IS_VAR_DATA_TYPE(pColData->type)) continue;
H
Hongze Cheng 已提交
584

H
Hongze Cheng 已提交
585 586
    SColumnDataAgg sma;
    tsdbCalcColDataSMA(pColData, &sma);
H
Hongze Cheng 已提交
587

H
Hongze Cheng 已提交
588 589 590 591
    code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size + tPutColumnDataAgg(NULL, &sma));
    if (code) goto _err;
    pSmaInfo->size += tPutColumnDataAgg(pWriter->aBuf[0] + pSmaInfo->size, &sma);
  }
H
Hongze Cheng 已提交
592

H
Hongze Cheng 已提交
593 594 595
  // write
  if (pSmaInfo->size) {
    int32_t size = pSmaInfo->size + sizeof(TSCKSUM);
H
Hongze Cheng 已提交
596

H
Hongze Cheng 已提交
597
    code = tRealloc(&pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
598
    if (code) goto _err;
H
Hongze Cheng 已提交
599

H
Hongze Cheng 已提交
600
    taosCalcChecksumAppend(0, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
601

H
Hongze Cheng 已提交
602 603 604 605
    int64_t n = taosWriteFile(pWriter->pSmaFD, pWriter->aBuf[0], size);
    if (n < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
H
Hongze Cheng 已提交
606
    }
H
Hongze Cheng 已提交
607

H
Hongze Cheng 已提交
608 609
    pSmaInfo->offset = pWriter->fSma.size;
    pWriter->fSma.size += size;
H
Hongze Cheng 已提交
610 611 612 613 614
  }

  return code;

_err:
H
Hongze Cheng 已提交
615
  tsdbError("vgId:%d tsdb write block sma failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
616 617 618
  return code;
}

H
Hongze Cheng 已提交
619 620
int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo,
                           int8_t cmprAlg, int8_t toLast) {
H
Hongze Cheng 已提交
621 622
  int32_t code = 0;

H
Hongze Cheng 已提交
623
  ASSERT(pBlockData->nRow > 0);
H
Hongze Cheng 已提交
624

H
Hongze Cheng 已提交
625 626 627
  pBlkInfo->offset = toLast ? pWriter->fSst[pWriter->wSet.nSstF - 1].size : pWriter->fData.size;
  pBlkInfo->szBlock = 0;
  pBlkInfo->szKey = 0;
H
Hongze Cheng 已提交
628

H
Hongze Cheng 已提交
629 630 631
  int32_t aBufN[4] = {0};
  code = tCmprBlockData(pBlockData, cmprAlg, NULL, NULL, pWriter->aBuf, aBufN);
  if (code) goto _err;
H
Hongze Cheng 已提交
632

H
Hongze Cheng 已提交
633 634
  // write =================
  TdFilePtr pFD = toLast ? pWriter->pLastFD : pWriter->pDataFD;
H
Hongze Cheng 已提交
635

H
Hongze Cheng 已提交
636 637
  pBlkInfo->szKey = aBufN[3] + aBufN[2];
  pBlkInfo->szBlock = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3];
H
Hongze Cheng 已提交
638

H
Hongze Cheng 已提交
639 640 641 642 643
  int64_t n = taosWriteFile(pFD, pWriter->aBuf[3], aBufN[3]);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
644

H
Hongze Cheng 已提交
645 646 647 648
  n = taosWriteFile(pFD, pWriter->aBuf[2], aBufN[2]);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
649 650
  }

H
Hongze Cheng 已提交
651 652 653 654 655 656 657
  if (aBufN[1]) {
    n = taosWriteFile(pFD, pWriter->aBuf[1], aBufN[1]);
    if (n < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
  }
H
Hongze Cheng 已提交
658

H
Hongze Cheng 已提交
659 660 661 662 663 664 665
  if (aBufN[0]) {
    n = taosWriteFile(pFD, pWriter->aBuf[0], aBufN[0]);
    if (n < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
  }
H
Hongze Cheng 已提交
666

H
Hongze Cheng 已提交
667 668 669 670 671 672
  // update info
  if (toLast) {
    pWriter->fSst[pWriter->wSet.nSstF - 1].size += pBlkInfo->szBlock;
  } else {
    pWriter->fData.size += pBlkInfo->szBlock;
  }
H
Hongze Cheng 已提交
673

H
Hongze Cheng 已提交
674 675 676 677 678
  // ================= SMA ====================
  if (pSmaInfo) {
    code = tsdbWriteBlockSma(pWriter, pBlockData, pSmaInfo);
    if (code) goto _err;
  }
H
Hongze Cheng 已提交
679

H
Hongze Cheng 已提交
680 681 682 683
_exit:
  tsdbTrace("vgId:%d tsdb write block data, suid:%" PRId64 " uid:%" PRId64 " nRow:%d, offset:%" PRId64 " size:%d",
            TD_VID(pWriter->pTsdb->pVnode), pBlockData->suid, pBlockData->uid, pBlockData->nRow, pBlkInfo->offset,
            pBlkInfo->szBlock);
H
Hongze Cheng 已提交
684 685 686
  return code;

_err:
H
Hongze Cheng 已提交
687
  tsdbError("vgId:%d tsdb write block data failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
688 689
  return code;
}
H
Hongze Cheng 已提交
690

H
Hongze Cheng 已提交
691 692 693 694 695 696 697 698
int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) {
  int32_t   code = 0;
  int64_t   n;
  int64_t   size;
  TdFilePtr pOutFD = NULL;  // TODO
  TdFilePtr PInFD = NULL;   // TODO
  char      fNameFrom[TSDB_FILENAME_LEN];
  char      fNameTo[TSDB_FILENAME_LEN];
H
Hongze Cheng 已提交
699

H
Hongze Cheng 已提交
700 701 702
  // head
  tsdbHeadFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pHeadF, fNameFrom);
  tsdbHeadFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pHeadF, fNameTo);
H
Hongze Cheng 已提交
703

H
Hongze Cheng 已提交
704 705 706
  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
707 708 709
    goto _err;
  }

H
Hongze Cheng 已提交
710 711
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
712 713 714 715
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
716
  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pHeadF->size);
H
Hongze Cheng 已提交
717 718 719 720
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
721 722
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);
H
Hongze Cheng 已提交
723 724

  // data
H
Hongze Cheng 已提交
725 726
  tsdbDataFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pDataF, fNameFrom);
  tsdbDataFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pDataF, fNameTo);
H
Hongze Cheng 已提交
727

H
Hongze Cheng 已提交
728 729
  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
H
Hongze Cheng 已提交
730 731 732 733
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
734 735
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
736 737 738
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
739 740

  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pDataF->size);
H
Hongze Cheng 已提交
741 742 743 744
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
745 746
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);
H
Hongze Cheng 已提交
747

H
Hongze Cheng 已提交
748 749 750
  // sst
  tsdbSstFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->aSstF[0], fNameFrom);
  tsdbSstFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->aSstF[0], fNameTo);
H
Hongze Cheng 已提交
751

H
Hongze Cheng 已提交
752 753 754 755
  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
756 757
  }

H
Hongze Cheng 已提交
758 759
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
760 761 762 763
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
764 765
  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->aSstF[0]->size);
  if (n < 0) {
H
Hongze Cheng 已提交
766 767 768
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
769 770
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);
H
Hongze Cheng 已提交
771

H
Hongze Cheng 已提交
772 773 774 775 776 777
  // sma
  tsdbSmaFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pSmaF, fNameFrom);
  tsdbSmaFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pSmaF, fNameTo);

  pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
  if (pOutFD == NULL) {
H
Hongze Cheng 已提交
778 779 780 781
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
782 783
  PInFD = taosOpenFile(fNameFrom, TD_FILE_READ);
  if (PInFD == NULL) {
H
Hongze Cheng 已提交
784 785 786 787
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
788 789 790 791
  n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pSmaF->size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
792
  }
H
Hongze Cheng 已提交
793 794 795
  taosCloseFile(&pOutFD);
  taosCloseFile(&PInFD);

H
Hongze Cheng 已提交
796 797 798
  return code;

_err:
H
Hongze Cheng 已提交
799
  tsdbError("vgId:%d, tsdb DFileSet copy failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
800 801 802
  return code;
}

H
Hongze Cheng 已提交
803 804 805 806 807
// SDataFReader ====================================================
int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet) {
  int32_t       code = 0;
  SDataFReader *pReader;
  char          fname[TSDB_FILENAME_LEN];
H
Hongze Cheng 已提交
808

H
Hongze Cheng 已提交
809 810 811 812
  // alloc
  pReader = (SDataFReader *)taosMemoryCalloc(1, sizeof(*pReader));
  if (pReader == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
813 814
    goto _err;
  }
H
Hongze Cheng 已提交
815 816
  pReader->pTsdb = pTsdb;
  pReader->pSet = pSet;
H
Hongze Cheng 已提交
817

H
Hongze Cheng 已提交
818 819 820 821 822
  // open impl
  // head
  tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname);
  pReader->pHeadFD = taosOpenFile(fname, TD_FILE_READ);
  if (pReader->pHeadFD == NULL) {
H
Hongze Cheng 已提交
823 824 825
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
826

H
Hongze Cheng 已提交
827 828 829 830
  // data
  tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
  pReader->pDataFD = taosOpenFile(fname, TD_FILE_READ);
  if (pReader->pDataFD == NULL) {
H
Hongze Cheng 已提交
831 832 833 834
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
835 836 837 838
  // sma
  tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname);
  pReader->pSmaFD = taosOpenFile(fname, TD_FILE_READ);
  if (pReader->pSmaFD == NULL) {
H
Hongze Cheng 已提交
839 840 841
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
842

H
Hongze Cheng 已提交
843 844 845 846 847 848 849 850 851
  // sst
  for (int32_t iSst = 0; iSst < pSet->nSstF; iSst++) {
    tsdbSstFileName(pTsdb, pSet->diskId, pSet->fid, pSet->aSstF[iSst], fname);
    pReader->aLastFD[iSst] = taosOpenFile(fname, TD_FILE_READ);
    if (pReader->aLastFD[iSst] == NULL) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
  }
H
Hongze Cheng 已提交
852

H
Hongze Cheng 已提交
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
  *ppReader = pReader;
  return code;

_err:
  tsdbError("vgId:%d, tsdb data file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppReader = NULL;
  return code;
}

int32_t tsdbDataFReaderClose(SDataFReader **ppReader) {
  int32_t code = 0;
  if (*ppReader == NULL) goto _exit;

  // head
  if (taosCloseFile(&(*ppReader)->pHeadFD) < 0) {
H
Hongze Cheng 已提交
868 869 870 871
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
872 873
  // data
  if (taosCloseFile(&(*ppReader)->pDataFD) < 0) {
H
Hongze Cheng 已提交
874 875 876
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
877

H
Hongze Cheng 已提交
878 879
  // sma
  if (taosCloseFile(&(*ppReader)->pSmaFD) < 0) {
H
Hongze Cheng 已提交
880 881 882 883
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
884 885 886 887 888 889 890 891 892 893
  // sst
  for (int32_t iSst = 0; iSst < (*ppReader)->pSet->nSstF; iSst++) {
    if (taosCloseFile(&(*ppReader)->aLastFD[iSst]) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _err;
    }
  }

  for (int32_t iBuf = 0; iBuf < sizeof((*ppReader)->aBuf) / sizeof(uint8_t *); iBuf++) {
    tFree((*ppReader)->aBuf[iBuf]);
H
Hongze Cheng 已提交
894
  }
H
Hongze Cheng 已提交
895
  taosMemoryFree(*ppReader);
H
Hongze Cheng 已提交
896

H
Hongze Cheng 已提交
897 898
_exit:
  *ppReader = NULL;
H
Hongze Cheng 已提交
899 900 901
  return code;

_err:
H
Hongze Cheng 已提交
902
  tsdbError("vgId:%d, data file reader close failed since %s", TD_VID((*ppReader)->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
903 904 905
  return code;
}

H
Hongze Cheng 已提交
906 907 908 909 910 911
int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx) {
  int32_t  code = 0;
  int64_t  offset = pReader->pSet->pHeadF->offset;
  int64_t  size = pReader->pSet->pHeadF->size - offset;
  int64_t  n;
  uint32_t delimiter;
H
Hongze Cheng 已提交
912

H
Hongze Cheng 已提交
913 914
  taosArrayClear(aBlockIdx);
  if (size == 0) {
H
Hongze Cheng 已提交
915 916
    goto _exit;
  }
H
Hongze Cheng 已提交
917 918

  // alloc
H
Hongze Cheng 已提交
919
  code = tRealloc(&pReader->aBuf[0], size);
H
Hongze Cheng 已提交
920 921
  if (code) goto _err;

H
Hongze Cheng 已提交
922 923 924 925
  // seek
  if (taosLSeekFile(pReader->pHeadFD, offset, SEEK_SET) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
926
  }
H
Hongze Cheng 已提交
927

H
Hongze Cheng 已提交
928 929
  // read
  n = taosReadFile(pReader->pHeadFD, pReader->aBuf[0], size);
H
Hongze Cheng 已提交
930 931 932
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }

  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }

  // decode
  n = 0;
  n = tGetU32(pReader->aBuf[0] + n, &delimiter);
  ASSERT(delimiter == TSDB_FILE_DLMT);

  while (n < size - sizeof(TSCKSUM)) {
    SBlockIdx blockIdx;
    n += tGetBlockIdx(pReader->aBuf[0] + n, &blockIdx);

    if (taosArrayPush(aBlockIdx, &blockIdx) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }
H
Hongze Cheng 已提交
957 958
  }

H
Hongze Cheng 已提交
959
  ASSERT(n + sizeof(TSCKSUM) == size);
H
Hongze Cheng 已提交
960

H
Hongze Cheng 已提交
961
_exit:
H
Hongze Cheng 已提交
962 963 964
  return code;

_err:
H
Hongze Cheng 已提交
965
  tsdbError("vgId:%d, read block idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
966 967 968
  return code;
}

H
Hongze Cheng 已提交
969 970 971 972 973 974
int32_t tsdbReadSstBlk(SDataFReader *pReader, int32_t iSst, SArray *aSstBlk) {
  int32_t  code = 0;
  int64_t  offset = pReader->pSet->aSstF[iSst]->offset;
  int64_t  size = pReader->pSet->aSstF[iSst]->size - offset;
  int64_t  n;
  uint32_t delimiter;
H
Hongze Cheng 已提交
975

H
Hongze Cheng 已提交
976 977 978 979
  taosArrayClear(aSstBlk);
  if (size == 0) {
    goto _exit;
  }
H
Hongze Cheng 已提交
980 981

  // alloc
H
Hongze Cheng 已提交
982
  code = tRealloc(&pReader->aBuf[0], size);
H
Hongze Cheng 已提交
983 984
  if (code) goto _err;

H
Hongze Cheng 已提交
985 986 987 988 989
  // seek
  if (taosLSeekFile(pReader->aLastFD[iSst], offset, SEEK_SET) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
990

H
Hongze Cheng 已提交
991 992
  // read
  n = taosReadFile(pReader->aLastFD[iSst], pReader->aBuf[0], size);
H
Hongze Cheng 已提交
993 994 995
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
996 997 998
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
H
Hongze Cheng 已提交
999 1000
  }

H
Hongze Cheng 已提交
1001 1002 1003 1004 1005
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }
H
Hongze Cheng 已提交
1006

H
Hongze Cheng 已提交
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
  // decode
  n = 0;
  n = tGetU32(pReader->aBuf[0] + n, &delimiter);
  ASSERT(delimiter == TSDB_FILE_DLMT);

  while (n < size - sizeof(TSCKSUM)) {
    SSstBlk blockl;
    n += tGetSstBlk(pReader->aBuf[0] + n, &blockl);

    if (taosArrayPush(aSstBlk, &blockl) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }
  }

  ASSERT(n + sizeof(TSCKSUM) == size);

_exit:
H
Hongze Cheng 已提交
1025 1026 1027
  return code;

_err:
H
Hongze Cheng 已提交
1028
  tsdbError("vgId:%d read blockl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1029 1030 1031
  return code;
}

H
Hongze Cheng 已提交
1032 1033 1034 1035 1036 1037
int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBlock) {
  int32_t code = 0;
  int64_t offset = pBlockIdx->offset;
  int64_t size = pBlockIdx->size;
  int64_t n;
  int64_t tn;
H
Hongze Cheng 已提交
1038 1039

  // alloc
H
Hongze Cheng 已提交
1040
  code = tRealloc(&pReader->aBuf[0], size);
H
Hongze Cheng 已提交
1041 1042
  if (code) goto _err;

H
Hongze Cheng 已提交
1043 1044 1045 1046
  // seek
  if (taosLSeekFile(pReader->pHeadFD, offset, SEEK_SET) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
1047 1048
  }

H
Hongze Cheng 已提交
1049 1050
  // read
  n = taosReadFile(pReader->pHeadFD, pReader->aBuf[0], size);
H
Hongze Cheng 已提交
1051 1052 1053
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
H
Hongze Cheng 已提交
1054 1055 1056
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
H
Hongze Cheng 已提交
1057 1058
  }

H
Hongze Cheng 已提交
1059 1060 1061 1062 1063
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }
H
Hongze Cheng 已提交
1064

H
Hongze Cheng 已提交
1065 1066
  // decode
  n = 0;
H
Hongze Cheng 已提交
1067

H
Hongze Cheng 已提交
1068 1069 1070
  uint32_t delimiter;
  n += tGetU32(pReader->aBuf[0] + n, &delimiter);
  ASSERT(delimiter == TSDB_FILE_DLMT);
H
Hongze Cheng 已提交
1071

H
Hongze Cheng 已提交
1072 1073 1074 1075 1076 1077 1078
  tn = tGetMapData(pReader->aBuf[0] + n, mBlock);
  if (tn < 0) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  n += tn;
  ASSERT(n + sizeof(TSCKSUM) == size);
H
Hongze Cheng 已提交
1079

H
Hongze Cheng 已提交
1080
  return code;
H
Hongze Cheng 已提交
1081

H
Hongze Cheng 已提交
1082 1083 1084
_err:
  tsdbError("vgId:%d, read block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
  return code;
H
Hongze Cheng 已提交
1085
}
H
Hongze Cheng 已提交
1086

H
Hongze Cheng 已提交
1087 1088 1089
int32_t tsdbReadBlockSma(SDataFReader *pReader, SDataBlk *pDataBlk, SArray *aColumnDataAgg) {
  int32_t   code = 0;
  SSmaInfo *pSmaInfo = &pDataBlk->smaInfo;
H
Hongze Cheng 已提交
1090

H
Hongze Cheng 已提交
1091
  ASSERT(pSmaInfo->size > 0);
H
Hongze Cheng 已提交
1092

H
Hongze Cheng 已提交
1093
  taosArrayClear(aColumnDataAgg);
H
Hongze Cheng 已提交
1094

H
Hongze Cheng 已提交
1095 1096 1097 1098
  // alloc
  int32_t size = pSmaInfo->size + sizeof(TSCKSUM);
  code = tRealloc(&pReader->aBuf[0], size);
  if (code) goto _err;
H
Hongze Cheng 已提交
1099

H
Hongze Cheng 已提交
1100 1101 1102 1103 1104 1105 1106 1107
  // seek
  int64_t n = taosLSeekFile(pReader->pSmaFD, pSmaInfo->offset, SEEK_SET);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  } else if (n < pSmaInfo->offset) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
H
Hongze Cheng 已提交
1108
  }
H
Hongze Cheng 已提交
1109

H
Hongze Cheng 已提交
1110 1111 1112 1113 1114 1115 1116 1117 1118
  // read
  n = taosReadFile(pReader->pSmaFD, pReader->aBuf[0], size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }
H
Hongze Cheng 已提交
1119

H
Hongze Cheng 已提交
1120 1121 1122 1123 1124
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }
H
Hongze Cheng 已提交
1125

H
Hongze Cheng 已提交
1126 1127 1128 1129
  // decode
  n = 0;
  while (n < pSmaInfo->size) {
    SColumnDataAgg sma;
H
Hongze Cheng 已提交
1130

H
Hongze Cheng 已提交
1131 1132 1133
    n += tGetColumnDataAgg(pReader->aBuf[0] + n, &sma);
    if (taosArrayPush(aColumnDataAgg, &sma) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
1134 1135
      goto _err;
    }
H
Hongze Cheng 已提交
1136
  }
H
Hongze Cheng 已提交
1137

H
Hongze Cheng 已提交
1138
  return code;
H
Hongze Cheng 已提交
1139

H
Hongze Cheng 已提交
1140
_err:
H
Hongze Cheng 已提交
1141
  tsdbError("vgId:%d tsdb read block sma failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1142 1143
  return code;
}
H
Hongze Cheng 已提交
1144

H
Hongze Cheng 已提交
1145 1146
static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo, int8_t fromLast,
                                     SBlockData *pBlockData) {
H
Hongze Cheng 已提交
1147
  int32_t code = 0;
H
Hongze Cheng 已提交
1148

H
Hongze Cheng 已提交
1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174
  tBlockDataClear(pBlockData);

  TdFilePtr pFD = fromLast ? pReader->aLastFD[0] : pReader->pDataFD;  // (todo)

  // uid + version + tskey
  code = tsdbReadAndCheck(pFD, pBlkInfo->offset, &pReader->aBuf[0], pBlkInfo->szKey, 1);
  if (code) goto _err;
  SDiskDataHdr hdr;
  uint8_t     *p = pReader->aBuf[0] + tGetDiskDataHdr(pReader->aBuf[0], &hdr);

  ASSERT(hdr.delimiter == TSDB_FILE_DLMT);
  ASSERT(pBlockData->suid == hdr.suid);
  ASSERT(pBlockData->uid == hdr.uid);

  pBlockData->nRow = hdr.nRow;

  // uid
  if (hdr.uid == 0) {
    ASSERT(hdr.szUid);
    code = tsdbDecmprData(p, hdr.szUid, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aUid,
                          sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]);
    if (code) goto _err;
  } else {
    ASSERT(!hdr.szUid);
  }
  p += hdr.szUid;
H
Hongze Cheng 已提交
1175

H
Hongze Cheng 已提交
1176 1177 1178 1179 1180
  // version
  code = tsdbDecmprData(p, hdr.szVer, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aVersion,
                        sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]);
  if (code) goto _err;
  p += hdr.szVer;
H
Hongze Cheng 已提交
1181

H
Hongze Cheng 已提交
1182 1183 1184
  // TSKEY
  code = tsdbDecmprData(p, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY,
                        sizeof(TSKEY) * hdr.nRow, &pReader->aBuf[1]);
H
Hongze Cheng 已提交
1185
  if (code) goto _err;
H
Hongze Cheng 已提交
1186
  p += hdr.szKey;
H
Hongze Cheng 已提交
1187

H
Hongze Cheng 已提交
1188
  ASSERT(p - pReader->aBuf[0] == pBlkInfo->szKey - sizeof(TSCKSUM));
H
Hongze Cheng 已提交
1189

H
Hongze Cheng 已提交
1190 1191
  // read and decode columns
  if (taosArrayGetSize(pBlockData->aIdx) == 0) goto _exit;
H
Hongze Cheng 已提交
1192

H
Hongze Cheng 已提交
1193 1194 1195 1196
  if (hdr.szBlkCol > 0) {
    int64_t offset = pBlkInfo->offset + pBlkInfo->szKey;
    code = tsdbReadAndCheck(pFD, offset, &pReader->aBuf[0], hdr.szBlkCol + sizeof(TSCKSUM), 1);
    if (code) goto _err;
H
Hongze Cheng 已提交
1197 1198
  }

H
Hongze Cheng 已提交
1199 1200 1201
  SBlockCol  blockCol = {.cid = 0};
  SBlockCol *pBlockCol = &blockCol;
  int32_t    n = 0;
H
Hongze Cheng 已提交
1202

H
Hongze Cheng 已提交
1203 1204
  for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) {
    SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData);
H
Hongze Cheng 已提交
1205

H
Hongze Cheng 已提交
1206 1207 1208 1209 1210 1211 1212
    while (pBlockCol && pBlockCol->cid < pColData->cid) {
      if (n < hdr.szBlkCol) {
        n += tGetBlockCol(pReader->aBuf[0] + n, pBlockCol);
      } else {
        ASSERT(n == hdr.szBlkCol);
        pBlockCol = NULL;
      }
H
Hongze Cheng 已提交
1213
    }
H
Hongze Cheng 已提交
1214

H
Hongze Cheng 已提交
1215 1216 1217 1218 1219 1220 1221 1222 1223
    if (pBlockCol == NULL || pBlockCol->cid > pColData->cid) {
      // add a lot of NONE
      for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) {
        code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type));
        if (code) goto _err;
      }
    } else {
      ASSERT(pBlockCol->type == pColData->type);
      ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE);
H
Hongze Cheng 已提交
1224

H
Hongze Cheng 已提交
1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242
      if (pBlockCol->flag == HAS_NULL) {
        // add a lot of NULL
        for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) {
          code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type));
          if (code) goto _err;
        }
      } else {
        // decode from binary
        int64_t offset = pBlkInfo->offset + pBlkInfo->szKey + hdr.szBlkCol + sizeof(TSCKSUM) + pBlockCol->offset;
        int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM);

        code = tsdbReadAndCheck(pFD, offset, &pReader->aBuf[1], size, 0);
        if (code) goto _err;

        code = tsdbDecmprColData(pReader->aBuf[1], pBlockCol, hdr.cmprAlg, hdr.nRow, pColData, &pReader->aBuf[2]);
        if (code) goto _err;
      }
    }
H
Hongze Cheng 已提交
1243
  }
H
Hongze Cheng 已提交
1244

H
Hongze Cheng 已提交
1245
_exit:
H
Hongze Cheng 已提交
1246 1247
  return code;

H
Hongze Cheng 已提交
1248
_err:
H
Hongze Cheng 已提交
1249
  tsdbError("vgId:%d tsdb read block data impl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1250
  return code;
H
Hongze Cheng 已提交
1251
}
H
Hongze Cheng 已提交
1252

H
Hongze Cheng 已提交
1253 1254
int32_t tsdbReadDataBlock(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData) {
  int32_t code = 0;
H
Hongze Cheng 已提交
1255

H
Hongze Cheng 已提交
1256 1257
  code = tsdbReadBlockDataImpl(pReader, &pDataBlk->aSubBlock[0], 0, pBlockData);
  if (code) goto _err;
H
Hongze Cheng 已提交
1258

H
Hongze Cheng 已提交
1259 1260 1261
  if (pDataBlk->nSubBlock > 1) {
    SBlockData bData1;
    SBlockData bData2;
H
Hongze Cheng 已提交
1262

H
Hongze Cheng 已提交
1263 1264 1265 1266 1267
    // create
    code = tBlockDataCreate(&bData1);
    if (code) goto _err;
    code = tBlockDataCreate(&bData2);
    if (code) goto _err;
H
Hongze Cheng 已提交
1268

H
Hongze Cheng 已提交
1269 1270 1271
    // init
    tBlockDataInitEx(&bData1, pBlockData);
    tBlockDataInitEx(&bData2, pBlockData);
H
Hongze Cheng 已提交
1272

H
Hongze Cheng 已提交
1273 1274 1275 1276 1277 1278 1279
    for (int32_t iSubBlock = 1; iSubBlock < pDataBlk->nSubBlock; iSubBlock++) {
      code = tsdbReadBlockDataImpl(pReader, &pDataBlk->aSubBlock[iSubBlock], 0, &bData1);
      if (code) {
        tBlockDataDestroy(&bData1, 1);
        tBlockDataDestroy(&bData2, 1);
        goto _err;
      }
H
Hongze Cheng 已提交
1280

H
Hongze Cheng 已提交
1281 1282 1283 1284 1285 1286
      code = tBlockDataCopy(pBlockData, &bData2);
      if (code) {
        tBlockDataDestroy(&bData1, 1);
        tBlockDataDestroy(&bData2, 1);
        goto _err;
      }
H
Hongze Cheng 已提交
1287

H
Hongze Cheng 已提交
1288 1289 1290 1291 1292 1293 1294
      code = tBlockDataMerge(&bData1, &bData2, pBlockData);
      if (code) {
        tBlockDataDestroy(&bData1, 1);
        tBlockDataDestroy(&bData2, 1);
        goto _err;
      }
    }
H
Hongze Cheng 已提交
1295

H
Hongze Cheng 已提交
1296 1297
    tBlockDataDestroy(&bData1, 1);
    tBlockDataDestroy(&bData2, 1);
H
Hongze Cheng 已提交
1298 1299
  }

H
Hongze Cheng 已提交
1300
  return code;
H
Hongze Cheng 已提交
1301

H
Hongze Cheng 已提交
1302 1303 1304 1305
_err:
  tsdbError("vgId:%d tsdb read data block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
  return code;
}
H
Hongze Cheng 已提交
1306

H
Hongze Cheng 已提交
1307 1308
int32_t tsdbReadSstBlock(SDataFReader *pReader, int32_t iSst, SSstBlk *pSstBlk, SBlockData *pBlockData) {
  int32_t code = 0;
H
Hongze Cheng 已提交
1309

H
Hongze Cheng 已提交
1310 1311
  code = tsdbReadBlockDataImpl(pReader, &pSstBlk->bInfo, 1, pBlockData);
  if (code) goto _err;
H
Hongze Cheng 已提交
1312

H
Hongze Cheng 已提交
1313
  return code;
H
Hongze Cheng 已提交
1314

H
Hongze Cheng 已提交
1315 1316 1317 1318
_err:
  tsdbError("vgId:%d tsdb read last block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
  return code;
}
H
Hongze Cheng 已提交
1319

H
Hongze Cheng 已提交
1320 1321
int32_t tsdbReadSstBlockEx(SDataFReader *pReader, int32_t iSst, SSstBlk *pSstBlk, SBlockData *pBlockData) {
  int32_t code = 0;
H
Hongze Cheng 已提交
1322

H
Hongze Cheng 已提交
1323 1324 1325
  // read
  code = tsdbReadAndCheck(pReader->aLastFD[iSst], pSstBlk->bInfo.offset, &pReader->aBuf[0], pSstBlk->bInfo.szBlock, 0);
  if (code) goto _exit;
H
Hongze Cheng 已提交
1326

H
Hongze Cheng 已提交
1327 1328 1329
  // decmpr
  code = tDecmprBlockData(pReader->aBuf[0], pSstBlk->bInfo.szBlock, pBlockData, &pReader->aBuf[1]);
  if (code) goto _exit;
H
Hongze Cheng 已提交
1330

H
Hongze Cheng 已提交
1331
_exit:
H
Hongze Cheng 已提交
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
  return code;
}

// SDelFWriter ====================================================
int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb) {
  int32_t      code = 0;
  char         fname[TSDB_FILENAME_LEN];
  char         hdr[TSDB_FHDR_SIZE] = {0};
  SDelFWriter *pDelFWriter;
  int64_t      n;

  // alloc
  pDelFWriter = (SDelFWriter *)taosMemoryCalloc(1, sizeof(*pDelFWriter));
  if (pDelFWriter == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    goto _err;
  }
  pDelFWriter->pTsdb = pTsdb;
  pDelFWriter->fDel = *pFile;

  tsdbDelFileName(pTsdb, pFile, fname);
  pDelFWriter->pWriteH = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE);
  if (pDelFWriter->pWriteH == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  // update header
  n = taosWriteFile(pDelFWriter->pWriteH, &hdr, TSDB_FHDR_SIZE);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  pDelFWriter->fDel.size = TSDB_FHDR_SIZE;
  pDelFWriter->fDel.offset = 0;

  *ppWriter = pDelFWriter;
  return code;

_err:
  tsdbError("vgId:%d, failed to open del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppWriter = NULL;
  return code;
}

int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) {
  int32_t      code = 0;
  SDelFWriter *pWriter = *ppWriter;
  STsdb       *pTsdb = pWriter->pTsdb;

  // sync
  if (sync && taosFsyncFile(pWriter->pWriteH) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  // close
  if (taosCloseFile(&pWriter->pWriteH) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t *); iBuf++) {
    tFree(pWriter->aBuf[iBuf]);
  }
  taosMemoryFree(pWriter);

  *ppWriter = NULL;
  return code;

_err:
  tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  return code;
}

int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx) {
  int32_t code = 0;
  int64_t size;
  int64_t n;

  // prepare
  size = sizeof(uint32_t);
  for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) {
    size += tPutDelData(NULL, taosArrayGet(aDelData, iDelData));
  }
  size += sizeof(TSCKSUM);

  // alloc
  code = tRealloc(&pWriter->aBuf[0], size);
  if (code) goto _err;

  // build
  n = 0;
  n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) {
    n += tPutDelData(pWriter->aBuf[0] + n, taosArrayGet(aDelData, iDelData));
  }
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);

  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  ASSERT(n == size);

  // update
  pDelIdx->offset = pWriter->fDel.size;
  pDelIdx->size = size;
  pWriter->fDel.size += size;

  return code;

_err:
  tsdbError("vgId:%d, failed to write del data since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
  return code;
}

int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx) {
  int32_t  code = 0;
  int64_t  size;
  int64_t  n;
  SDelIdx *pDelIdx;

  // prepare
  size = sizeof(uint32_t);
  for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
    size += tPutDelIdx(NULL, taosArrayGet(aDelIdx, iDelIdx));
  }
  size += sizeof(TSCKSUM);

  // alloc
  code = tRealloc(&pWriter->aBuf[0], size);
  if (code) goto _err;

  // build
  n = 0;
  n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT);
  for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
    n += tPutDelIdx(pWriter->aBuf[0] + n, taosArrayGet(aDelIdx, iDelIdx));
  }
  taosCalcChecksumAppend(0, pWriter->aBuf[0], size);

  ASSERT(n + sizeof(TSCKSUM) == size);

  // write
  n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size);
H
Hongze Cheng 已提交
1484 1485 1486 1487 1488
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
1489 1490 1491
  // update
  pWriter->fDel.offset = pWriter->fDel.size;
  pWriter->fDel.size += size;
H
Hongze Cheng 已提交
1492

H
Hongze Cheng 已提交
1493
  return code;
H
Hongze Cheng 已提交
1494

H
Hongze Cheng 已提交
1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512
_err:
  tsdbError("vgId:%d, write del idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
  return code;
}

int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter) {
  int32_t code = 0;
  char    hdr[TSDB_FHDR_SIZE];
  int64_t size = TSDB_FHDR_SIZE;
  int64_t n;

  // build
  memset(hdr, 0, size);
  tPutDelFile(hdr, &pWriter->fDel);
  taosCalcChecksumAppend(0, hdr, size);

  // seek
  if (taosLSeekFile(pWriter->pWriteH, 0, SEEK_SET) < 0) {
H
Hongze Cheng 已提交
1513 1514 1515 1516
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

H
Hongze Cheng 已提交
1517 1518
  // write
  n = taosWriteFile(pWriter->pWriteH, hdr, size);
H
Hongze Cheng 已提交
1519 1520 1521 1522 1523 1524 1525 1526
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }

  return code;

_err:
H
Hongze Cheng 已提交
1527
  tsdbError("vgId:%d, update del file hdr failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1528
  return code;
H
Hongze Cheng 已提交
1529
}
H
Hongze Cheng 已提交
1530 1531 1532 1533 1534
// SDelFReader ====================================================
struct SDelFReader {
  STsdb    *pTsdb;
  SDelFile  fDel;
  TdFilePtr pReadH;
H
Hongze Cheng 已提交
1535

H
Hongze Cheng 已提交
1536 1537
  uint8_t *aBuf[1];
};
H
Hongze Cheng 已提交
1538

H
Hongze Cheng 已提交
1539 1540 1541 1542 1543
int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb) {
  int32_t      code = 0;
  char         fname[TSDB_FILENAME_LEN];
  SDelFReader *pDelFReader;
  int64_t      n;
H
Hongze Cheng 已提交
1544

H
Hongze Cheng 已提交
1545 1546 1547
  // alloc
  pDelFReader = (SDelFReader *)taosMemoryCalloc(1, sizeof(*pDelFReader));
  if (pDelFReader == NULL) {
H
Hongze Cheng 已提交
1548
    code = TSDB_CODE_OUT_OF_MEMORY;
H
Hongze Cheng 已提交
1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561
    goto _err;
  }

  // open impl
  pDelFReader->pTsdb = pTsdb;
  pDelFReader->fDel = *pFile;

  tsdbDelFileName(pTsdb, pFile, fname);
  pDelFReader->pReadH = taosOpenFile(fname, TD_FILE_READ);
  if (pDelFReader->pReadH == NULL) {
    code = TAOS_SYSTEM_ERROR(errno);
    taosMemoryFree(pDelFReader);
    goto _err;
H
Hongze Cheng 已提交
1562 1563 1564
  }

_exit:
H
Hongze Cheng 已提交
1565
  *ppReader = pDelFReader;
H
Hongze Cheng 已提交
1566 1567
  return code;

H
Hongze Cheng 已提交
1568 1569 1570 1571
_err:
  tsdbError("vgId:%d, del file reader open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
  *ppReader = NULL;
  return code;
H
Hongze Cheng 已提交
1572 1573
}

H
Hongze Cheng 已提交
1574 1575 1576
int32_t tsdbDelFReaderClose(SDelFReader **ppReader) {
  int32_t      code = 0;
  SDelFReader *pReader = *ppReader;
H
Hongze Cheng 已提交
1577

H
Hongze Cheng 已提交
1578 1579 1580 1581 1582 1583 1584 1585 1586
  if (pReader) {
    if (taosCloseFile(&pReader->pReadH) < 0) {
      code = TAOS_SYSTEM_ERROR(errno);
      goto _exit;
    }
    for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(uint8_t *); iBuf++) {
      tFree(pReader->aBuf[iBuf]);
    }
    taosMemoryFree(pReader);
H
Hongze Cheng 已提交
1587
  }
H
Hongze Cheng 已提交
1588
  *ppReader = NULL;
H
Hongze Cheng 已提交
1589 1590 1591 1592 1593

_exit:
  return code;
}

H
Hongze Cheng 已提交
1594
int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) {
H
Hongze Cheng 已提交
1595
  int32_t code = 0;
H
Hongze Cheng 已提交
1596 1597 1598
  int64_t offset = pDelIdx->offset;
  int64_t size = pDelIdx->size;
  int64_t n;
H
Hongze Cheng 已提交
1599

H
Hongze Cheng 已提交
1600
  taosArrayClear(aDelData);
H
Hongze Cheng 已提交
1601

H
Hongze Cheng 已提交
1602 1603 1604 1605 1606
  // seek
  if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  }
H
Hongze Cheng 已提交
1607

H
Hongze Cheng 已提交
1608 1609 1610
  // alloc
  code = tRealloc(&pReader->aBuf[0], size);
  if (code) goto _err;
H
Hongze Cheng 已提交
1611

H
Hongze Cheng 已提交
1612 1613 1614 1615 1616 1617 1618 1619 1620
  // read
  n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size);
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
    goto _err;
  } else if (n < size) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }
H
Hongze Cheng 已提交
1621

H
Hongze Cheng 已提交
1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
    code = TSDB_CODE_FILE_CORRUPTED;
    goto _err;
  }

  // // decode
  n = 0;

  uint32_t delimiter;
  n += tGetU32(pReader->aBuf[0] + n, &delimiter);
  while (n < size - sizeof(TSCKSUM)) {
    SDelData delData;
    n += tGetDelData(pReader->aBuf[0] + n, &delData);

    if (taosArrayPush(aDelData, &delData) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
H
Hongze Cheng 已提交
1640 1641 1642
    }
  }

H
Hongze Cheng 已提交
1643 1644 1645 1646 1647 1648
  ASSERT(n == size - sizeof(TSCKSUM));

  return code;

_err:
  tsdbError("vgId:%d, read del data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1649 1650 1651
  return code;
}

H
Hongze Cheng 已提交
1652
int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx) {
H
Hongze Cheng 已提交
1653
  int32_t code = 0;
H
Hongze Cheng 已提交
1654 1655 1656
  int32_t n;
  int64_t offset = pReader->fDel.offset;
  int64_t size = pReader->fDel.size - offset;
H
Hongze Cheng 已提交
1657

H
Hongze Cheng 已提交
1658 1659 1660 1661
  taosArrayClear(aDelIdx);

  // seek
  if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) {
H
Hongze Cheng 已提交
1662
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
1663
    goto _err;
H
Hongze Cheng 已提交
1664 1665
  }

H
Hongze Cheng 已提交
1666 1667 1668 1669 1670 1671
  // alloc
  code = tRealloc(&pReader->aBuf[0], size);
  if (code) goto _err;

  // read
  n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size);
H
Hongze Cheng 已提交
1672 1673
  if (n < 0) {
    code = TAOS_SYSTEM_ERROR(errno);
H
Hongze Cheng 已提交
1674 1675
    goto _err;
  } else if (n < size) {
H
Hongze Cheng 已提交
1676
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
1677
    goto _err;
H
Hongze Cheng 已提交
1678 1679
  }

H
Hongze Cheng 已提交
1680 1681
  // check
  if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) {
H
Hongze Cheng 已提交
1682
    code = TSDB_CODE_FILE_CORRUPTED;
H
Hongze Cheng 已提交
1683
    goto _err;
H
Hongze Cheng 已提交
1684 1685
  }

H
Hongze Cheng 已提交
1686 1687 1688 1689 1690
  // decode
  n = 0;
  uint32_t delimiter;
  n += tGetU32(pReader->aBuf[0] + n, &delimiter);
  ASSERT(delimiter == TSDB_FILE_DLMT);
H
Hongze Cheng 已提交
1691

H
Hongze Cheng 已提交
1692 1693
  while (n < size - sizeof(TSCKSUM)) {
    SDelIdx delIdx;
H
Hongze Cheng 已提交
1694

H
Hongze Cheng 已提交
1695
    n += tGetDelIdx(pReader->aBuf[0] + n, &delIdx);
H
Hongze Cheng 已提交
1696

H
Hongze Cheng 已提交
1697 1698 1699 1700
    if (taosArrayPush(aDelIdx, &delIdx) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      goto _err;
    }
H
Hongze Cheng 已提交
1701 1702
  }

H
Hongze Cheng 已提交
1703
  ASSERT(n == size - sizeof(TSCKSUM));
H
Hongze Cheng 已提交
1704

H
Hongze Cheng 已提交
1705
  return code;
H
Hongze Cheng 已提交
1706

H
Hongze Cheng 已提交
1707 1708
_err:
  tsdbError("vgId:%d, read del idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code));
H
Hongze Cheng 已提交
1709
  return code;
H
Hongze Cheng 已提交
1710
}