insertParser.c 22.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "insertParser.h"

#include "dataBlockMgt.h"
#include "parserInt.h"
#include "parserUtil.h"
#include "queryInfoUtil.h"
#include "tglobal.h"
#include "ttime.h"
#include "ttoken.h"
#include "ttypes.h"

#define NEXT_TOKEN(pSql, sToken) \
  do { \
    int32_t index = 0; \
    sToken = tStrGetToken(pSql, &index, false); \
    pSql += index; \
  } while (0)

34
#define NEXT_TOKEN_KEEP_SQL(pSql, sToken, index) \
35
  do { \
36
    sToken = tStrGetToken(pSql, &index, false); \
37 38
  } while (0)

39
#define CHECK_CODE(expr) \
40 41 42
  do { \
    int32_t code = expr; \
    if (TSDB_CODE_SUCCESS != code) { \
43
      return code; \
44 45 46 47 48 49 50 51 52
    } \
  } while (0)

enum {
  TSDB_USE_SERVER_TS = 0,
  TSDB_USE_CLI_TS = 1,
};

typedef struct SInsertParseContext {
X
Xiaoyu Wang 已提交
53
  SParseContext* pComCxt;       // input
54 55
  char          *pSql;          // input
  SMsgBuf        msg;           // input
X
Xiaoyu Wang 已提交
56 57 58 59 60 61 62
  STableMeta* pTableMeta;       // each table
  SParsedDataColInfo tags;      // each table
  SKVRowBuilder tagsBuilder;    // each table
  SHashObj* pVgroupsHashObj;    // global
  SHashObj* pTableBlockHashObj; // global
  SArray* pTableDataBlocks;     // global
  SArray* pVgDataBlocks;        // global
63
  int32_t totalNum;
H
Haojun Liao 已提交
64
  SVnodeModifOpStmtInfo* pOutput;
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
} SInsertParseContext;

static int32_t skipInsertInto(SInsertParseContext* pCxt) {
  SToken sToken;
  NEXT_TOKEN(pCxt->pSql, sToken);
  if (TK_INSERT != sToken.type) {
    return buildSyntaxErrMsg(&pCxt->msg, "keyword INSERT is expected", sToken.z);
  }
  NEXT_TOKEN(pCxt->pSql, sToken);
  if (TK_INTO != sToken.type) {
    return buildSyntaxErrMsg(&pCxt->msg, "keyword INTO is expected", sToken.z);
  }
  return TSDB_CODE_SUCCESS;
}

80
static int32_t buildName(SInsertParseContext* pCxt, SToken* pStname, char* fullDbName, char* tableName) {
81
  if (parserValidateIdToken(pStname) != TSDB_CODE_SUCCESS) {
82
    return buildSyntaxErrMsg(&pCxt->msg, "invalid table name", pStname->z);
83 84
  }

85 86
  char* p = strnchr(pStname->z, TS_PATH_DELIMITER[0], pStname->n, false);
  if (NULL != p) { // db.table
H
Haojun Liao 已提交
87
    int32_t n = sprintf(fullDbName, "%d.", pCxt->pComCxt->acctId);
H
Haojun Liao 已提交
88
    strncpy(fullDbName + n, pStname->z, p - pStname->z);
89 90
    strncpy(tableName, p + 1, pStname->n - (p - pStname->z) - 1);
  } else {
H
Haojun Liao 已提交
91
    snprintf(fullDbName, TSDB_DB_FNAME_LEN, "%d.%s", pCxt->pComCxt->acctId, pCxt->pComCxt->db);
92 93
    strncpy(tableName, pStname->z, pStname->n);
  }
H
Haojun Liao 已提交
94

95 96 97 98
  return TSDB_CODE_SUCCESS;
}

static int32_t getTableMeta(SInsertParseContext* pCxt, SToken* pTname) {
H
Haojun Liao 已提交
99
  SName name = {0};
H
Haojun Liao 已提交
100
  createSName(&name, pTname, pCxt->pComCxt, &pCxt->msg);
H
Haojun Liao 已提交
101 102 103

  char tableName[TSDB_TABLE_FNAME_LEN] = {0};
  tNameExtractFullName(&name, tableName);
H
Haojun Liao 已提交
104
  SParseContext* pBasicCtx = pCxt->pComCxt;
H
Haojun Liao 已提交
105
  CHECK_CODE(catalogGetTableMeta(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, &name, &pCxt->pTableMeta));
X
Xiaoyu Wang 已提交
106
  SVgroupInfo vg;
H
Haojun Liao 已提交
107
  CHECK_CODE(catalogGetTableHashVgroup(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, &name, &vg));
X
Xiaoyu Wang 已提交
108
  CHECK_CODE(taosHashPut(pCxt->pVgroupsHashObj, (const char*)&vg.vgId, sizeof(vg.vgId), (char*)&vg, sizeof(vg)));
X
Xiaoyu Wang 已提交
109
  pCxt->pTableMeta->vgId = vg.vgId; // todo remove
110 111 112 113 114 115 116 117 118 119 120 121 122
  return TSDB_CODE_SUCCESS;
}

static int32_t findCol(SToken* pColname, int32_t start, int32_t end, SSchema* pSchema) {
  while (start < end) {
    if (strlen(pSchema[start].name) == pColname->n && strncmp(pColname->z, pSchema[start].name, pColname->n) == 0) {
      return start;
    }
    ++start;
  }
  return -1;
}

X
Xiaoyu Wang 已提交
123
static void buildMsgHeader(SVgDataBlocks* blocks) {
X
Xiaoyu Wang 已提交
124
    SSubmitMsg* submit = (SSubmitMsg*)blocks->pData;
X
Xiaoyu Wang 已提交
125
    submit->header.vgId    = htonl(blocks->vg.vgId);
X
Xiaoyu Wang 已提交
126
    submit->header.contLen = htonl(blocks->size);
127
    submit->length         = submit->header.contLen;
X
Xiaoyu Wang 已提交
128
    submit->numOfBlocks    = htonl(blocks->numOfTables);
129
    SSubmitBlk* blk = (SSubmitBlk*)(submit + 1);
X
Xiaoyu Wang 已提交
130
    int32_t numOfBlocks = blocks->numOfTables;
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
    while (numOfBlocks--) {
      int32_t dataLen = blk->dataLen;
      blk->uid = htobe64(blk->uid);
      blk->tid = htonl(blk->tid);
      blk->padding = htonl(blk->padding);
      blk->sversion = htonl(blk->sversion);
      blk->dataLen = htonl(blk->dataLen);
      blk->schemaLen = htonl(blk->schemaLen);
      blk->numOfRows = htons(blk->numOfRows);
      blk = (SSubmitBlk*)(blk->data + dataLen);
    }
}

static int32_t buildOutput(SInsertParseContext* pCxt) {
  size_t numOfVg = taosArrayGetSize(pCxt->pVgDataBlocks);
  pCxt->pOutput->pDataBlocks = taosArrayInit(numOfVg, POINTER_BYTES);
  if (NULL == pCxt->pOutput->pDataBlocks) {
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }
  for (size_t i = 0; i < numOfVg; ++i) {
    STableDataBlocks* src = taosArrayGetP(pCxt->pVgDataBlocks, i);
    SVgDataBlocks* dst = calloc(1, sizeof(SVgDataBlocks));
    if (NULL == dst) {
      return TSDB_CODE_TSC_OUT_OF_MEMORY;
    }
X
Xiaoyu Wang 已提交
156
    taosHashGetClone(pCxt->pVgroupsHashObj, (const char*)&src->vgId, sizeof(src->vgId), &dst->vg);
157 158
    dst->numOfTables = src->numOfTables;
    dst->size = src->size;
dengyihao's avatar
dengyihao 已提交
159
    TSWAP(dst->pData, src->pData, char*);
X
Xiaoyu Wang 已提交
160
    buildMsgHeader(dst);
161 162 163 164 165
    taosArrayPush(pCxt->pOutput->pDataBlocks, &dst);
  }
  return TSDB_CODE_SUCCESS;
}

166 167 168 169 170 171 172 173 174 175
static int32_t checkTimestamp(STableDataBlocks *pDataBlocks, const char *start) {
  // once the data block is disordered, we do NOT keep previous timestamp any more
  if (!pDataBlocks->ordered) {
    return TSDB_CODE_SUCCESS;
  }

  TSKEY k = *(TSKEY *)start;

  if (k == INT64_MIN) {
    if (pDataBlocks->tsSource == TSDB_USE_CLI_TS) {
176
      return TSDB_CODE_FAILED; // client time/server time can not be mixed
177
    }
178
    pDataBlocks->tsSource = TSDB_USE_SERVER_TS;
179 180
  } else {
    if (pDataBlocks->tsSource == TSDB_USE_SERVER_TS) {
181
      return TSDB_CODE_FAILED;  // client time/server time can not be mixed
182
    }
183
    pDataBlocks->tsSource = TSDB_USE_CLI_TS;
184 185 186 187 188 189 190 191 192 193
  }

  if (k <= pDataBlocks->prevTS && (pDataBlocks->tsSource == TSDB_USE_CLI_TS)) {
    pDataBlocks->ordered = false;
  }

  pDataBlocks->prevTS = k;
  return TSDB_CODE_SUCCESS;
}

194
static int parseTime(char **end, SToken *pToken, int16_t timePrec, int64_t *time, SMsgBuf* pMsgBuf) {
195
  int32_t   index = 0;
196
  SToken    sToken;
197
  int64_t   interval;
198 199
  int64_t   ts = 0;
  char* pTokenEnd = *end;
200 201

  if (pToken->type == TK_NOW) {
202
    ts = taosGetTimestamp(timePrec);
203
  } else if (pToken->type == TK_INTEGER) {
204 205 206
    bool isSigned = false;
    toInteger(pToken->z, pToken->n, 10, &ts, &isSigned);
  } else { // parse the RFC-3339/ISO-8601 timestamp format string
207
    if (taosParseTime(pToken->z, time, pToken->n, timePrec, tsDaylight) != TSDB_CODE_SUCCESS) {
208
      return buildSyntaxErrMsg(pMsgBuf, "invalid timestamp format", pToken->z);
209 210 211 212 213 214 215 216
    }

    return TSDB_CODE_SUCCESS;
  }

  for (int k = pToken->n; pToken->z[k] != '\0'; k++) {
    if (pToken->z[k] == ' ' || pToken->z[k] == '\t') continue;
    if (pToken->z[k] == ',') {
217 218
      *end = pTokenEnd;
      *time = ts;
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
      return 0;
    }

    break;
  }

  /*
   * time expression:
   * e.g., now+12a, now-5h
   */
  SToken valueToken;
  index = 0;
  sToken = tStrGetToken(pTokenEnd, &index, false);
  pTokenEnd += index;

  if (sToken.type == TK_MINUS || sToken.type == TK_PLUS) {
    index = 0;
    valueToken = tStrGetToken(pTokenEnd, &index, false);
    pTokenEnd += index;

    if (valueToken.n < 2) {
240
      return buildSyntaxErrMsg(pMsgBuf, "value expected in timestamp", sToken.z);
241 242 243 244 245 246 247 248
    }

    char unit = 0;
    if (parseAbsoluteDuration(valueToken.z, valueToken.n, &interval, &unit, timePrec) != TSDB_CODE_SUCCESS) {
      return TSDB_CODE_TSC_INVALID_OPERATION;
    }

    if (sToken.type == TK_PLUS) {
249
      ts += interval;
250
    } else {
251
      ts = ts - interval;
252 253
    }

254
    *end = pTokenEnd;
255 256
  }

257
  *time = ts;
258 259
  return TSDB_CODE_SUCCESS;
}
260

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
typedef struct SMemParam {
  SMemRow row;
  SSchema* schema;
  int32_t toffset;
  uint8_t compareStat;
  int32_t dataLen;
  int32_t kvLen;
} SMemParam;

static FORCE_INLINE int32_t MemRowAppend(const void *value, int32_t len, void *param) {
  SMemParam* pa = (SMemParam*)param;
  if (TSDB_DATA_TYPE_BINARY == pa->schema->type) {
    char *rowEnd = memRowEnd(pa->row);
    STR_WITH_SIZE_TO_VARSTR(rowEnd, value, len);
    appendMemRowColValEx(pa->row, rowEnd, true, pa->schema->colId, pa->schema->type, pa->toffset, &pa->dataLen, &pa->kvLen, pa->compareStat);
  } else if (TSDB_DATA_TYPE_NCHAR == pa->schema->type) {
    // if the converted output len is over than pColumnModel->bytes, return error: 'Argument list too long'
    int32_t output = 0;
    char *  rowEnd = memRowEnd(pa->row);
    if (!taosMbsToUcs4(value, len, (char *)varDataVal(rowEnd), pa->schema->bytes - VARSTR_HEADER_SIZE, &output)) {
      return TSDB_CODE_TSC_SQL_SYNTAX_ERROR;
282
    }
283 284 285 286 287 288 289
    varDataSetLen(rowEnd, output);
    appendMemRowColValEx(pa->row, rowEnd, false, pa->schema->colId, pa->schema->type, pa->toffset, &pa->dataLen, &pa->kvLen, pa->compareStat);
  } else {
    appendMemRowColValEx(pa->row, value, true, pa->schema->colId, pa->schema->type, pa->toffset, &pa->dataLen, &pa->kvLen, pa->compareStat);
  }
  return TSDB_CODE_SUCCESS;
}
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353

// pSql -> tag1_name, ...)
static int32_t parseBoundColumns(SInsertParseContext* pCxt, SParsedDataColInfo* pColList, SSchema* pSchema) {
  int32_t nCols = pColList->numOfCols;

  pColList->numOfBound = 0; 
  memset(pColList->boundedColumns, 0, sizeof(int32_t) * nCols);
  for (int32_t i = 0; i < nCols; ++i) {
    pColList->cols[i].valStat = VAL_STAT_NONE;
  }

  SToken sToken;
  bool    isOrdered = true;
  int32_t lastColIdx = -1;  // last column found
  while (1) {
    NEXT_TOKEN(pCxt->pSql, sToken);

    if (TK_RP == sToken.type) {
      break;
    }

    int32_t t = lastColIdx + 1;
    int32_t index = findCol(&sToken, t, nCols, pSchema);
    if (index < 0 && t > 0) {
      index = findCol(&sToken, 0, t, pSchema);
      isOrdered = false;
    }
    if (index < 0) {
      return buildSyntaxErrMsg(&pCxt->msg, "invalid column/tag name", sToken.z);
    }
    if (pColList->cols[index].valStat == VAL_STAT_HAS) {
      return buildSyntaxErrMsg(&pCxt->msg, "duplicated column name", sToken.z);
    }
    lastColIdx = index;
    pColList->cols[index].valStat = VAL_STAT_HAS;
    pColList->boundedColumns[pColList->numOfBound] = index;
    ++pColList->numOfBound;
  }

  pColList->orderStatus = isOrdered ? ORDER_STATUS_ORDERED : ORDER_STATUS_DISORDERED;

  if (!isOrdered) {
    pColList->colIdxInfo = calloc(pColList->numOfBound, sizeof(SBoundIdxInfo));
    if (NULL == pColList->colIdxInfo) {
      return TSDB_CODE_TSC_OUT_OF_MEMORY;
    }
    SBoundIdxInfo* pColIdx = pColList->colIdxInfo;
    for (uint16_t i = 0; i < pColList->numOfBound; ++i) {
      pColIdx[i].schemaColIdx = (uint16_t)pColList->boundedColumns[i];
      pColIdx[i].boundIdx = i;
    }
    qsort(pColIdx, pColList->numOfBound, sizeof(SBoundIdxInfo), schemaIdxCompar);
    for (uint16_t i = 0; i < pColList->numOfBound; ++i) {
      pColIdx[i].finalIdx = i;
    }
    qsort(pColIdx, pColList->numOfBound, sizeof(SBoundIdxInfo), boundIdxCompar);
  }

  memset(&pColList->boundedColumns[pColList->numOfBound], 0, sizeof(int32_t) * (pColList->numOfCols - pColList->numOfBound));

  return TSDB_CODE_SUCCESS;
}

// pSql -> tag1_value, ...)
354 355
static int32_t parseTagsClause(SInsertParseContext* pCxt, SSchema* pTagsSchema, uint8_t precision) {
  if (tdInitKVRowBuilder(&pCxt->tagsBuilder) < 0) {
356 357 358
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
  }

359
  SKvParam param = {.builder = &pCxt->tagsBuilder};
360
  SToken sToken;
361
  char tmpTokenBuf[TSDB_MAX_BYTES_PER_ROW] = {0};  // used for deleting Escape character: \\, \', \"
362
  for (int i = 0; i < pCxt->tags.numOfBound; ++i) {
363
    NEXT_TOKEN(pCxt->pSql, sToken);
364
    SSchema* pSchema = &pTagsSchema[pCxt->tags.boundedColumns[i]];
365
    param.schema = pSchema;
366
    CHECK_CODE(parseValueToken(&pCxt->pSql, &sToken, pSchema, precision, tmpTokenBuf, KvRowAppend, &param, &pCxt->msg));
367 368
  }

369
  SKVRow row = tdGetKVRowFromBuilder(&pCxt->tagsBuilder);
370 371 372 373 374
  if (NULL == row) {
    return buildInvalidOperationMsg(&pCxt->msg, "tag value expected");
  }
  tdSortKVRowByColIdx(row);

375
  // todo construct payload
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391

  tfree(row);
}

// pSql -> stb_name [(tag1_name, ...)] TAGS (tag1_value, ...)
static int32_t parseUsingClause(SInsertParseContext* pCxt, SToken* pTbnameToken) {
  SToken sToken;

  // pSql -> stb_name [(tag1_name, ...)] TAGS (tag1_value, ...)
  NEXT_TOKEN(pCxt->pSql, sToken);
  CHECK_CODE(getTableMeta(pCxt, &sToken));
  if (TSDB_SUPER_TABLE != pCxt->pTableMeta->tableType) {
    return buildInvalidOperationMsg(&pCxt->msg, "create table only from super table is allowed");
  }

  SSchema* pTagsSchema = getTableTagSchema(pCxt->pTableMeta);
392
  setBoundColumnInfo(&pCxt->tags, pTagsSchema, getNumOfTags(pCxt->pTableMeta));
393 394 395 396

  // pSql -> [(tag1_name, ...)] TAGS (tag1_value, ...)
  NEXT_TOKEN(pCxt->pSql, sToken);
  if (TK_LP == sToken.type) {
397
    CHECK_CODE(parseBoundColumns(pCxt, &pCxt->tags, pTagsSchema));
398 399 400 401 402 403 404 405 406 407 408
    NEXT_TOKEN(pCxt->pSql, sToken);
  }

  if (TK_TAGS != sToken.type) {
    return buildSyntaxErrMsg(&pCxt->msg, "TAGS is expected", sToken.z);
  }
  // pSql -> (tag1_value, ...)
  NEXT_TOKEN(pCxt->pSql, sToken);
  if (TK_LP != sToken.type) {
    return buildSyntaxErrMsg(&pCxt->msg, "( is expected", sToken.z);
  }
409
  CHECK_CODE(parseTagsClause(pCxt, pTagsSchema, getTableInfo(pCxt->pTableMeta).precision));
410 411 412 413

  return TSDB_CODE_SUCCESS;
}

414 415 416
static int parseOneRow(SInsertParseContext* pCxt, STableDataBlocks* pDataBlocks, int16_t timePrec, int32_t* len, char* tmpTokenBuf) {  
  SParsedDataColInfo* spd = &pDataBlocks->boundColumnInfo;
  SMemRowBuilder* pBuilder = &pDataBlocks->rowBuilder;
417 418 419
  char *row = pDataBlocks->pData + pDataBlocks->size;  // skip the SSubmitBlk header
  initSMemRow(row, pBuilder->memRowType, pDataBlocks, spd->numOfBound);

420 421 422 423
  bool isParseBindParam = false;
  SSchema* schema = getTableColumnSchema(pDataBlocks->pTableMeta);
  SMemParam param = {.row = row};
  SToken sToken = {0};
424 425 426
  // 1. set the parsed value from sql string
  for (int i = 0; i < spd->numOfBound; ++i) {
    NEXT_TOKEN(pCxt->pSql, sToken);
X
Xiaoyu Wang 已提交
427
    SSchema *pSchema = &schema[spd->boundedColumns[i] - 1];
428 429 430
    param.schema = pSchema;
    param.compareStat = pBuilder->compareStat;
    getMemRowAppendInfo(schema, pBuilder->memRowType, spd, i, &param.toffset);
431
    CHECK_CODE(parseValueToken(&pCxt->pSql, &sToken, pSchema, timePrec, tmpTokenBuf, MemRowAppend, &param, &pCxt->msg));
432 433

    if (PRIMARYKEY_TIMESTAMP_COL_ID == pSchema->colId) {
434 435 436 437 438 439 440 441 442 443 444
      TSKEY tsKey = memRowKey(row);
      if (checkTimestamp(pDataBlocks, (const char *)&tsKey) != TSDB_CODE_SUCCESS) {
        buildSyntaxErrMsg(&pCxt->msg, "client time/server time can not be mixed up", sToken.z);
        return TSDB_CODE_TSC_INVALID_TIME_STAMP;
      }
    }
  }

  if (!isParseBindParam) {
    // 2. check and set convert flag
    if (pBuilder->compareStat == ROW_COMPARE_NEED) {
445
      convertMemRow(row, spd->allNullLen + TD_MEM_ROW_DATA_HEAD_SIZE, pBuilder->kvRowInitLen);
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
    }

    // 3. set the null value for the columns that do not assign values
    if ((spd->numOfBound < spd->numOfCols) && isDataRow(row) && !isNeedConvertRow(row)) {
      SDataRow dataRow = memRowDataBody(row);
      for (int32_t i = 0; i < spd->numOfCols; ++i) {
        if (spd->cols[i].valStat == VAL_STAT_NONE) {
          tdAppendDataColVal(dataRow, getNullValue(schema[i].type), true, schema[i].type, spd->cols[i].toffset);
        }
      }
    }
  }

  *len = getExtendedRowSize(pDataBlocks);
  return TSDB_CODE_SUCCESS;
}

// pSql -> (field1_value, ...) [(field1_value2, ...) ...]
464
static int32_t parseValues(SInsertParseContext* pCxt, STableDataBlocks* pDataBlock, int maxRows, int32_t* numOfRows) {
465 466 467 468 469
  STableComInfo tinfo = getTableInfo(pDataBlock->pTableMeta);
  int32_t extendedRowSize = getExtendedRowSize(pDataBlock);
  CHECK_CODE(initMemRowBuilder(&pDataBlock->rowBuilder, 0, tinfo.numOfColumns, pDataBlock->boundColumnInfo.numOfBound, pDataBlock->boundColumnInfo.allNullLen));

  (*numOfRows) = 0;
470
  char tmpTokenBuf[TSDB_MAX_BYTES_PER_ROW] = {0};  // used for deleting Escape character: \\, \', \"
471 472
  SToken sToken;
  while (1) {
473 474
    int32_t index = 0;
    NEXT_TOKEN_KEEP_SQL(pCxt->pSql, sToken, index);
475 476 477
    if (TK_LP != sToken.type) {
      break;
    }
478
    pCxt->pSql += index;
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509

    if ((*numOfRows) >= maxRows || pDataBlock->size + extendedRowSize >= pDataBlock->nAllocSize) {
      int32_t tSize;
      CHECK_CODE(allocateMemIfNeed(pDataBlock, extendedRowSize, &tSize));
      ASSERT(tSize >= maxRows);
      maxRows = tSize;
    }

    int32_t len = 0;
    CHECK_CODE(parseOneRow(pCxt, pDataBlock, tinfo.precision, &len, tmpTokenBuf));
    pDataBlock->size += len;

    NEXT_TOKEN(pCxt->pSql, sToken);
    if (TK_RP != sToken.type) {
      return buildSyntaxErrMsg(&pCxt->msg, ") expected", sToken.z);
    }

    (*numOfRows)++;
  }

  if (0 == (*numOfRows)) {
    return  buildSyntaxErrMsg(&pCxt->msg, "no any data points", NULL);
  }
  return TSDB_CODE_SUCCESS;
}

static int32_t parseValuesClause(SInsertParseContext* pCxt, STableDataBlocks* dataBuf) {  
  int32_t maxNumOfRows;
  CHECK_CODE(allocateMemIfNeed(dataBuf, getExtendedRowSize(dataBuf), &maxNumOfRows));

  int32_t numOfRows = 0;
510
  CHECK_CODE(parseValues(pCxt, dataBuf, maxNumOfRows, &numOfRows));
511 512 513 514 515 516 517 518 519 520 521

  SSubmitBlk *pBlocks = (SSubmitBlk *)(dataBuf->pData);
  if (TSDB_CODE_SUCCESS != setBlockInfo(pBlocks, dataBuf->pTableMeta, numOfRows)) {
    return buildInvalidOperationMsg(&pCxt->msg, "too many rows in sql, total number of rows should be less than 32767");
  }

  dataBuf->numOfTables = 1;
  pCxt->totalNum += numOfRows;
  return TSDB_CODE_SUCCESS;
}

X
Xiaoyu Wang 已提交
522 523 524 525 526 527
static void destroyInsertParseContextForTable(SInsertParseContext* pCxt) {
  tfree(pCxt->pTableMeta);
  destroyBoundColumnInfo(&pCxt->tags);
  tdDestroyKVRowBuilder(&pCxt->tagsBuilder);
}

528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
static void destroyDataBlock(STableDataBlocks* pDataBlock) {
  if (pDataBlock == NULL) {
    return;
  }

  tfree(pDataBlock->pData);
  if (!pDataBlock->cloned) {
    // free the refcount for metermeta
    if (pDataBlock->pTableMeta != NULL) {
      tfree(pDataBlock->pTableMeta);
    }

    destroyBoundColumnInfo(&pDataBlock->boundColumnInfo);
  }
  tfree(pDataBlock);
}

X
Xiaoyu Wang 已提交
545 546 547
static void destroyInsertParseContext(SInsertParseContext* pCxt) {
  destroyInsertParseContextForTable(pCxt);
  taosHashCleanup(pCxt->pVgroupsHashObj);
548 549

  destroyBlockHashmap(pCxt->pTableBlockHashObj);
X
Xiaoyu Wang 已提交
550 551 552 553
  destroyBlockArrayList(pCxt->pTableDataBlocks);
  destroyBlockArrayList(pCxt->pVgDataBlocks);
}

554 555 556 557 558 559
//   tb_name
//       [USING stb_name [(tag1_name, ...)] TAGS (tag1_value, ...)]
//       [(field1_name, ...)]
//       VALUES (field1_value, ...) [(field1_value2, ...) ...] | FILE csv_file_path
//   [...];
static int32_t parseInsertBody(SInsertParseContext* pCxt) {
X
Xiaoyu Wang 已提交
560
  // for each table
561
  while (1) {
X
Xiaoyu Wang 已提交
562 563
    destroyInsertParseContextForTable(pCxt);

564 565 566 567 568 569 570
    SToken sToken;
    // pSql -> tb_name ...
    NEXT_TOKEN(pCxt->pSql, sToken);

    // no data in the sql string anymore.
    if (sToken.n == 0) {
      if (0 == pCxt->totalNum) {
571
        return buildInvalidOperationMsg(&pCxt->msg, "no data in sql");;
572 573 574 575 576 577 578
      }
      break;
    }

    SToken tbnameToken = sToken;
    NEXT_TOKEN(pCxt->pSql, sToken);

579
    // USING cluase 
580 581 582 583
    if (TK_USING == sToken.type) {
      CHECK_CODE(parseUsingClause(pCxt, &tbnameToken));
      NEXT_TOKEN(pCxt->pSql, sToken);
    } else {
584
      CHECK_CODE(getTableMeta(pCxt, &tbnameToken));
585 586 587 588
    }

    STableDataBlocks *dataBuf = NULL;
    CHECK_CODE(getDataBlockFromList(pCxt->pTableBlockHashObj, pCxt->pTableMeta->uid, TSDB_DEFAULT_PAYLOAD_SIZE,
589
        sizeof(SSubmitBlk), getTableInfo(pCxt->pTableMeta).rowSize, pCxt->pTableMeta, &dataBuf, NULL));
590 591 592

    if (TK_LP == sToken.type) {
      // pSql -> field1_name, ...)
593
      CHECK_CODE(parseBoundColumns(pCxt, &dataBuf->boundColumnInfo, getTableColumnSchema(pCxt->pTableMeta)));
594 595 596 597 598 599
      NEXT_TOKEN(pCxt->pSql, sToken);
    }

    if (TK_VALUES == sToken.type) {
      // pSql -> (field1_value, ...) [(field1_value2, ...) ...]
      CHECK_CODE(parseValuesClause(pCxt, dataBuf));
600
      pCxt->pOutput->insertType = TSDB_QUERY_TYPE_INSERT;
601 602 603 604 605 606 607 608 609 610 611
      continue;
    }

    // FILE csv_file_path
    if (TK_FILE == sToken.type) {
      // pSql -> csv_file_path
      NEXT_TOKEN(pCxt->pSql, sToken);
      if (0 == sToken.n || (TK_STRING != sToken.type && TK_ID != sToken.type)) {
        return buildSyntaxErrMsg(&pCxt->msg, "file path is required following keyword FILE", sToken.z);
      }
      // todo
612
      pCxt->pOutput->insertType = TSDB_QUERY_TYPE_FILE_INSERT;
613 614 615 616 617 618 619
      continue;
    }

    return buildSyntaxErrMsg(&pCxt->msg, "keyword VALUES or FILE is expected", sToken.z);
  }
  // merge according to vgId
  if (!TSDB_QUERY_HAS_TYPE(pCxt->pOutput->insertType, TSDB_QUERY_TYPE_STMT_INSERT) && taosHashGetSize(pCxt->pTableBlockHashObj) > 0) {
620
    CHECK_CODE(mergeTableDataBlocks(pCxt->pTableBlockHashObj, pCxt->pOutput->schemaAttache, pCxt->pOutput->payloadType, &pCxt->pVgDataBlocks));
621
  }
622
  return buildOutput(pCxt);
623 624 625 626 627 628 629 630
}

// INSERT INTO
//   tb_name
//       [USING stb_name [(tag1_name, ...)] TAGS (tag1_value, ...)]
//       [(field1_name, ...)]
//       VALUES (field1_value, ...) [(field1_value2, ...) ...] | FILE csv_file_path
//   [...];
H
Haojun Liao 已提交
631
int32_t parseInsertSql(SParseContext* pContext, SVnodeModifOpStmtInfo** pInfo) {
632 633
  SInsertParseContext context = {
    .pComCxt = pContext,
634
    .pSql = (char*) pContext->pSql,
635 636
    .msg = {.buf = pContext->pMsg, .len = pContext->msgLen},
    .pTableMeta = NULL,
X
Xiaoyu Wang 已提交
637
    .pVgroupsHashObj = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false),
638 639
    .pTableBlockHashObj = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, false),
    .totalNum = 0,
H
Haojun Liao 已提交
640
    .pOutput = calloc(1, sizeof(SVnodeModifOpStmtInfo))
641 642
  };

X
Xiaoyu Wang 已提交
643
  if (NULL == context.pVgroupsHashObj || NULL == context.pTableBlockHashObj || NULL == context.pOutput) {
644
    terrno = TSDB_CODE_TSC_OUT_OF_MEMORY;
X
Xiaoyu Wang 已提交
645
    return TSDB_CODE_TSC_OUT_OF_MEMORY;
646 647
  }

648
  *pInfo = context.pOutput;
649
  context.pOutput->nodeType = TSDB_SQL_INSERT;
650
  context.pOutput->payloadType = PAYLOAD_TYPE_KV;
651

652 653 654 655 656 657
  int32_t code = skipInsertInto(&context);
  if (TSDB_CODE_SUCCESS == code) {
    code = parseInsertBody(&context);
  }
  destroyInsertParseContext(&context);
  terrno = code;
X
Xiaoyu Wang 已提交
658
  return code;
659
}