parTokenizer.c 19.4 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
Haojun Liao 已提交
18
#include "thash.h"
H
hjxilinx 已提交
19
#include "taosdef.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One reserved word of the SQL dialect.
 * Entries live in keywordTable and are looked up through keywordHashTable.
 */
typedef struct SKeyword {
  const char* name; /* keyword text as it is written in keywordTable */
  uint16_t    type; /* token code (TK_xxx) reported for this keyword */
  uint8_t     len;  /* strlen(name); filled in by doInitKeywordsTable() */
} SKeyword;

S
slguan 已提交
29
// keywords in sql string
H
hzcheng 已提交
30
static SKeyword keywordTable[] = {
31
    {"ACCOUNT",       TK_ACCOUNT},
32 33 34
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
35
    {"ALL",           TK_ALL},
36
    {"ALTER",         TK_ALTER},
37
    {"ANALYZE",       TK_ANALYZE},
38
    {"AND",           TK_AND},
39
    {"APPS",          TK_APPS},
40 41 42 43 44 45
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BLOCKS",        TK_BLOCKS},
46 47
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
48
    {"BOOL",          TK_BOOL},
49
    {"BUFSIZE",       TK_BUFSIZE},
50 51 52
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
53
    {"COLUMN",        TK_COLUMN},
54 55
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
56 57 58 59
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
60 61 62 63
    {"CREATE",        TK_CREATE},
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
64
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
65
    {"DELAY",         TK_DELAY},
66
    {"DESC",          TK_DESC},
67
    {"DESCRIBE",      TK_DESCRIBE},
68
    {"DISTINCT",      TK_DISTINCT},
69 70
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
71
    {"DOUBLE",        TK_DOUBLE},
72
    {"DROP",          TK_DROP},
73
    {"EXISTS",        TK_EXISTS},
74
    {"EXPLAIN",       TK_EXPLAIN},
X
Xiaoyu Wang 已提交
75
    {"FILE_FACTOR",   TK_FILE_FACTOR},
76
    {"FILL",          TK_FILL},
X
Xiaoyu Wang 已提交
77
    {"FIRST",         TK_FIRST},
78 79 80
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
X
Xiaoyu Wang 已提交
81
    {"FUNCTION",      TK_FUNCTION},
X
Xiaoyu Wang 已提交
82
    {"FUNCTIONS",     TK_FUNCTIONS},
83
    {"GRANTS",        TK_GRANTS},
84 85 86 87 88
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
X
Xiaoyu Wang 已提交
89
    {"INDEX",         TK_INDEX},
X
Xiaoyu Wang 已提交
90
    {"INDEXES",       TK_INDEXES},
91 92 93 94 95 96 97 98 99 100
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
101
    {"KILL",          TK_KILL},
X
Xiaoyu Wang 已提交
102
    {"LAST",          TK_LAST},
103
    {"LICENCE",       TK_LICENCE},
104 105 106
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
107
    {"LOCAL",         TK_LOCAL},
108 109 110 111
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
112
    {"MNODE",         TK_MNODE},
X
Xiaoyu Wang 已提交
113
    {"MNODES",        TK_MNODES},
114
    {"MODIFY",        TK_MODIFY},
X
Xiaoyu Wang 已提交
115
    {"MODULES",       TK_MODULES},
116 117 118 119 120 121
    {"NCHAR",         TK_NCHAR},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
X
Xiaoyu Wang 已提交
122
    {"NULLS",         TK_NULLS},
123 124 125 126
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
127
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
128
    {"PARTITION",     TK_PARTITION},
129 130
    {"PASS",          TK_PASS},
    {"PORT",          TK_PORT},
131
    {"PPS",           TK_PPS},
132 133 134
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
135
    {"_QENDTS",       TK_QENDTS},
X
Xiaoyu Wang 已提交
136
    {"QNODE",         TK_QNODE},
X
Xiaoyu Wang 已提交
137
    {"QNODES",        TK_QNODES},
138
    {"_QSTARTTS",     TK_QSTARTTS},
139 140
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
141
    {"QUERY",         TK_QUERY},
142
    {"QUORUM",        TK_QUORUM},
143
    {"RATIO",         TK_RATIO},
144
    {"REPLICA",       TK_REPLICA},
145
    {"RESET",         TK_RESET},
X
Xiaoyu Wang 已提交
146 147
    {"RETENTIONS",    TK_RETENTIONS},
    {"ROLLUP",        TK_ROLLUP},
148
    {"_ROWTS",        TK_ROWTS},
149
    {"SCORES",        TK_SCORES},
150 151
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
152
    {"SET",           TK_SET},
153 154 155 156 157 158
    {"SHOW",          TK_SHOW},
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
159 160
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
161
    {"SOFFSET",       TK_SOFFSET},
162
    {"STABLE",        TK_STABLE},
163
    {"STABLES",       TK_STABLES},
164
    {"STATE",         TK_STATE},
165
    {"STATE_WINDOW",  TK_STATE_WINDOW},
166
    {"STORAGE",       TK_STORAGE},
167
    {"STREAM",        TK_STREAM},
X
Xiaoyu Wang 已提交
168
    {"STREAMS",       TK_STREAMS},
169
    {"STREAM_MODE",   TK_STREAM_MODE},
170
    {"SYNCDB",        TK_SYNCDB},
171 172
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
173
    {"TAG",           TK_TAG},
174
    {"TAGS",          TK_TAGS},
175
    {"TBNAME",        TK_TBNAME},
176 177
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TINYINT",       TK_TINYINT},
178
    {"TOPIC",         TK_TOPIC},
179 180
    {"TOPICS",        TK_TOPICS},
    {"TSERIES",       TK_TSERIES},
181 182 183 184
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
185 186
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
187
    {"USING",         TK_USING},
X
Xiaoyu Wang 已提交
188
    {"VALUE",         TK_VALUE},
189 190
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
191
    {"VARIABLES",     TK_VARIABLES},
192
    {"VERBOSE",       TK_VERBOSE},
193
    {"VGROUPS",       TK_VGROUPS},
194
    {"VNODES",        TK_VNODES},
195
    {"WAL",           TK_WAL},
196 197
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
198
    {"WHERE",         TK_WHERE},
X
Xiaoyu Wang 已提交
199
    {"_WSTARTTS",     TK_WSTARTTS},
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CLUSTER",      TK_CLUSTER},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"TRIGGER",      TK_TRIGGER},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
265
    // {"PARTITIONS",   TK_PARTITIONS},
266
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
267 268 269 270 271 272 273 274 275 276 277 278 279 280
};

/*
 * Lookup table: isIdChar[c] is 1 when byte c may appear in an identifier
 * (digits, ASCII letters and '_'), 0 otherwise.
 *
 * The table is sized for every possible uint8_t index so that a lookup with a
 * non-ASCII byte (0x80..0xFF) is always in bounds; those entries are
 * implicitly zero, i.e. "not an identifier character".
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
    /* 8x .. Fx: implicitly 0 */
};

H
Haojun Liao 已提交
281
// Hash table used for keyword lookup: created and filled by doInitKeywordsTable(),
// read by tKeywordCode(), and swapped back to NULL by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
282

S
TD-1057  
Shengliang Guan 已提交
283
static void doInitKeywordsTable(void) {
284 285
  int numOfEntries = tListLen(keywordTable);
  
H
Haojun Liao 已提交
286
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
287
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
288
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
289
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
290
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
291
  }
292 293
}

wafwerar's avatar
wafwerar 已提交
294
// Once-control object handed to taosThreadOnce() in tKeywordCode() together with
// doInitKeywordsTable(); presumably guarantees the table is built only once -- confirm
// against the taosThreadOnce() implementation.
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
295

296
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
297
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
H
hjxilinx 已提交
298 299
  
  char key[512] = {0};
300
  if (n > tListLen(key)) { // too long token, can not be any other token type
301
    return TK_NK_ID;
302
  }
H
hjxilinx 已提交
303
  
H
hzcheng 已提交
304 305
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
306
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
307 308 309 310 311
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
312
  if (keywordHashTable == NULL) {
313
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
314
  }
H
Haojun Liao 已提交
315

H
Haojun Liao 已提交
316
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
317
  return (pKey != NULL)? (*pKey)->type:TK_NK_ID;
H
hzcheng 已提交
318 319
}

H
huili 已提交
320
/*
321 322 323
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
324
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
325
  uint32_t i;
H
hzcheng 已提交
326 327 328 329 330 331 332 333
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
334
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
335 336 337
      return i;
    }
    case ':': {
338
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
339 340 341 342 343 344
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
345
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
346 347
        return i;
      }
X
Xiaoyu Wang 已提交
348
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
349 350 351
      return 1;
    }
    case '(': {
352
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
353 354 355
      return 1;
    }
    case ')': {
356
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
357 358 359
      return 1;
    }
    case ';': {
360
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
361 362 363
      return 1;
    }
    case '+': {
364
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
365 366 367
      return 1;
    }
    case '*': {
368
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
369 370 371 372
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
373
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
374 375 376 377 378
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
379
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
380 381 382
      return i;
    }
    case '%': {
383
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
384 385 386
      return 1;
    }
    case '=': {
387
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
388 389 390 391
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
392
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
393 394
        return 2;
      } else if (z[1] == '>') {
395
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
396 397
        return 2;
      } else if (z[1] == '<') {
398
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
399 400
        return 2;
      } else {
401
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
402 403 404 405 406
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
407
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
408 409
        return 2;
      } else if (z[1] == '>') {
410
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
411 412
        return 2;
      } else {
413
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
414 415 416 417 418
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
419
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
420 421
        return 2;
      } else {
422
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
423 424 425 426 427
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
428
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
429 430
        return 1;
      } else {
431
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
432 433 434 435
        return 2;
      }
    }
    case ',': {
436
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
437 438 439
      return 1;
    }
    case '&': {
440
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
441 442 443
      return 1;
    }
    case '~': {
444
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
445 446
      return 1;
    }
S
slguan 已提交
447
    case '?': {
448
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
449 450
      return 1;
    }
451
    case '`':
H
hzcheng 已提交
452 453
    case '\'':
    case '"': {
S
slguan 已提交
454 455
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
456
      for (i = 1; z[i]; i++) {
H
Haojun Liao 已提交
457
        if (z[i] == '\\') {   // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
458 459 460 461
          i++;
          continue;
        }
        
462
        if (z[i] == delim) {
H
hzcheng 已提交
463 464 465
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
466
            strEnd = true;
H
hzcheng 已提交
467 468 469 470
            break;
          }
        }
      }
L
[1292]  
lihui 已提交
471
      
H
hzcheng 已提交
472
      if (z[i]) i++;
H
huili 已提交
473

S
slguan 已提交
474
      if (strEnd) {
475
        *tokenId = (delim == '`')? TK_NK_ID:TK_NK_STRING;
S
slguan 已提交
476 477
        return i;
      }
H
huili 已提交
478

S
slguan 已提交
479
      break;
H
hzcheng 已提交
480 481
    }
    case '.': {
S
slguan 已提交
482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

499
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
500 501
        return i;
      } else {
502
        *tokenId = TK_NK_DOT;
S
slguan 已提交
503 504 505 506 507 508 509 510
        return 1;
      }
    }

    case '0': {
      char next = z[1];

      if (next == 'b') { // bin number
511
        *tokenId = TK_NK_BIN;
S
slguan 已提交
512 513 514 515 516 517 518 519 520
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      } else if (next == 'x') {  //hex number
521
        *tokenId = TK_NK_HEX;
S
slguan 已提交
522 523 524 525 526 527 528 529 530
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
531 532 533 534 535 536 537 538 539 540
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
541
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
542 543 544
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
545
      /* here is the 1u/1a/2s/3m/9y */
546
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' || z[i] == 'n' ||
H
Haojun Liao 已提交
547
           z[i] == 'y' || z[i] == 'w' ||
548
           z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' || z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' ||
H
Haojun Liao 已提交
549
           z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
550
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
551
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
552 553 554 555 556 557 558 559 560 561
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
562
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
563 564 565 566
        seg++;
      }

      if (seg == 4) {  // ip address
567
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
568 569 570 571 572 573 574 575 576
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
577
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
578 579 580 581 582 583
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
584
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
585 586 587 588 589 590
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
H
hjxilinx 已提交
591
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
592 593 594
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
595
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
596 597 598 599
        return i;
      }
    }
    default: {
H
hjxilinx 已提交
600
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t) *z]) {
H
hzcheng 已提交
601 602
        break;
      }
H
hjxilinx 已提交
603
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
604
      }
605
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
606 607 608 609
      return i;
    }
  }

610
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
611 612 613
  return 0;
}

H
Haojun Liao 已提交
614
/*
 * Replace the text of `token` inside the heap string *str with `newToken`.
 *
 * str      - in/out: the string containing the token. On success the old
 *            buffer is released and *str points to a newly allocated string.
 * token    - the token to replace; token->z must point into *str.
 * newToken - NUL-terminated replacement text.
 *
 * Returns a token describing the replacement inside the new string (z points
 * at the inserted text, n is its length, type is carried over from `token`).
 * If the new buffer cannot be allocated, *str is left untouched and a zeroed
 * token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char **str, SToken *token, const char* newToken) {
  char       *src = *str;
  const char *tail = token->z + token->n;  // text following the replaced token
  size_t      headLen = (size_t)(token->z - src);
  size_t      newLen = strlen(newToken);
  size_t      tailLen = strlen(tail);
  SToken      ntoken = {0};

  char *dst = taosMemoryCalloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  // the buffer is zero-filled, so the result is NUL-terminated after the last copy
  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen);

  ntoken.type = token->type;
  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

635
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
636
  SToken t0 = {0};
S
slguan 已提交
637

H
hzcheng 已提交
638 639
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
640 641
    t0.n = 0;
    return t0;
H
hzcheng 已提交
642 643
  }

644
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
645 646 647
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
648 649 650 651 652 653
    int32_t numOfComma = 0;
    char t = str[*i];
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
654
      }
H
Haojun Liao 已提交
655
    
H
Haojun Liao 已提交
656
      t = str[++(*i)];
S
slguan 已提交
657
    }
H
hzcheng 已提交
658

659
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
660
    break;
S
slguan 已提交
661

H
Haojun Liao 已提交
662 663
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
664
    bool ignore = false;
S
slguan 已提交
665 666
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
667
        ignore = true;
S
slguan 已提交
668 669 670 671
        break;
      }
    }

H
Haojun Liao 已提交
672
    if (!ignore) {
S
slguan 已提交
673 674
      break;
    }
H
Haojun Liao 已提交
675
#endif
H
hzcheng 已提交
676 677
  }

678
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
679 680 681 682 683 684 685 686 687
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
688
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
689 690

    // only id and string are valid
691
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
692
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
693 694 695 696 697 698 699 700 701
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
702
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
703
      len = tGetToken(&str[*i + t0.n], &type);
704
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
705 706 707
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
708 709 710
    }
  }

711
  t0.z = (char*) str + (*i);
S
slguan 已提交
712 713 714
  *i += t0.n;

  return t0;
H
hzcheng 已提交
715 716
}

717
bool taosIsKeyWordToken(const char* z, int32_t len) {
718
  return (tKeywordCode((char*)z, len) != TK_NK_ID);
719
}
H
Haojun Liao 已提交
720 721

/*
 * Release the keyword hash table.
 *
 * The table pointer is swapped to 0 with a compare-exchange and the table is
 * cleaned up only when the value returned by the exchange matches the pointer
 * that was read first.
 */
void taosCleanupKeywordsTable() {
  void* pTable = keywordHashTable;
  if (pTable == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, pTable, 0) != pTable) {
    return;
  }

  taosHashCleanup(pTable);
}
727

H
Haojun Liao 已提交
728
/*
 * Copy the text of pToken into buf as a NUL-terminated string.
 *
 * pToken - token to copy; must not be NULL.
 * buf    - destination buffer; must not be NULL.
 * len    - capacity of buf in bytes; must be larger than pToken->n so the
 *          terminator fits.
 *
 * Returns a copy of *pToken whose z member points at buf.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL);
  // check the sign first: comparing a negative len against the unsigned n would pass
  assert(len > 0 && (uint32_t)len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken token = *pToken;
  token.z = buf;
  return token;
}