tminmax.c 28.3 KB
Newer Older
H
Haojun Liao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "builtinsimpl.h"
#include "function.h"
#include "tdatablock.h"
#include "tfunctionInt.h"
#include "tglobal.h"

H
Haojun Liao 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Scan rows [i, end) of _data for a new maximum.
 *
 * Rows for which colDataIsNull_f(bm, i) is true are skipped. Whenever a row is
 * strictly greater than val, val takes that row's value and, if ctx has subsidiary
 * columns (ctx->subsidiaries.num > 0), updateTupleData() is called for that row
 * with pos.
 *
 * i is the caller's own variable and is advanced in place; val must already hold a
 * starting value. The expansion is a bare for-statement, so call sites need no ';'.
 */
#define __COMPARE_ACQUIRED_MAX(i, end, bm, _data, ctx, val, pos) \
  for (; i < (end); ++i) {                                       \
    if (colDataIsNull_f(bm, i)) {                                \
      continue;                                                  \
    }                                                            \
                                                                 \
    if ((val) < (_data)[i]) {                                    \
      (val) = (_data)[i];                                        \
      if ((ctx)->subsidiaries.num > 0) {                         \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);        \
      }                                                          \
    }                                                            \
  }

/*
 * Scan rows [i, end) of _data for a new minimum.
 *
 * Rows for which colDataIsNull_f(bm, i) is true are skipped. Whenever a row is
 * strictly smaller than val, val takes that row's value and, if ctx has subsidiary
 * columns (ctx->subsidiaries.num > 0), updateTupleData() is called for that row
 * with pos.
 *
 * i is the caller's own variable and is advanced in place; val must already hold a
 * starting value. The expansion is a bare for-statement, so call sites need no ';'.
 */
#define __COMPARE_ACQUIRED_MIN(i, end, bm, _data, ctx, val, pos) \
  for (; i < (end); ++i) {                                       \
    if (colDataIsNull_f(bm, i)) {                                \
      continue;                                                  \
    }                                                            \
                                                                 \
    if ((val) > (_data)[i]) {                                    \
      (val) = (_data)[i];                                        \
      if ((ctx)->subsidiaries.num > 0) {                         \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);        \
      }                                                          \
    }                                                            \
  }

/*
 * Lower val to the smallest of (_data)[start .. end-1]; val keeps its incoming value
 * when no element is strictly smaller. No null check is performed.
 * The macro declares its own loop index named i, so the arguments must not rely on
 * a caller variable called i.
 */
#define __COMPARE_EXTRACT_MIN(start, end, val, _data) \
  for (int32_t i = (start); i < (end); ++i) {         \
    if ((val) > (_data)[i]) {                         \
      (val) = (_data)[i];                             \
    }                                                 \
  }

/*
 * Raise val to the largest of (_data)[start .. end-1]; val keeps its incoming value
 * when no element is strictly larger. No null check is performed.
 * The macro declares its own loop index named i, so the arguments must not rely on
 * a caller variable called i.
 */
#define __COMPARE_EXTRACT_MAX(start, end, val, _data) \
  for (int32_t i = (start); i < (end); ++i) {         \
    if ((val) < (_data)[i]) {                         \
      (val) = (_data)[i];                             \
    }                                                 \
  }

64 65
/*
 * Number of values, each `bytes` bytes wide, that fit into `bits` bits
 * (integer division; `bytes` must be non-zero).
 */
static int32_t getInvokeThreshold(int32_t bits, int32_t bytes) {
  const int32_t bitsPerValue = bytes << 3u;  // 8 bits per byte
  return bits / bitsPerValue;
}

66 67
/*
 * Split numOfRows values of `bytes` bytes each into whole 256-bit vectors plus a tail.
 *
 *   *width      values held by one 256-bit vector (32 / bytes)
 *   *rounds     number of whole vectors contained in numOfRows
 *   *remainder  rows left over after the whole vectors
 *
 * `bytes` must be between 1 and 32, otherwise *width is 0 and the divisions below
 * are undefined.
 */
static void calculateRounds(int32_t numOfRows, int32_t bytes, int32_t* remainder, int32_t* rounds, int32_t* width) {
  const int32_t bitWidth = 256;

  *width = (bitWidth >> 3u) / bytes;
  *remainder = numOfRows % (*width);
  *rounds = numOfRows / (*width);
}

H
Haojun Liao 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
/*
 * Reduce two arrays to their overall maximum and store it in _v:
 * first the _width values at _first (the call sites in this file pass the lanes of a
 * 256-bit accumulator), then the _remain values at _sec.
 *
 * _first[1] is read unconditionally, so _width is expected to be at least 2.
 * The expansion is several statements that are NOT wrapped in do { } while (0) and
 * the call sites omit the trailing ';', so it must only be used inside a braced block.
 * It declares loop indices named k and j.
 */
#define EXTRACT_MAX_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = TMAX((_first)[0], (_first)[1]);                   \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMAX((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_v) < (_sec)[j]) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }

/*
 * Reduce two arrays to their overall minimum and store it in _v:
 * first the _width values at _first (the call sites in this file pass the lanes of a
 * 256-bit accumulator), then the _remain values at _sec.
 *
 * _first[1] is read unconditionally, so _width is expected to be at least 2.
 * The expansion is several statements that are NOT wrapped in do { } while (0) and
 * the call sites omit the trailing ';', so it must only be used inside a braced block.
 * It declares loop indices named k and j.
 */
#define EXTRACT_MIN_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = TMIN((_first)[0], (_first)[1]);                   \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMIN((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_v) > (_sec)[j]) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }
L
Liu Jicong 已提交
97

H
Haojun Liao 已提交
98
/*
 * Minimum (isMinFunc) or maximum of the first numOfRows 8-bit values at pData.
 *
 * signVal selects signed (int8_t) or unsigned (uint8_t) ordering. For unsigned input
 * the result is returned as the same bit pattern converted to int8_t.
 * No null handling is done here. Returns 0 when numOfRows <= 0.
 *
 * When built with AVX2 and at least one whole 256-bit vector of rows is available the
 * bulk is reduced with vector min/max; otherwise (short input, or a build without
 * AVX2) the rows are reduced one by one.
 */
static int8_t i8VectorCmpAVX2(const void* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int8_t        v = 0;
  const int8_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int8_t), &remain, &rounds, &width);

#if __AVX2__
  // A vector load always reads 256 bits, so this path needs at least one whole vector of rows.
  if (rounds > 0) {
    __m256i next;
    __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
    p += width;

    // The first vector is already in initVal, hence every loop below starts at 1.
    if (signVal) {
      if (isMinFunc) {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_min_epi8(initVal, next);
          p += width;
        }
      } else {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_max_epi8(initVal, next);
          p += width;
        }
      }

      // fold the vector lanes and the tail rows into the final result
      const int8_t* q = (const int8_t*)&initVal;
      if (isMinFunc) {
        EXTRACT_MIN_VAL(q, p, width, remain, v)
      } else {
        EXTRACT_MAX_VAL(q, p, width, remain, v)
      }
    } else {  // unsigned value
      if (isMinFunc) {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_min_epu8(initVal, next);
          p += width;
        }
      } else {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_max_epu8(initVal, next);
          p += width;
        }
      }

      // Lanes, tail and accumulator must all be unsigned, otherwise values >= 128 compare as negative.
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* tail = (const uint8_t*)p;
      uint8_t        uv = 0;
      if (isMinFunc) {
        EXTRACT_MIN_VAL(q, tail, width, remain, uv)
      } else {
        EXTRACT_MAX_VAL(q, tail, width, remain, uv)
      }
      v = (int8_t)uv;
    }

    return v;
  }
#endif

  // Scalar path: fewer rows than one vector, or no AVX2 support compiled in.
  if (numOfRows > 0) {
    if (signVal) {
      const int8_t* src = (const int8_t*)pData;
      v = src[0];
      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(1, numOfRows, v, src);
      } else {
        __COMPARE_EXTRACT_MAX(1, numOfRows, v, src);
      }
    } else {
      const uint8_t* src = (const uint8_t*)pData;
      uint8_t        uv = src[0];
      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(1, numOfRows, uv, src);
      } else {
        __COMPARE_EXTRACT_MAX(1, numOfRows, uv, src);
      }
      v = (int8_t)uv;
    }
  }

  return v;
}

H
Haojun Liao 已提交
159
/*
 * Minimum (isMinFunc) or maximum of the first numOfRows 16-bit values at pData.
 *
 * signVal selects signed (int16_t) or unsigned (uint16_t) ordering. For unsigned input
 * the result is returned as the same bit pattern converted to int16_t.
 * No null handling is done here. Returns 0 when numOfRows <= 0.
 *
 * When built with AVX2 and at least one whole 256-bit vector of rows is available the
 * bulk is reduced with vector min/max; otherwise (short input, or a build without
 * AVX2) the rows are reduced one by one.
 */
static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int16_t        v = 0;
  const int16_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int16_t), &remain, &rounds, &width);

#if __AVX2__
  // A vector load always reads 256 bits, so this path needs at least one whole vector of rows.
  if (rounds > 0) {
    __m256i next;
    __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
    p += width;

    // The first vector is already in initVal, hence every loop below starts at 1.
    if (signVal) {
      if (isMinFunc) {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_min_epi16(initVal, next);
          p += width;
        }
      } else {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_max_epi16(initVal, next);
          p += width;
        }
      }

      // fold the vector lanes and the tail rows into the final result
      const int16_t* q = (const int16_t*)&initVal;
      if (isMinFunc) {
        EXTRACT_MIN_VAL(q, p, width, remain, v)
      } else {
        EXTRACT_MAX_VAL(q, p, width, remain, v)
      }
    } else {  // unsigned value: the epu16 variants are required for correct ordering
      if (isMinFunc) {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_min_epu16(initVal, next);
          p += width;
        }
      } else {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_max_epu16(initVal, next);
          p += width;
        }
      }

      // Lanes, tail and accumulator must all be unsigned, otherwise large values compare as negative.
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* tail = (const uint16_t*)p;
      uint16_t        uv = 0;
      if (isMinFunc) {
        EXTRACT_MIN_VAL(q, tail, width, remain, uv)
      } else {
        EXTRACT_MAX_VAL(q, tail, width, remain, uv)
      }
      v = (int16_t)uv;
    }

    return v;
  }
#endif

  // Scalar path: fewer rows than one vector, or no AVX2 support compiled in.
  if (numOfRows > 0) {
    if (signVal) {
      v = pData[0];
      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(1, numOfRows, v, pData);
      } else {
        __COMPARE_EXTRACT_MAX(1, numOfRows, v, pData);
      }
    } else {
      const uint16_t* src = (const uint16_t*)pData;
      uint16_t        uv = src[0];
      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(1, numOfRows, uv, src);
      } else {
        __COMPARE_EXTRACT_MAX(1, numOfRows, uv, src);
      }
      v = (int16_t)uv;
    }
  }

  return v;
}

H
Haojun Liao 已提交
222
/*
 * Minimum (isMinFunc) or maximum of the first numOfRows 32-bit values at pData.
 *
 * signVal selects signed (int32_t) or unsigned (uint32_t) ordering. For unsigned input
 * the result is returned as the same bit pattern converted to int32_t.
 * No null handling is done here. Returns 0 when numOfRows <= 0.
 *
 * When built with AVX2 and at least one whole 256-bit vector of rows is available the
 * bulk is reduced with vector min/max; otherwise (short input, or a build without
 * AVX2) the rows are reduced one by one.
 */
static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int32_t        v = 0;
  const int32_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int32_t), &remain, &rounds, &width);

#if __AVX2__
  // A vector load always reads 256 bits, so this path needs at least one whole vector of rows.
  if (rounds > 0) {
    __m256i next;
    __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
    p += width;

    // The first vector is already in initVal, hence every loop below starts at 1.
    if (signVal) {
      if (isMinFunc) {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_min_epi32(initVal, next);
          p += width;
        }
      } else {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_max_epi32(initVal, next);
          p += width;
        }
      }

      // fold the vector lanes and the tail rows into the final result
      const int32_t* q = (const int32_t*)&initVal;
      if (isMinFunc) {
        EXTRACT_MIN_VAL(q, p, width, remain, v)
      } else {
        EXTRACT_MAX_VAL(q, p, width, remain, v)
      }
    } else {  // unsigned value: the epu32 variants are required for correct ordering
      if (isMinFunc) {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_min_epu32(initVal, next);
          p += width;
        }
      } else {
        for (int32_t i = 1; i < rounds; ++i) {
          next = _mm256_lddqu_si256((const __m256i*)p);
          initVal = _mm256_max_epu32(initVal, next);
          p += width;
        }
      }

      // Lanes, tail and accumulator must all be unsigned, otherwise large values compare as negative.
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* tail = (const uint32_t*)p;
      uint32_t        uv = 0;
      if (isMinFunc) {
        EXTRACT_MIN_VAL(q, tail, width, remain, uv)
      } else {
        EXTRACT_MAX_VAL(q, tail, width, remain, uv)
      }
      v = (int32_t)uv;
    }

    return v;
  }
#endif

  // Scalar path: fewer rows than one vector, or no AVX2 support compiled in.
  if (numOfRows > 0) {
    if (signVal) {
      v = pData[0];
      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(1, numOfRows, v, pData);
      } else {
        __COMPARE_EXTRACT_MAX(1, numOfRows, v, pData);
      }
    } else {
      const uint32_t* src = (const uint32_t*)pData;
      uint32_t        uv = src[0];
      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(1, numOfRows, uv, src);
      } else {
        __COMPARE_EXTRACT_MAX(1, numOfRows, uv, src);
      }
      v = (int32_t)uv;
    }
  }

  return v;
}

284
/*
 * Minimum (isMinFunc) or maximum of the first numOfRows floats at pData.
 * No null handling is done here. Returns 0 when numOfRows <= 0.
 *
 * When built with AVX and at least one whole 256-bit vector of rows is available the
 * bulk is reduced with vector min/max; otherwise (short input, or a build without
 * AVX) the rows are reduced one by one.
 */
static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
  float        v = 0;
  const float* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(float), &remain, &rounds, &width);

#if __AVX__
  // A vector load always reads 256 bits, so this path needs at least one whole vector of rows.
  if (rounds > 0) {
    __m256 next;
    __m256 initVal = _mm256_loadu_ps(p);
    p += width;

    // The first vector is already in initVal, hence the loops start at 1.
    if (!isMinFunc) {  // max function
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_loadu_ps(p);
        initVal = _mm256_max_ps(initVal, next);
        p += width;
      }

      // fold the vector lanes and the tail rows into the final result
      const float* q = (const float*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // min function
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_loadu_ps(p);
        initVal = _mm256_min_ps(initVal, next);
        p += width;
      }

      // fold the vector lanes and the tail rows into the final result
      const float* q = (const float*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    }

    return v;
  }
#endif

  // Scalar path: fewer rows than one vector, or no AVX support compiled in.
  if (numOfRows > 0) {
    v = pData[0];
    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(1, numOfRows, v, pData);
    } else {
      __COMPARE_EXTRACT_MAX(1, numOfRows, v, pData);
    }
  }

  return v;
}

/*
 * Minimum (isMinFunc) or maximum of the first numOfRows doubles at pData.
 * No null handling is done here. Returns 0 when numOfRows <= 0.
 *
 * When built with AVX and at least one whole 256-bit vector of rows is available the
 * bulk is reduced with vector min/max; otherwise (short input, or a build without
 * AVX) the rows are reduced one by one.
 */
static double doubleVectorCmpAVX(const double* pData, int32_t numOfRows, bool isMinFunc) {
  double        v = 0;
  const double* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(double), &remain, &rounds, &width);

#if __AVX__
  // A vector load always reads 256 bits, so this path needs at least one whole vector of rows.
  if (rounds > 0) {
    __m256d next;
    __m256d initVal = _mm256_loadu_pd(p);
    p += width;

    // The first vector is already in initVal, hence the loops start at 1.
    if (!isMinFunc) {  // max function
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_loadu_pd(p);
        initVal = _mm256_max_pd(initVal, next);
        p += width;
      }

      // fold the vector lanes and the tail rows into the final result
      const double* q = (const double*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // min function
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_loadu_pd(p);
        initVal = _mm256_min_pd(initVal, next);
        p += width;
      }

      // fold the vector lanes and the tail rows into the final result
      const double* q = (const double*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    }

    return v;
  }
#endif

  // Scalar path: fewer rows than one vector, or no AVX support compiled in.
  if (numOfRows > 0) {
    v = pData[0];
    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(1, numOfRows, v, pData);
    } else {
      __COMPARE_EXTRACT_MAX(1, numOfRows, v, pData);
    }
  }

  return v;
}

H
Haojun Liao 已提交
360
static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, int32_t numOfRows) {
361
  int32_t i = start;
L
Liu Jicong 已提交
362

H
Haojun Liao 已提交
363
  while (i < (start + numOfRows) && (colDataIsNull_f(pCol->nullbitmap, i) == true)) {
364 365 366 367 368 369
    i += 1;
  }

  return i;
}

H
Haojun Liao 已提交
370 371 372
/*
 * Fold rows [start, start + numOfRows) of an 8-bit column into the running min/max
 * held in pBuf->v, then mark pBuf as assigned.
 *
 * data points at row 0 of the column; no null check is performed on the rows.
 * signVal selects signed or unsigned ordering. The result lives in the leading
 * int8_t/uint8_t of pBuf->v, as in the other accessors of this file.
 * A call with numOfRows <= 0 leaves pBuf untouched.
 */
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                          bool signVal) {
  if (numOfRows <= 0) {
    return;
  }

  // First data for this result: seed with the first row of the requested range.
  if (!pBuf->assign) {
    pBuf->v = ((const int8_t*)data)[start];
  }

  // AVX2 version to speedup the loop
  const bool useSimd = tsAVX2Enable && tsSIMDBuiltins;

  if (signVal) {
    const int8_t* p = (const int8_t*)data;
    int8_t*       v = (int8_t*)&pBuf->v;

    if (useSimd) {
      // Reduce only the requested range and merge, so results of earlier blocks are kept.
      const int8_t r = i8VectorCmpAVX2(p + start, numOfRows, isMinFunc, signVal);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint8_t* p = (const uint8_t*)data;
    uint8_t*       v = (uint8_t*)&pBuf->v;

    if (useSimd) {
      const uint8_t r = (uint8_t)i8VectorCmpAVX2(p + start, numOfRows, isMinFunc, signVal);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}

H
Haojun Liao 已提交
404 405 406
/*
 * Fold rows [start, start + numOfRows) of a 16-bit column into the running min/max
 * held in pBuf->v, then mark pBuf as assigned.
 *
 * data points at row 0 of the column; no null check is performed on the rows.
 * signVal selects signed or unsigned ordering. The result lives in the leading
 * int16_t/uint16_t of pBuf->v, as in the other accessors of this file.
 * A call with numOfRows <= 0 leaves pBuf untouched.
 */
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  if (numOfRows <= 0) {
    return;
  }

  // First data for this result: seed with the first row of the requested range.
  if (!pBuf->assign) {
    pBuf->v = ((const int16_t*)data)[start];
  }

  // AVX2 version to speedup the loop
  const bool useSimd = tsAVX2Enable && tsSIMDBuiltins;

  if (signVal) {
    const int16_t* p = (const int16_t*)data;
    int16_t*       v = (int16_t*)&pBuf->v;

    if (useSimd) {
      // Reduce only the requested range and merge, so results of earlier blocks are kept.
      const int16_t r = i16VectorCmpAVX2(p + start, numOfRows, isMinFunc, signVal);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint16_t* p = (const uint16_t*)data;
    uint16_t*       v = (uint16_t*)&pBuf->v;

    if (useSimd) {
      const uint16_t r = (uint16_t)i16VectorCmpAVX2((const int16_t*)data + start, numOfRows, isMinFunc, signVal);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
437

H
Haojun Liao 已提交
438 439 440
/*
 * Fold rows [start, start + numOfRows) of a 32-bit column into the running min/max
 * held in pBuf->v, then mark pBuf as assigned.
 *
 * data points at row 0 of the column; no null check is performed on the rows.
 * signVal selects signed or unsigned ordering. The result lives in the leading
 * int32_t/uint32_t of pBuf->v, as in the other accessors of this file.
 * A call with numOfRows <= 0 leaves pBuf untouched.
 */
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  if (numOfRows <= 0) {
    return;
  }

  // First data for this result: seed with the first row of the requested range.
  if (!pBuf->assign) {
    pBuf->v = ((const int32_t*)data)[start];
  }

  // AVX2 version to speedup the loop
  const bool useSimd = tsAVX2Enable && tsSIMDBuiltins;

  if (signVal) {
    const int32_t* p = (const int32_t*)data;
    int32_t*       v = (int32_t*)&pBuf->v;

    if (useSimd) {
      // Reduce only the requested range and merge, so results of earlier blocks are kept.
      const int32_t r = i32VectorCmpAVX2(p + start, numOfRows, isMinFunc, signVal);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint32_t* p = (const uint32_t*)data;
    uint32_t*       v = (uint32_t*)&pBuf->v;

    if (useSimd) {
      const uint32_t r = (uint32_t)i32VectorCmpAVX2((const int32_t*)data + start, numOfRows, isMinFunc, signVal);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
471

H
Haojun Liao 已提交
472 473 474 475 476
/*
 * Fold rows [start, start + numOfRows) of a 64-bit column into the running min/max
 * held in pBuf->v, then mark pBuf as assigned (as the other handle*Col functions do).
 *
 * data points at row 0 of the column; no null check is performed on the rows.
 * signVal selects signed or unsigned ordering.
 * A call with numOfRows <= 0 leaves pBuf untouched.
 */
static void handleInt64Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  if (numOfRows <= 0) {
    return;
  }

  // First data for this result: seed with the first row of the requested range.
  if (!pBuf->assign) {
    pBuf->v = ((const int64_t*)data)[start];
  }

  if (signVal) {
    const int64_t* p = (const int64_t*)data;
    int64_t*       v = &pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint64_t* p = (const uint64_t*)data;
    uint64_t*       v = (uint64_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  // Without this the next block would re-seed pBuf->v and drop the result accumulated so far.
  pBuf->assign = true;
}
H
Haojun Liao 已提交
498

L
Liu Jicong 已提交
499 500
/*
 * Fold rows [start, start + numOfRows) of a float column into the running min/max
 * stored as a float at the start of pBuf->v, then mark pBuf as assigned.
 *
 * No null check is performed on the rows.
 * A call with numOfRows <= 0 leaves pBuf untouched.
 */
static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                           bool isMinFunc) {
  if (numOfRows <= 0) {
    return;
  }

  const float* pData = (const float*)pCol->pData;
  float*       val = (float*)&pBuf->v;

  // First data for this result: seed with the first row of the requested range.
  if (!pBuf->assign) {
    *val = pData[start];
  }

  // AVX version to speedup the loop
  if (tsAVXEnable && tsSIMDBuiltins) {
    // Reduce only the requested range and merge, so results of earlier blocks are kept.
    const float r = floatVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    if (isMinFunc ? (r < *val) : (r > *val)) {
      *val = r;
    }
  } else if (isMinFunc) {  // min
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}

L
Liu Jicong 已提交
530 531
/*
 * Fold rows [start, start + numOfRows) of a double column into the running min/max
 * stored as a double in pBuf->v, then mark pBuf as assigned.
 *
 * No null check is performed on the rows.
 * A call with numOfRows <= 0 leaves pBuf untouched.
 */
static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                            bool isMinFunc) {
  if (numOfRows <= 0) {
    return;
  }

  const double* pData = (const double*)pCol->pData;
  double*       val = (double*)&pBuf->v;

  // First data for this result: seed with the first row of the requested range.
  if (!pBuf->assign) {
    *val = pData[start];
  }

  // AVX version to speedup the loop
  if (tsAVXEnable && tsSIMDBuiltins) {
    // Reduce only the requested range and merge, so results of earlier blocks are kept.
    const double r = doubleVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    if (isMinFunc ? (r < *val) : (r > *val)) {
      *val = r;
    }
  } else if (isMinFunc) {  // min
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
560

H
Haojun Liao 已提交
561 562 563 564 565 566
/*
 * Return the index of the first row in [start, start + num) whose stored bytes
 * (pCol->info.bytes of them) equal the bytes at tval, or -1 if no row matches.
 */
static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) {
  // the data is loaded, not only the block SMA value
  for (int32_t i = start; i < num + start; ++i) {
    char* p = colDataGetData(pCol, i);
    if (memcmp((void*)tval, p, pCol->info.bytes) == 0) {
      return i;
    }
  }

  // if reach here means real data of block SMA is not set in pCtx->input.
  return -1;
}
H
Haojun Liao 已提交
573

H
Haojun Liao 已提交
574 575 576 577 578 579 580 581 582
/*
 * Helper for doExtractVal(): view the column data and pBuf->v as _type and run the
 * null-aware min or max scan over rows [i, end). Relies on the locals pCol, i, end,
 * pCtx, pBuf and isMinFunc of the enclosing function.
 */
#define MINMAX_SCAN_AS(_type)                                                               \
  do {                                                                                      \
    const _type* pData = (const _type*)pCol->pData;                                         \
    _type*       pVal = (_type*)&(pBuf->v);                                                 \
    if (isMinFunc) {                                                                        \
      __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *pVal, &pBuf->tuplePos) \
    } else {                                                                                \
      __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *pVal, &pBuf->tuplePos) \
    }                                                                                       \
  } while (0)

/*
 * Continue a min (isMinFunc) or max scan over rows [i, end) of pCol, skipping null
 * rows, and keep the running extremum in pBuf->v interpreted as the column's type.
 * pBuf->v must already hold a starting value. When pCtx has subsidiary columns the
 * tuple for each newly found extremum is refreshed through pBuf->tuplePos.
 * Column types not listed below are left untouched.
 */
static void doExtractVal(SColumnInfoData* pCol, int32_t i, int32_t end, SqlFunctionCtx* pCtx, SMinmaxResInfo* pBuf,
                         bool isMinFunc) {
  switch (pCol->info.type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:
      MINMAX_SCAN_AS(int8_t);
      break;

    case TSDB_DATA_TYPE_SMALLINT:
      MINMAX_SCAN_AS(int16_t);
      break;

    case TSDB_DATA_TYPE_INT:
      MINMAX_SCAN_AS(int32_t);
      break;

    case TSDB_DATA_TYPE_BIGINT:
      MINMAX_SCAN_AS(int64_t);
      break;

    case TSDB_DATA_TYPE_UTINYINT:
      MINMAX_SCAN_AS(uint8_t);
      break;

    case TSDB_DATA_TYPE_USMALLINT:
      MINMAX_SCAN_AS(uint16_t);
      break;

    case TSDB_DATA_TYPE_UINT:
      MINMAX_SCAN_AS(uint32_t);
      break;

    case TSDB_DATA_TYPE_UBIGINT:
      MINMAX_SCAN_AS(uint64_t);
      break;

    case TSDB_DATA_TYPE_FLOAT:
      MINMAX_SCAN_AS(float);
      break;

    case TSDB_DATA_TYPE_DOUBLE:
      MINMAX_SCAN_AS(double);
      break;

    default:
      break;
  }
}

#undef MINMAX_SCAN_AS

705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724
/*
 * Store the tuple that belongs to the value pointed to by tval.
 *
 * Only does work when the context has subsidiary columns. The row is located
 * with findRowIndex() over [startRowIndex, startRowIndex + numOfRows) of the
 * first input column; when a non-negative row index comes back, the tuple of
 * that row is written via saveTupleData() into the result buffer's tuplePos.
 *
 * pCtx    function context (source block, subsidiaries, result info)
 * pInput  input column descriptor of the current block
 * index   overwritten before use; the incoming value is never read
 * tval    pointer to the value handed to findRowIndex()
 *
 * Returns 0 when nothing had to be saved, otherwise the code returned by
 * saveTupleData().
 */
static int32_t saveRelatedTuple(SqlFunctionCtx* pCtx, SInputColumnInfoData* pInput, int32_t index, void* tval) {
  // no subsidiary columns: there is no tuple to keep track of
  if (pCtx->subsidiaries.num <= 0) {
    return 0;
  }

  SColumnInfoData* pValCol = pInput->pData[0];

  index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pValCol, tval);
  if (index < 0) {
    // no row index was found, leave the saved tuple position untouched
    return 0;
  }

  SResultRowEntryInfo* pEntry = GET_RES_INFO(pCtx);
  SMinmaxResInfo*      pMinMax = GET_ROWCELL_INTERBUF(pEntry);

  return saveTupleData(pCtx, index, pCtx->pSrcBlock, &pMinMax->tuplePos);
}

G
Ganlin Zhao 已提交
725
/**
 * Fold the rows of the current input block into the running min/max value
 * kept in the result row's SMinmaxResInfo buffer.
 *
 * Three paths are used:
 *   1. pre-aggregate path: when pInput->colDataSMAIsSet is set, the block's
 *      min/max is taken from pInput->pColumnDataAgg[0] without touching rows;
 *   2. row-by-row path: when the column has nulls, the block is below the
 *      threshold returned by getInvokeThreshold(), or subsidiary columns are
 *      present (tuple positions have to be saved);
 *   3. bulk path: otherwise, the per-type handle*Col() helpers are used.
 *
 * @param pCtx       function context (input block, subsidiaries, result info)
 * @param isMinFunc  non-zero to compute the minimum, zero for the maximum
 * @param nElems     out: element count reported for this block; written on
 *                   every successful return
 * @return TSDB_CODE_SUCCESS, or the error code returned by
 *         saveTupleData()/saveRelatedTuple()
 */
int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc, int32_t* nElems) {
  int32_t numOfElems = 0;
  int32_t code = TSDB_CODE_SUCCESS;

  SInputColumnInfoData* pInput = &pCtx->input;
  SColumnDataAgg*       pAgg = pInput->pColumnDataAgg[0];

  SColumnInfoData* pCol = pInput->pData[0];
  int32_t          type = pCol->info.type;

  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SMinmaxResInfo*      pBuf = GET_ROWCELL_INTERBUF(pResInfo);
  pBuf->type = type;

  if (IS_NULL_TYPE(type)) {
    numOfElems = 0;
    goto _over;
  }

  // data in current data block are qualified to the query
  if (pInput->colDataSMAIsSet) {
    numOfElems = pInput->numOfRows - pAgg->numOfNull;
    if (numOfElems == 0) {
      goto _over;
    }

    int16_t index = 0;
    void*   tval = isMinFunc ? &pInput->pColumnDataAgg[0]->min : &pInput->pColumnDataAgg[0]->max;

    if (!pBuf->assign) {
      // first value ever seen: take the block aggregate as-is
      if (type == TSDB_DATA_TYPE_FLOAT) {
        // the aggregate is read as a double and narrowed into the float slot
        GET_FLOAT_VAL(&pBuf->v) = GET_DOUBLE_VAL(tval);
      } else {
        pBuf->v = GET_INT64_VAL(tval);
      }

      code = saveRelatedTuple(pCtx, pInput, index, tval);
    } else {
      // (prev < val) ^ isMinFunc: for max, replace when prev < val;
      // for min, replace when !(prev < val)
      if (IS_SIGNED_NUMERIC_TYPE(type)) {
        int64_t prev = 0;
        GET_TYPED_DATA(prev, int64_t, type, &pBuf->v);

        int64_t val = GET_INT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          GET_INT64_VAL(&pBuf->v) = val;
          code = saveRelatedTuple(pCtx, pInput, index, tval);
        }
      } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
        uint64_t prev = 0;
        GET_TYPED_DATA(prev, uint64_t, type, &pBuf->v);

        uint64_t val = GET_UINT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          GET_UINT64_VAL(&pBuf->v) = val;
          code = saveRelatedTuple(pCtx, pInput, index, tval);
        }
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        double prev = 0;
        GET_TYPED_DATA(prev, double, type, &pBuf->v);

        double val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          GET_DOUBLE_VAL(&pBuf->v) = val;
          code = saveRelatedTuple(pCtx, pInput, index, tval);
        }
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        float prev = 0;
        GET_TYPED_DATA(prev, float, type, &pBuf->v);

        float val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          GET_FLOAT_VAL(&pBuf->v) = val;
          code = saveRelatedTuple(pCtx, pInput, index, tval);
        }
      }
    }

    pBuf->assign = true;

    // Report the non-null row count to the caller. This path used to return
    // without writing *nElems, unlike every other successful path.
    if (code == TSDB_CODE_SUCCESS) {
      *nElems = numOfElems;
    }
    return code;
  }

  int32_t start = pInput->startRowIndex;
  int32_t numOfRows = pInput->numOfRows;
  int32_t end = start + numOfRows;

  if (pCol->hasNull || numOfRows < getInvokeThreshold(256, type) || pCtx->subsidiaries.num > 0) {
    int32_t i = findFirstValPosition(pCol, start, numOfRows);

    if ((i < end) && (!pBuf->assign)) {
      // seed the running value with the row at position i
      char* p = pCol->pData + pCol->info.bytes * i;

      // NOTE(review): unsigned types are loaded through signed pointers, so
      // the upper bytes of pBuf->v are sign-extended; the comparison macros in
      // this file view pBuf->v through a pointer of the column's own width.
      switch (type) {
        case TSDB_DATA_TYPE_DOUBLE:
        case TSDB_DATA_TYPE_UBIGINT:
        case TSDB_DATA_TYPE_BIGINT:
          pBuf->v = *(int64_t*)p;
          break;
        case TSDB_DATA_TYPE_UINT:
        case TSDB_DATA_TYPE_INT:
          pBuf->v = *(int32_t*)p;
          break;
        case TSDB_DATA_TYPE_USMALLINT:
        case TSDB_DATA_TYPE_SMALLINT:
          pBuf->v = *(int16_t*)p;
          break;
        case TSDB_DATA_TYPE_BOOL:
        case TSDB_DATA_TYPE_UTINYINT:
        case TSDB_DATA_TYPE_TINYINT:
          pBuf->v = *(int8_t*)p;
          break;
        case TSDB_DATA_TYPE_FLOAT: {
          *(float*)&pBuf->v = *(float*)p;
          break;
        }
        default:
          memcpy(&pBuf->v, p, pCol->info.bytes);
          break;
      }

      if (pCtx->subsidiaries.num > 0) {
        code = saveTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
        if (code != TSDB_CODE_SUCCESS) {
          return code;
        }
      }
      pBuf->assign = true;
      numOfElems = 1;
    }

    // findFirstValPosition() returned a position at or past the end of the range
    if (i >= end) {
      goto _over;
    }

    doExtractVal(pCol, i, end, pCtx, pBuf, isMinFunc);
  } else {
    numOfElems = numOfRows;

    // last argument of the integer handlers: true for signed, false for unsigned
    switch (type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_SMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_INT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_BIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_UTINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_USMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_UINT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_UBIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_FLOAT: {
        handleFloatCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
      case TSDB_DATA_TYPE_DOUBLE: {
        handleDoubleCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
      default:
        // other types are not processed on the bulk path
        break;
    }

    pBuf->assign = true;
  }

_over:
  // no element counted for this block: save the tuple at startRowIndex once as the "null" tuple
  if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved) {
    code = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pBuf->nullTuplePos);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }

    pBuf->nullTupleSaved = true;
  }

  *nElems = numOfElems;
  return TSDB_CODE_SUCCESS;
}