/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "builtinsimpl.h"
#include "function.h"
#include "tdatablock.h"
#include "tfunctionInt.h"
#include "tglobal.h"

/*
 * Scan rows [i, end) of _data for a new maximum, skipping every row whose bit is
 * set in the null bitmap bm. A strictly larger element replaces val; when the
 * context has subsidiary columns, updateTupleData() is called for that row with pos.
 * The caller's i is advanced in place by the scan.
 */
#define __COMPARE_ACQUIRED_MAX(i, end, bm, _data, ctx, val, pos)   \
  for (; i < (end); ++i) {                                         \
    if (!(colDataIsNull_f(bm, i)) && (_data)[i] > (val)) {         \
      (val) = (_data)[i];                                          \
      if ((ctx)->subsidiaries.num > 0) {                           \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);          \
      }                                                            \
    }                                                              \
  }

/*
 * Scan rows [i, end) of _data for a new minimum, skipping every row whose bit is
 * set in the null bitmap bm. A strictly smaller element replaces val; when the
 * context has subsidiary columns, updateTupleData() is called for that row with pos.
 * The caller's i is advanced in place by the scan.
 */
#define __COMPARE_ACQUIRED_MIN(i, end, bm, _data, ctx, val, pos)   \
  for (; i < (end); ++i) {                                         \
    if (!(colDataIsNull_f(bm, i)) && (_data)[i] < (val)) {         \
      (val) = (_data)[i];                                          \
      if ((ctx)->subsidiaries.num > 0) {                           \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);          \
      }                                                            \
    }                                                              \
  }

/* Lower val to the smallest element of _data within [start, end); rows are not null-checked. */
#define __COMPARE_EXTRACT_MIN(start, end, val, _data)  \
  for (int32_t i = (start); i < (end); ++i) {          \
    if ((_data)[i] < (val)) {                          \
      (val) = (_data)[i];                              \
    }                                                  \
  }

/* Raise val to the largest element of _data within [start, end); rows are not null-checked. */
#define __COMPARE_EXTRACT_MAX(start, end, val, _data)  \
  for (int32_t i = (start); i < (end); ++i) {          \
    if ((_data)[i] > (val)) {                          \
      (val) = (_data)[i];                              \
    }                                                  \
  }

/*
 * Split numOfRows elements of `bytes` bytes each into whole 256-bit vectors plus a scalar tail.
 *
 *   width     - out: number of elements that fit in one 256-bit (32-byte) vector
 *   rounds    - out: number of complete vectors contained in the input
 *   remainder - out: number of elements left over after the last complete vector
 */
static void calculateRounds(int32_t numOfRows, int32_t bytes, int32_t* remainder, int32_t* rounds, int32_t* width) {
  const int32_t bitWidth = 256;

  *width = (bitWidth >> 3u) / bytes;  // bits -> bytes -> elements per vector
  *remainder = numOfRows % (*width);
  *rounds = numOfRows / (*width);
}

/*
 * Reduce the lanes of one accumulated vector plus a scalar tail to a single maximum.
 * _first addresses the _width lanes of the vector, _sec addresses the _remain
 * trailing elements that did not fill a whole vector; the result is stored in _v.
 */
#define EXTRACT_MAX_VAL(_first, _sec, _width, _remain, _v)   \
  (_v) = TMAX((_first)[0], (_first)[1]);                     \
  for (int32_t lane = 1; lane < (_width); ++lane) {          \
    (_v) = TMAX((_v), (_first)[lane]);                       \
  }                                                          \
                                                             \
  for (int32_t rest = 0; rest < (_remain); ++rest) {         \
    if ((_sec)[rest] > (_v)) {                               \
      (_v) = (_sec)[rest];                                   \
    }                                                        \
  }

/*
 * Reduce the lanes of one accumulated vector plus a scalar tail to a single minimum.
 * _first addresses the _width lanes of the vector, _sec addresses the _remain
 * trailing elements that did not fill a whole vector; the result is stored in _v.
 */
#define EXTRACT_MIN_VAL(_first, _sec, _width, _remain, _v)   \
  (_v) = TMIN((_first)[0], (_first)[1]);                     \
  for (int32_t lane = 1; lane < (_width); ++lane) {          \
    (_v) = TMIN((_v), (_first)[lane]);                       \
  }                                                          \
                                                             \
  for (int32_t rest = 0; rest < (_remain); ++rest) {         \
    if ((_sec)[rest] < (_v)) {                               \
      (_v) = (_sec)[rest];                                   \
    }                                                        \
  }
  
  
/*
 * Compute the minimum or maximum of numOfRows 8-bit values using AVX2.
 *
 *   pData     - first element to examine
 *   numOfRows - number of elements; the initial load always reads one whole 256-bit
 *               vector, so at least 32 elements must be readable at pData
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int8_t, false to compare as uint8_t
 *
 * Returns the result as int8_t; for unsigned input this is the uint8_t bit pattern.
 * When the file is built without __AVX2__ the vector code is compiled out and 0 is returned.
 */
static int8_t i8VectorCmpAVX2(const void* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int8_t        v = 0;
  const int8_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int8_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
  p += width;

  // The first vector already lives in initVal, so every loop below folds in
  // vectors 1 .. rounds-1 only; starting at 0 would read one vector past the data.
  if (!isMinFunc) {  // max function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epi8(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail to the final result
      const int8_t* q = (const int8_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epu8(initVal, next);
        p += width;
      }

      // reduce through unsigned storage so values above INT8_MAX compare correctly
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* t = (const uint8_t*)p;
      uint8_t        u = 0;
      EXTRACT_MAX_VAL(q, t, width, remain, u)
      v = (int8_t)u;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epi8(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail to the final result
      const int8_t* q = (const int8_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epu8(initVal, next);
        p += width;
      }

      // reduce through unsigned storage so values above INT8_MAX compare correctly
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* t = (const uint8_t*)p;
      uint8_t        u = 0;
      EXTRACT_MIN_VAL(q, t, width, remain, u)
      v = (int8_t)u;
    }
  }
#endif

  return v;
}

/*
 * Compute the minimum or maximum of numOfRows 16-bit values using AVX2.
 *
 *   pData     - first element to examine
 *   numOfRows - number of elements; the initial load always reads one whole 256-bit
 *               vector, so at least 16 elements must be readable at pData
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int16_t, false to compare as uint16_t
 *
 * Returns the result as int16_t; for unsigned input this is the uint16_t bit pattern.
 * When the file is built without __AVX2__ the vector code is compiled out and 0 is returned.
 */
static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int16_t        v = 0;
  const int16_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int16_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
  p += width;

  // The first vector already lives in initVal, so every loop below folds in
  // vectors 1 .. rounds-1 only; starting at 0 would read one vector past the data.
  if (!isMinFunc) {  // max function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epi16(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail to the final result
      const int16_t* q = (const int16_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epu16(initVal, next);
        p += width;
      }

      // reduce through unsigned storage so values above INT16_MAX compare correctly
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* t = (const uint16_t*)p;
      uint16_t        u = 0;
      EXTRACT_MAX_VAL(q, t, width, remain, u)
      v = (int16_t)u;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epi16(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail to the final result
      const int16_t* q = (const int16_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        // unsigned lanes need the epu16 variant; epi16 would order them as signed
        initVal = _mm256_min_epu16(initVal, next);
        p += width;
      }

      // reduce through unsigned storage so values above INT16_MAX compare correctly
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* t = (const uint16_t*)p;
      uint16_t        u = 0;
      EXTRACT_MIN_VAL(q, t, width, remain, u)
      v = (int16_t)u;
    }
  }
#endif

  return v;
}

/*
 * Compute the minimum or maximum of numOfRows 32-bit values using AVX2.
 *
 *   pData     - first element to examine
 *   numOfRows - number of elements; the initial load always reads one whole 256-bit
 *               vector, so at least 8 elements must be readable at pData
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int32_t, false to compare as uint32_t
 *
 * Returns the result as int32_t; for unsigned input this is the uint32_t bit pattern.
 * When the file is built without __AVX2__ the vector code is compiled out and 0 is returned.
 */
static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int32_t        v = 0;
  const int32_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int32_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
  p += width;

  // The first vector already lives in initVal, so every loop below folds in
  // vectors 1 .. rounds-1 only; starting at 0 would read one vector past the data.
  if (!isMinFunc) {  // max function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epi32(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail to the final result
      const int32_t* q = (const int32_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        // unsigned lanes need the epu32 variant; epi32 would order them as signed
        initVal = _mm256_max_epu32(initVal, next);
        p += width;
      }

      // reduce through unsigned storage so the scalar tail is compared unsigned too
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* t = (const uint32_t*)p;
      uint32_t        u = 0;
      EXTRACT_MAX_VAL(q, t, width, remain, u)
      v = (int32_t)u;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epi32(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail to the final result
      const int32_t* q = (const int32_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epu32(initVal, next);
        p += width;
      }

      // reduce through unsigned storage so the scalar tail is compared unsigned too
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* t = (const uint32_t*)p;
      uint32_t        u = 0;
      EXTRACT_MIN_VAL(q, t, width, remain, u)
      v = (int32_t)u;
    }
  }
#endif

  return v;
}

/*
 * Compute the minimum or maximum of numOfRows float values using AVX.
 *
 *   pData     - first element to examine
 *   numOfRows - number of elements; the initial load always reads one whole 256-bit
 *               vector, so at least 8 elements must be readable at pData
 *   isMinFunc - true for minimum, false for maximum
 *
 * When the file is built without __AVX__ the vector code is compiled out and 0 is returned.
 */
static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
  float        v = 0;
  const float* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(float), &remain, &rounds, &width);

#if __AVX__

  __m256 next;
  __m256 initVal = _mm256_loadu_ps(p);
  p += width;

  if (!isMinFunc) {  // max function
    // the first vector is already in initVal, hence the loop starts at 1
    for (int32_t i = 1; i < rounds; ++i) {
      next = _mm256_loadu_ps(p);
      initVal = _mm256_max_ps(initVal, next);
      p += width;
    }

    // reduce the lanes and the scalar tail to the final result
    const float* q = (const float*)&initVal;
    EXTRACT_MAX_VAL(q, p, width, remain, v)
  } else {  // min function
    for (int32_t i = 1; i < rounds; ++i) {
      next = _mm256_loadu_ps(p);
      initVal = _mm256_min_ps(initVal, next);
      p += width;
    }

    // reduce the lanes and the scalar tail to the final result
    const float* q = (const float*)&initVal;
    EXTRACT_MIN_VAL(q, p, width, remain, v)
  }
#endif

  return v;
}

/*
 * Compute the minimum or maximum of numOfRows double values using AVX.
 *
 *   pData     - first element to examine
 *   numOfRows - number of elements; the initial load always reads one whole 256-bit
 *               vector, so at least 4 elements must be readable at pData
 *   isMinFunc - true for minimum, false for maximum
 *
 * When the file is built without __AVX__ the vector code is compiled out and 0 is returned.
 */
static double doubleVectorCmpAVX(const double* pData, int32_t numOfRows, bool isMinFunc) {
  double        v = 0;
  const double* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(double), &remain, &rounds, &width);

#if __AVX__

  __m256d next;
  __m256d initVal = _mm256_loadu_pd(p);
  p += width;

  if (!isMinFunc) {  // max function
    // the first vector is already in initVal, hence the loop starts at 1
    for (int32_t i = 1; i < rounds; ++i) {
      next = _mm256_loadu_pd(p);
      initVal = _mm256_max_pd(initVal, next);
      p += width;
    }

    // reduce the lanes and the scalar tail to the final result
    const double* q = (const double*)&initVal;
    EXTRACT_MAX_VAL(q, p, width, remain, v)
  } else {  // min function
    for (int32_t i = 1; i < rounds; ++i) {
      next = _mm256_loadu_pd(p);
      initVal = _mm256_min_pd(initVal, next);
      p += width;
    }

    // reduce the lanes and the scalar tail to the final result
    const double* q = (const double*)&initVal;
    EXTRACT_MIN_VAL(q, p, width, remain, v)
  }
#endif

  return v;
}

static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, int32_t numOfRows) {
360
  int32_t i = start;
H
Haojun Liao 已提交
361 362
  
  while (i < (start + numOfRows) && (colDataIsNull_f(pCol->nullbitmap, i) == true)) {
363 364 365 366 367 368
    i += 1;
  }

  return i;
}

/*
 * Fold rows [start, start + numOfRows) of an 8-bit column into the running min/max in pBuf->v.
 *
 *   data      - column values; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int8_t, false to compare as uint8_t
 *
 * If pBuf holds no value yet it is seeded from the first row of the window. The AVX2
 * routine is used when both tsAVX2Enable and tsSIMDEnable are set; its result for this
 * block is merged with the value already accumulated instead of overwriting it.
 * pBuf->assign is set on return.
 */
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                          bool signVal) {
  const bool useSimd = tsAVX2Enable && tsSIMDEnable;

  if (!pBuf->assign) {
    // seed from inside the window; data[0] may lie outside it when start > 0
    pBuf->v = ((const int8_t*)data)[start];
  }

  if (signVal) {
    const int8_t* p = (const int8_t*)data;
    int8_t*       v = (int8_t*)&pBuf->v;

    if (useSimd) {
      int8_t r = i8VectorCmpAVX2(p + start, numOfRows, isMinFunc, true);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint8_t* p = (const uint8_t*)data;
    uint8_t*       v = (uint8_t*)&pBuf->v;

    if (useSimd) {
      uint8_t r = (uint8_t)i8VectorCmpAVX2(p + start, numOfRows, isMinFunc, false);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}

/*
 * Fold rows [start, start + numOfRows) of a 16-bit column into the running min/max in pBuf->v.
 *
 *   data      - column values; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int16_t, false to compare as uint16_t
 *
 * If pBuf holds no value yet it is seeded from the first row of the window. The AVX2
 * routine is used when both tsAVX2Enable and tsSIMDEnable are set; its result for this
 * block is merged with the value already accumulated instead of overwriting it.
 * pBuf->assign is set on return.
 */
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  const bool useSimd = tsAVX2Enable && tsSIMDEnable;

  if (!pBuf->assign) {
    // seed from inside the window; data[0] may lie outside it when start > 0
    pBuf->v = ((const int16_t*)data)[start];
  }

  if (signVal) {
    const int16_t* p = (const int16_t*)data;
    int16_t*       v = (int16_t*)&pBuf->v;

    if (useSimd) {
      int16_t r = i16VectorCmpAVX2(p + start, numOfRows, isMinFunc, true);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint16_t* p = (const uint16_t*)data;
    uint16_t*       v = (uint16_t*)&pBuf->v;

    if (useSimd) {
      uint16_t r = (uint16_t)i16VectorCmpAVX2((const int16_t*)data + start, numOfRows, isMinFunc, false);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
/*
 * Fold rows [start, start + numOfRows) of a 32-bit column into the running min/max in pBuf->v.
 *
 *   data      - column values; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int32_t, false to compare as uint32_t
 *
 * If pBuf holds no value yet it is seeded from the first row of the window. The AVX2
 * routine is used when both tsAVX2Enable and tsSIMDEnable are set; its result for this
 * block is merged with the value already accumulated instead of overwriting it.
 * pBuf->assign is set on return.
 */
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  const bool useSimd = tsAVX2Enable && tsSIMDEnable;

  if (!pBuf->assign) {
    // seed from inside the window; data[0] may lie outside it when start > 0
    pBuf->v = ((const int32_t*)data)[start];
  }

  if (signVal) {
    const int32_t* p = (const int32_t*)data;
    int32_t*       v = (int32_t*)&pBuf->v;

    if (useSimd) {
      int32_t r = i32VectorCmpAVX2(p + start, numOfRows, isMinFunc, true);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint32_t* p = (const uint32_t*)data;
    uint32_t*       v = (uint32_t*)&pBuf->v;

    if (useSimd) {
      uint32_t r = (uint32_t)i32VectorCmpAVX2((const int32_t*)data + start, numOfRows, isMinFunc, false);
      if (isMinFunc ? (r < *v) : (r > *v)) {
        *v = r;
      }
    } else if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
/*
 * Fold rows [start, start + numOfRows) of a 64-bit column into the running min/max in pBuf->v.
 *
 *   data      - column values; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *   signVal   - true to compare as int64_t, false to compare as uint64_t
 *
 * If pBuf holds no value yet it is seeded from the first row of the window.
 * pBuf->assign is set on return so that later blocks keep the accumulated value.
 */
static void handleInt64Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  if (!pBuf->assign) {
    // seed from inside the window; data[0] may lie outside it when start > 0
    pBuf->v = ((const int64_t*)data)[start];
  }

  if (signVal) {
    const int64_t* p = (const int64_t*)data;
    int64_t*       v = &pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint64_t* p = (const uint64_t*)data;
    uint64_t*       v = (uint64_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  // without this the next block would re-seed and discard the result computed here
  pBuf->assign = true;
}
/*
 * Fold rows [start, start + numOfRows) of an unsigned 8-bit column into the running
 * min/max stored in the low byte of pBuf->v.
 *
 *   pCol      - column whose pData is read as uint8_t; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *
 * If pBuf holds no value yet it is seeded from the first row of the window. The AVX2
 * routine is used when both tsAVX2Enable and tsSIMDEnable are set; its result for this
 * block is merged with the value already accumulated instead of overwriting it.
 * pBuf->assign is set on return.
 */
static void handleUint8Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                           bool isMinFunc) {
  const uint8_t* pData = (uint8_t*)pCol->pData;
  uint8_t*       val = (uint8_t*)&pBuf->v;

  if (!pBuf->assign) {
    // seed from inside the window; pData[0] may lie outside it when start > 0
    *val = pData[start];
  }

  if (tsAVX2Enable && tsSIMDEnable) {
    uint8_t r = (uint8_t)i8VectorCmpAVX2(pData + start, numOfRows, isMinFunc, false);
    if (isMinFunc ? (r < *val) : (r > *val)) {
      *val = r;
    }
  } else if (isMinFunc) {  // min
    __COMPARE_EXTRACT_MIN(start, start + numOfRows, *val, pData);
  } else {
    __COMPARE_EXTRACT_MAX(start, start + numOfRows, *val, pData);
  }

  pBuf->assign = true;
}

/*
 * Fold rows [start, start + numOfRows) of a float column into the running min/max
 * stored (as a float) at the start of pBuf->v.
 *
 *   pCol      - column whose pData is read as float; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *
 * If pBuf holds no value yet it is seeded from the first row of the window. The AVX
 * routine is used when both tsAVXEnable and tsSIMDEnable are set; its result for this
 * block is merged with the value already accumulated instead of overwriting it.
 * pBuf->assign is set on return.
 */
static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc) {
  const float* pData = (const float*)pCol->pData;
  float*       val = (float*)&pBuf->v;

  if (!pBuf->assign) {
    // seed from inside the window; pData[0] may lie outside it when start > 0
    *val = pData[start];
  }

  if (tsAVXEnable && tsSIMDEnable) {
    float r = floatVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    if (isMinFunc ? (r < *val) : (r > *val)) {
      *val = r;
    }
  } else if (isMinFunc) {  // min
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}

/*
 * Fold rows [start, start + numOfRows) of a double column into the running min/max
 * stored (as a double) in pBuf->v.
 *
 *   pCol      - column whose pData is read as double; no NULL check is performed here
 *   isMinFunc - true for minimum, false for maximum
 *
 * If pBuf holds no value yet it is seeded from the first row of the window. The AVX
 * routine is used when both tsAVXEnable and tsSIMDEnable are set; its result for this
 * block is merged with the value already accumulated instead of overwriting it.
 * pBuf->assign is set on return.
 */
static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc) {
  const double* pData = (const double*)pCol->pData;
  double*       val = (double*)&pBuf->v;

  if (!pBuf->assign) {
    // seed from inside the window; pData[0] may lie outside it when start > 0
    *val = pData[start];
  }

  if (tsAVXEnable && tsSIMDEnable) {
    double r = doubleVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    if (isMinFunc ? (r < *val) : (r > *val)) {
      *val = r;
    }
  } else if (isMinFunc) {  // min
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}
/*
 * Locate the first row in [start, start + num) whose stored bytes equal tval.
 *
 * The comparison is a memcmp over pCol->info.bytes bytes, so tval must point to at
 * least that many readable bytes. Returns the row index, or -1 when no row matches.
 */
static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) {
  // the data is loaded, not only the block SMA value
  for (int32_t i = start; i < num + start; ++i) {
    char* p = colDataGetData(pCol, i);
    if (memcmp((void*)tval, p, pCol->info.bytes) == 0) {
      return i;
    }
  }

  // reaching here means the real data behind the block SMA is not set in pCtx->input
  return -1;
}
/*
 * Null-aware min/max scan over rows [i, end) of pCol.
 *
 * The column data is interpreted according to pCol->info.type and compared against
 * the running result in pBuf->v, which is accessed through a pointer of the same
 * type. Rows marked in the null bitmap are skipped. Whenever the result changes and
 * the context has subsidiary columns, the comparison macro refreshes the tuple
 * referenced by pBuf->tuplePos. Column types not listed are left untouched.
 */
static void doExtractVal(SColumnInfoData* pCol, int32_t i, int32_t end, SqlFunctionCtx* pCtx, SMinmaxResInfo* pBuf,
                         bool isMinFunc) {
  if (isMinFunc) {
    switch (pCol->info.type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        const int8_t* pData = (const int8_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(int8_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_SMALLINT: {
        const int16_t* pData = (const int16_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(int16_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_INT: {
        const int32_t* pData = (const int32_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(int32_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_BIGINT: {
        const int64_t* pData = (const int64_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, (pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_UTINYINT: {
        const uint8_t* pData = (const uint8_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(uint8_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_USMALLINT: {
        const uint16_t* pData = (const uint16_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(uint16_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_UINT: {
        const uint32_t* pData = (const uint32_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(uint32_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_UBIGINT: {
        const uint64_t* pData = (const uint64_t*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(uint64_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_FLOAT: {
        const float* pData = (const float*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(float*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_DOUBLE: {
        const double* pData = (const double*)pCol->pData;
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(double*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      default:
        break;
    }
  } else {
    switch (pCol->info.type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        const int8_t* pData = (const int8_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(int8_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_SMALLINT: {
        const int16_t* pData = (const int16_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(int16_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_INT: {
        // INT columns hold 32-bit elements; a 16-bit view would index the wrong bytes
        const int32_t* pData = (const int32_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(int32_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_BIGINT: {
        const int64_t* pData = (const int64_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, (pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_UTINYINT: {
        const uint8_t* pData = (const uint8_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(uint8_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_USMALLINT: {
        const uint16_t* pData = (const uint16_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(uint16_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_UINT: {
        const uint32_t* pData = (const uint32_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(uint32_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_UBIGINT: {
        const uint64_t* pData = (const uint64_t*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(uint64_t*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_FLOAT: {
        const float* pData = (const float*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(float*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      case TSDB_DATA_TYPE_DOUBLE: {
        const double* pData = (const double*)pCol->pData;
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(double*)&(pBuf->v), &pBuf->tuplePos)
        break;
      }

      default:
        break;
    }
  }
}

/*
 * Update the running min/max kept in the context's result buffer with the rows of
 * the current input block.
 *
 *   pCtx      - function context; input column 0 is the column being aggregated
 *   isMinFunc - non-zero for min(), zero for max(); expected to be 0 or 1 because it
 *               is XOR-ed with a comparison result below
 *
 * Three paths are taken:
 *   1. pre-computed block statistics (colDataSMAIsSet): the block min/max is merged;
 *   2. null-aware scan: used when the column has NULLs, the block has fewer than 32
 *      rows, or subsidiary columns must follow the selected row;
 *   3. fast path: type-specific handlers without NULL checks.
 *
 * Returns the non-NULL row count on path 1, the row count on path 3, and on path 2
 * 1 when this call stored the first value, otherwise 0.
 */
int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
  int32_t numOfElems = 0;

  SInputColumnInfoData* pInput = &pCtx->input;
  SColumnDataAgg*       pAgg = pInput->pColumnDataAgg[0];

  SColumnInfoData* pCol = pInput->pData[0];
  int32_t          type = pCol->info.type;

  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SMinmaxResInfo*      pBuf = GET_ROWCELL_INTERBUF(pResInfo);
  pBuf->type = type;

  if (IS_NULL_TYPE(type)) {
    numOfElems = 0;
    goto _over;
  }

  // data in current data block are qualified to the query
  if (pInput->colDataSMAIsSet) {
    numOfElems = pInput->numOfRows - pAgg->numOfNull;
    ASSERT(pInput->numOfRows == pInput->totalRows && numOfElems >= 0);

    if (numOfElems == 0) {
      return numOfElems;
    }

    void*   tval = NULL;
    int32_t index = 0;  // findRowIndex() returns int32_t; a narrower type would truncate it

    if (isMinFunc) {
      tval = &pInput->pColumnDataAgg[0]->min;
    } else {
      tval = &pInput->pColumnDataAgg[0]->max;
    }

    if (!pBuf->assign) {
      pBuf->v = *(int64_t*)tval;
      if (pCtx->subsidiaries.num > 0) {
        index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
        if (index >= 0) {
          pBuf->tuplePos = saveTupleData(pCtx, index, pCtx->pSrcBlock, NULL);
        }
      }
    } else {
      if (IS_SIGNED_NUMERIC_TYPE(type)) {
        int64_t prev = 0;
        GET_TYPED_DATA(prev, int64_t, type, &pBuf->v);

        int64_t val = GET_INT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          *(int64_t*)&pBuf->v = val;
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
              pBuf->tuplePos = saveTupleData(pCtx, index, pCtx->pSrcBlock, NULL);
            }
          }
        }
      } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
        uint64_t prev = 0;
        GET_TYPED_DATA(prev, uint64_t, type, &pBuf->v);

        uint64_t val = GET_UINT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          *(uint64_t*)&pBuf->v = val;
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
              pBuf->tuplePos = saveTupleData(pCtx, index, pCtx->pSrcBlock, NULL);
            }
          }
        }
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        double prev = 0;
        GET_TYPED_DATA(prev, double, type, &pBuf->v);

        double val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          *(double*)&pBuf->v = val;
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
              pBuf->tuplePos = saveTupleData(pCtx, index, pCtx->pSrcBlock, NULL);
            }
          }
        }
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        float prev = 0;
        GET_TYPED_DATA(prev, float, type, &pBuf->v);

        float val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
          *(float*)&pBuf->v = val;
          // refresh the tuple only when the result actually changed, as the other types do
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
              pBuf->tuplePos = saveTupleData(pCtx, index, pCtx->pSrcBlock, NULL);
            }
          }
        }
      }
    }

    pBuf->assign = true;
    return numOfElems;
  }

  int32_t start = pInput->startRowIndex;
  int32_t numOfRows = pInput->numOfRows;
  int32_t end = start + numOfRows;

  if (pCol->hasNull || numOfRows < 32 || pCtx->subsidiaries.num > 0) {
    int32_t i = findFirstValPosition(pCol, start, numOfRows);

    if ((i < end) && (!pBuf->assign)) {
      // seed the result with the first non-NULL value of the block
      memcpy(&pBuf->v, pCol->pData + (pCol->info.bytes * i), pCol->info.bytes);

      if (pCtx->subsidiaries.num > 0) {
        pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
      }
      pBuf->assign = true;
      numOfElems = 1;
    }

    if (i >= end) {  // every row in the block is NULL
      ASSERT(numOfElems == 0);
      return numOfElems;
    }

    // NOTE(review): numOfElems stays 0 here when a value was already assigned by an
    // earlier block, although rows are processed -- confirm callers expect that.
    doExtractVal(pCol, i, end, pCtx, pBuf, isMinFunc);
  } else {
    numOfElems = numOfRows;

    switch (pCol->info.type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_SMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_INT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_BIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
      }
      case TSDB_DATA_TYPE_UTINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_USMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_UINT: {
        // 32-bit elements must go through the 32-bit handler
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_UBIGINT: {
        // 64-bit elements must go through the 64-bit handler
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_FLOAT: {
        handleFloatCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
      case TSDB_DATA_TYPE_DOUBLE: {
        handleDoubleCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
      default:
        break;
    }

    // NOTE(review): this label lives inside the fast-path branch, so besides the
    // NULL-type goto it is reached only after the handlers above -- confirm whether
    // the all-NULL early return in the null-aware branch should also come here.
  _over:
    if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved) {
      pBuf->nullTuplePos = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, NULL);
      pBuf->nullTupleSaved = true;
    }
  }

  return numOfElems;
}