tminmax.c 27.7 KB
Newer Older
H
Haojun Liao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "builtinsimpl.h"
#include "function.h"
#include "tdatablock.h"
#include "tfunctionInt.h"
#include "tglobal.h"

H
Haojun Liao 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Scan rows [i, end) of `_data` and keep the largest non-NULL value in `val`.
 *
 * - `i` is the caller's own loop variable: it is advanced in place and equals
 *   `end` once the expansion has finished.
 * - Rows for which colDataIsNull_f(bm, i) is true are skipped.
 * - `val` is replaced only on a strictly greater element; when that happens
 *   and (ctx)->subsidiaries.num > 0, updateTupleData() is called for row `i`
 *   with `pos`.
 *
 * The expansion is a bare `for` statement, so call sites need no trailing
 * semicolon.
 */
#define __COMPARE_ACQUIRED_MAX(i, end, bm, _data, ctx, val, pos) \
  for (; i < (end); ++i) {                                       \
    if (colDataIsNull_f(bm, i)) {                                \
      continue;                                                  \
    }                                                            \
                                                                 \
    if ((val) < (_data)[i]) {                                    \
      (val) = (_data)[i];                                        \
      if ((ctx)->subsidiaries.num > 0) {                         \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);        \
      }                                                          \
    }                                                            \
  }

/*
 * Scan rows [i, end) of `_data` and keep the smallest non-NULL value in `val`.
 *
 * Mirror image of __COMPARE_ACQUIRED_MAX: `i` is advanced in place, NULL rows
 * (per colDataIsNull_f) are skipped, and updateTupleData() is called for each
 * strictly smaller element when (ctx)->subsidiaries.num > 0.
 */
#define __COMPARE_ACQUIRED_MIN(i, end, bm, _data, ctx, val, pos) \
  for (; i < (end); ++i) {                                       \
    if (colDataIsNull_f(bm, i)) {                                \
      continue;                                                  \
    }                                                            \
                                                                 \
    if ((val) > (_data)[i]) {                                    \
      (val) = (_data)[i];                                        \
      if ((ctx)->subsidiaries.num > 0) {                         \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);        \
      }                                                          \
    }                                                            \
  }

/*
 * Fold rows [start, end) of `_data` into `val`, keeping the smallest value.
 * No NULL check is performed; `val` must already hold a valid seed.
 * The expansion declares its own loop index named `i`.
 */
#define __COMPARE_EXTRACT_MIN(start, end, val, _data)  \
  for (int32_t i = (start); i < (end); ++i) {          \
    if ((_data)[i] < (val)) {                          \
      (val) = (_data)[i];                              \
    }                                                  \
  }

/*
 * Fold rows [start, end) of `_data` into `val`, keeping the largest value.
 * No NULL check is performed; `val` must already hold a valid seed.
 * The expansion declares its own loop index named `i`.
 */
#define __COMPARE_EXTRACT_MAX(start, end, val, _data)  \
  for (int32_t i = (start); i < (end); ++i) {          \
    if ((_data)[i] > (val)) {                          \
      (val) = (_data)[i];                              \
    }                                                  \
  }

64 65
/*
 * Split a column of `numOfRows` elements, each `bytes` wide, into whole
 * 256-bit vector loads plus a scalar tail.
 *
 *   remainder - out: elements left over after the complete vectors
 *   rounds    - out: number of complete 256-bit vectors in the input
 *   width     - out: elements per 256-bit vector (32 / bytes)
 *
 * `bytes` must be in the range 1..32; any other value makes `width` zero (or
 * divides by zero directly) and the subsequent division is undefined.
 */
static void calculateRounds(int32_t numOfRows, int32_t bytes, int32_t* remainder, int32_t* rounds, int32_t* width) {
  const int32_t bitWidth = 256;

  *width = (bitWidth >> 3u) / bytes;
  *remainder = numOfRows % (*width);
  *rounds = numOfRows / (*width);
}

H
Haojun Liao 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/*
 * Reduce a vector accumulator plus a scalar tail to a single maximum.
 *
 *   _first  - array view of the vector lanes; lanes [0, _width) are read
 *   _sec    - pointer to the scalar tail; elements [0, _remain) are read
 *   _v      - lvalue receiving the result
 *
 * `_v` is seeded from lanes 0 and 1, so `_first` must expose at least two
 * lanes. Comparisons are carried out in the type of `_v`/`_first`/`_sec` as
 * written at the call site, so `_v` must be able to represent every lane
 * value for the result to be meaningful.
 *
 * The expansion is several statements, not a single one: do not use it as the
 * unbraced body of an `if`/`else`/loop.
 */
#define EXTRACT_MAX_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = TMAX((_first)[0], (_first)[1]);                   \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMAX((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_v) < (_sec)[j]) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }

/*
 * Reduce a vector accumulator plus a scalar tail to a single minimum.
 * Same contract as EXTRACT_MAX_VAL with the comparison reversed.
 */
#define EXTRACT_MIN_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = TMIN((_first)[0], (_first)[1]);                   \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMIN((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_v) > (_sec)[j]) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }
  
  
/* Pick the better of `cur` and `cand` for a min/max search over 8-bit values,
 * comparing as signed or unsigned according to `signVal`. */
static inline int8_t i8PickExtreme(int8_t cur, int8_t cand, bool isMinFunc, bool signVal) {
  const bool smaller = signVal ? (cand < cur) : ((uint8_t)cand < (uint8_t)cur);
  const bool larger = signVal ? (cand > cur) : ((uint8_t)cand > (uint8_t)cur);
  return (isMinFunc ? smaller : larger) ? cand : cur;
}

/*
 * Return the minimum (isMinFunc) or maximum (!isMinFunc) of the `numOfRows`
 * contiguous 8-bit values starting at `pData`.
 *
 * `signVal` selects signed (int8_t) or unsigned (uint8_t) ordering. For the
 * unsigned case the result is returned as the same bit pattern in an int8_t;
 * cast it back to uint8_t to read it.
 *
 * Complete 256-bit vectors are processed with AVX2 when the file is compiled
 * with AVX2 support; whatever is left (or everything, without AVX2) is handled
 * by a scalar loop. Returns 0 for an empty or NULL input.
 */
static int8_t i8VectorCmpAVX2(const void* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  const int8_t* p = (const int8_t*)pData;
  int8_t        best = p[0];
  int32_t       next = 0;  // index of the first row not yet folded into `best`

#if __AVX2__
  const int32_t width = (int32_t)(sizeof(__m256i) / sizeof(int8_t));
  const int32_t rounds = numOfRows / width;

  if (rounds > 0) {
    __m256i acc = _mm256_lddqu_si256((const __m256i*)p);

    // The first vector is already in `acc`, so only rounds - 1 loads remain.
    // Starting this loop at 0 would read one vector past the input.
    for (int32_t r = 1; r < rounds; ++r) {
      const __m256i cur = _mm256_lddqu_si256((const __m256i*)(p + r * width));
      if (isMinFunc) {
        acc = signVal ? _mm256_min_epi8(acc, cur) : _mm256_min_epu8(acc, cur);
      } else {
        acc = signVal ? _mm256_max_epi8(acc, cur) : _mm256_max_epu8(acc, cur);
      }
    }

    // horizontal reduction of the 32 lanes, honouring the requested signedness
    int8_t lanes[sizeof(__m256i) / sizeof(int8_t)];
    _mm256_storeu_si256((__m256i*)lanes, acc);
    for (int32_t k = 0; k < width; ++k) {
      best = i8PickExtreme(best, lanes[k], isMinFunc, signVal);
    }

    next = rounds * width;
  }
#endif

  // scalar tail (or the whole input when no vector was processed)
  for (int32_t i = next; i < numOfRows; ++i) {
    best = i8PickExtreme(best, p[i], isMinFunc, signVal);
  }

  return best;
}

H
Haojun Liao 已提交
158
/* Pick the better of `cur` and `cand` for a min/max search over 16-bit values,
 * comparing as signed or unsigned according to `signVal`. */
static inline int16_t i16PickExtreme(int16_t cur, int16_t cand, bool isMinFunc, bool signVal) {
  const bool smaller = signVal ? (cand < cur) : ((uint16_t)cand < (uint16_t)cur);
  const bool larger = signVal ? (cand > cur) : ((uint16_t)cand > (uint16_t)cur);
  return (isMinFunc ? smaller : larger) ? cand : cur;
}

/*
 * Return the minimum (isMinFunc) or maximum (!isMinFunc) of the `numOfRows`
 * contiguous 16-bit values starting at `pData`.
 *
 * `signVal` selects signed (int16_t) or unsigned (uint16_t) ordering. For the
 * unsigned case the result is returned as the same bit pattern in an int16_t;
 * cast it back to uint16_t to read it.
 *
 * Complete 256-bit vectors are processed with AVX2 when the file is compiled
 * with AVX2 support; the rest (or everything, without AVX2) is handled by a
 * scalar loop. Returns 0 for an empty or NULL input.
 */
static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  const int16_t* p = pData;
  int16_t        best = p[0];
  int32_t        next = 0;  // index of the first row not yet folded into `best`

#if __AVX2__
  const int32_t width = (int32_t)(sizeof(__m256i) / sizeof(int16_t));
  const int32_t rounds = numOfRows / width;

  if (rounds > 0) {
    __m256i acc = _mm256_lddqu_si256((const __m256i*)p);

    // The first vector is already in `acc`, so only rounds - 1 loads remain.
    for (int32_t r = 1; r < rounds; ++r) {
      const __m256i cur = _mm256_lddqu_si256((const __m256i*)(p + r * width));
      if (isMinFunc) {
        // unsigned data needs the epu16 variant; epi16 would order values >= 0x8000 as negative
        acc = signVal ? _mm256_min_epi16(acc, cur) : _mm256_min_epu16(acc, cur);
      } else {
        acc = signVal ? _mm256_max_epi16(acc, cur) : _mm256_max_epu16(acc, cur);
      }
    }

    // horizontal reduction of the 16 lanes, honouring the requested signedness
    int16_t lanes[sizeof(__m256i) / sizeof(int16_t)];
    _mm256_storeu_si256((__m256i*)lanes, acc);
    for (int32_t k = 0; k < width; ++k) {
      best = i16PickExtreme(best, lanes[k], isMinFunc, signVal);
    }

    next = rounds * width;
  }
#endif

  // scalar tail (or the whole input when no vector was processed)
  for (int32_t i = next; i < numOfRows; ++i) {
    best = i16PickExtreme(best, p[i], isMinFunc, signVal);
  }

  return best;
}

H
Haojun Liao 已提交
221
/* Pick the better of `cur` and `cand` for a min/max search over 32-bit values,
 * comparing as signed or unsigned according to `signVal`. */
static inline int32_t i32PickExtreme(int32_t cur, int32_t cand, bool isMinFunc, bool signVal) {
  const bool smaller = signVal ? (cand < cur) : ((uint32_t)cand < (uint32_t)cur);
  const bool larger = signVal ? (cand > cur) : ((uint32_t)cand > (uint32_t)cur);
  return (isMinFunc ? smaller : larger) ? cand : cur;
}

/*
 * Return the minimum (isMinFunc) or maximum (!isMinFunc) of the `numOfRows`
 * contiguous 32-bit values starting at `pData`.
 *
 * `signVal` selects signed (int32_t) or unsigned (uint32_t) ordering. For the
 * unsigned case the result is returned as the same bit pattern in an int32_t;
 * cast it back to uint32_t to read it.
 *
 * Complete 256-bit vectors are processed with AVX2 when the file is compiled
 * with AVX2 support; the rest (or everything, without AVX2) is handled by a
 * scalar loop. Returns 0 for an empty or NULL input.
 */
static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  const int32_t* p = pData;
  int32_t        best = p[0];
  int32_t        next = 0;  // index of the first row not yet folded into `best`

#if __AVX2__
  const int32_t width = (int32_t)(sizeof(__m256i) / sizeof(int32_t));
  const int32_t rounds = numOfRows / width;

  if (rounds > 0) {
    __m256i acc = _mm256_lddqu_si256((const __m256i*)p);

    // The first vector is already in `acc`, so only rounds - 1 loads remain.
    for (int32_t r = 1; r < rounds; ++r) {
      const __m256i cur = _mm256_lddqu_si256((const __m256i*)(p + r * width));
      if (isMinFunc) {
        acc = signVal ? _mm256_min_epi32(acc, cur) : _mm256_min_epu32(acc, cur);
      } else {
        // unsigned data needs the epu32 variant; epi32 would order values >= 2^31 as negative
        acc = signVal ? _mm256_max_epi32(acc, cur) : _mm256_max_epu32(acc, cur);
      }
    }

    // horizontal reduction of the 8 lanes, honouring the requested signedness
    int32_t lanes[sizeof(__m256i) / sizeof(int32_t)];
    _mm256_storeu_si256((__m256i*)lanes, acc);
    for (int32_t k = 0; k < width; ++k) {
      best = i32PickExtreme(best, lanes[k], isMinFunc, signVal);
    }

    next = rounds * width;
  }
#endif

  // scalar tail (or the whole input when no vector was processed)
  for (int32_t i = next; i < numOfRows; ++i) {
    best = i32PickExtreme(best, p[i], isMinFunc, signVal);
  }

  return best;
}

283 284 285
/*
 * Return the minimum (isMinFunc) or maximum (!isMinFunc) of the `numOfRows`
 * contiguous floats starting at `pData`.
 *
 * Complete 256-bit vectors are processed with AVX when the file is compiled
 * with AVX support; the rest (or everything, without AVX) is handled by a
 * scalar loop, so short inputs are never over-read. Returns 0 for an empty or
 * NULL input.
 */
static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(float), &remain, &rounds, &width);
  (void)remain;  // the tail is derived from `next` below

  float   v = pData[0];
  int32_t next = 0;  // index of the first row not yet folded into `v`

#if __AVX__
  if (rounds > 0) {
    __m256 acc = _mm256_loadu_ps(pData);

    // the first vector is already in `acc`, hence the loop starts at 1
    for (int32_t r = 1; r < rounds; ++r) {
      const __m256 cur = _mm256_loadu_ps(pData + r * width);
      acc = isMinFunc ? _mm256_min_ps(acc, cur) : _mm256_max_ps(acc, cur);
    }

    // horizontal reduction of the vector lanes
    float lanes[sizeof(__m256) / sizeof(float)];
    _mm256_storeu_ps(lanes, acc);
    for (int32_t k = 0; k < width; ++k) {
      if (isMinFunc ? (lanes[k] < v) : (lanes[k] > v)) {
        v = lanes[k];
      }
    }

    next = rounds * width;
  }
#endif

  // scalar tail (or the whole input when no vector was processed)
  for (int32_t i = next; i < numOfRows; ++i) {
    if (isMinFunc ? (pData[i] < v) : (pData[i] > v)) {
      v = pData[i];
    }
  }

  return v;
}

/*
 * Return the minimum (isMinFunc) or maximum (!isMinFunc) of the `numOfRows`
 * contiguous doubles starting at `pData`.
 *
 * Complete 256-bit vectors are processed with AVX when the file is compiled
 * with AVX support; the rest (or everything, without AVX) is handled by a
 * scalar loop, so short inputs are never over-read. Returns 0 for an empty or
 * NULL input.
 */
static double doubleVectorCmpAVX(const double* pData, int32_t numOfRows, bool isMinFunc) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(double), &remain, &rounds, &width);
  (void)remain;  // the tail is derived from `next` below

  double  v = pData[0];
  int32_t next = 0;  // index of the first row not yet folded into `v`

#if __AVX__
  if (rounds > 0) {
    __m256d acc = _mm256_loadu_pd(pData);

    // the first vector is already in `acc`, hence the loop starts at 1
    for (int32_t r = 1; r < rounds; ++r) {
      const __m256d cur = _mm256_loadu_pd(pData + r * width);
      acc = isMinFunc ? _mm256_min_pd(acc, cur) : _mm256_max_pd(acc, cur);
    }

    // horizontal reduction of the vector lanes
    double lanes[sizeof(__m256d) / sizeof(double)];
    _mm256_storeu_pd(lanes, acc);
    for (int32_t k = 0; k < width; ++k) {
      if (isMinFunc ? (lanes[k] < v) : (lanes[k] > v)) {
        v = lanes[k];
      }
    }

    next = rounds * width;
  }
#endif

  // scalar tail (or the whole input when no vector was processed)
  for (int32_t i = next; i < numOfRows; ++i) {
    if (isMinFunc ? (pData[i] < v) : (pData[i] > v)) {
      v = pData[i];
    }
  }

  return v;
}

H
Haojun Liao 已提交
359
static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, int32_t numOfRows) {
360
  int32_t i = start;
H
Haojun Liao 已提交
361 362
  
  while (i < (start + numOfRows) && (colDataIsNull_f(pCol->nullbitmap, i) == true)) {
363 364 365 366 367 368
    i += 1;
  }

  return i;
}

H
Haojun Liao 已提交
369 370 371
/*
 * Fold rows [start, start + numOfRows) of a NULL-free 8-bit column into
 * pBuf->v as a running minimum (isMinFunc) or maximum.
 *
 *   data    - base address of the column values
 *   signVal - true for signed ordering, false for unsigned
 *
 * When pBuf->assign is false the result is seeded from the first row of the
 * range; otherwise the value already stored in pBuf->v takes part in the
 * comparison, so results accumulate across successive blocks. pBuf->assign is
 * set on return. The caller must pass numOfRows > 0.
 */
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                          bool signVal) {
  const int8_t* rows = (const int8_t*)data + start;

  if (!pBuf->assign) {
    pBuf->v = rows[0];
  }

  if (tsAVX2Enable && tsSIMDBuiltins) {
    // AVX2 version to speedup the loop; its result is merged with the running
    // value instead of overwriting it
    const int8_t blockVal = i8VectorCmpAVX2(rows, numOfRows, isMinFunc, signVal);

    if (signVal) {
      int8_t* v = (int8_t*)&pBuf->v;
      if (isMinFunc ? (blockVal < *v) : (blockVal > *v)) {
        *v = blockVal;
      }
    } else {
      uint8_t*      v = (uint8_t*)&pBuf->v;
      const uint8_t cand = (uint8_t)blockVal;
      if (isMinFunc ? (cand < *v) : (cand > *v)) {
        *v = cand;
      }
    }
  } else if (signVal) {
    const int8_t* p = (const int8_t*)data;
    int8_t*       v = (int8_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint8_t* p = (const uint8_t*)data;
    uint8_t*       v = (uint8_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}

H
Haojun Liao 已提交
403 404 405
/*
 * Fold rows [start, start + numOfRows) of a NULL-free 16-bit column into
 * pBuf->v as a running minimum (isMinFunc) or maximum.
 *
 *   data    - base address of the column values
 *   signVal - true for signed ordering, false for unsigned
 *
 * When pBuf->assign is false the result is seeded from the first row of the
 * range; otherwise the value already stored in pBuf->v takes part in the
 * comparison, so results accumulate across successive blocks. pBuf->assign is
 * set on return. The caller must pass numOfRows > 0.
 */
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  const int16_t* rows = (const int16_t*)data + start;

  if (!pBuf->assign) {
    pBuf->v = rows[0];
  }

  if (tsAVX2Enable && tsSIMDBuiltins) {
    // AVX2 version to speedup the loop; its result is merged with the running
    // value instead of overwriting it
    const int16_t blockVal = i16VectorCmpAVX2(rows, numOfRows, isMinFunc, signVal);

    if (signVal) {
      int16_t* v = (int16_t*)&pBuf->v;
      if (isMinFunc ? (blockVal < *v) : (blockVal > *v)) {
        *v = blockVal;
      }
    } else {
      uint16_t*      v = (uint16_t*)&pBuf->v;
      const uint16_t cand = (uint16_t)blockVal;
      if (isMinFunc ? (cand < *v) : (cand > *v)) {
        *v = cand;
      }
    }
  } else if (signVal) {
    const int16_t* p = (const int16_t*)data;
    int16_t*       v = (int16_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint16_t* p = (const uint16_t*)data;
    uint16_t*       v = (uint16_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
436

H
Haojun Liao 已提交
437 438 439
/*
 * Fold rows [start, start + numOfRows) of a NULL-free 32-bit column into
 * pBuf->v as a running minimum (isMinFunc) or maximum.
 *
 *   data    - base address of the column values
 *   signVal - true for signed ordering, false for unsigned
 *
 * When pBuf->assign is false the result is seeded from the first row of the
 * range; otherwise the value already stored in pBuf->v takes part in the
 * comparison, so results accumulate across successive blocks. pBuf->assign is
 * set on return. The caller must pass numOfRows > 0.
 */
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  const int32_t* rows = (const int32_t*)data + start;

  if (!pBuf->assign) {
    pBuf->v = rows[0];
  }

  if (tsAVX2Enable && tsSIMDBuiltins) {
    // AVX2 version to speedup the loop; its result is merged with the running
    // value instead of overwriting it
    const int32_t blockVal = i32VectorCmpAVX2(rows, numOfRows, isMinFunc, signVal);

    if (signVal) {
      int32_t* v = (int32_t*)&pBuf->v;
      if (isMinFunc ? (blockVal < *v) : (blockVal > *v)) {
        *v = blockVal;
      }
    } else {
      uint32_t*      v = (uint32_t*)&pBuf->v;
      const uint32_t cand = (uint32_t)blockVal;
      if (isMinFunc ? (cand < *v) : (cand > *v)) {
        *v = cand;
      }
    }
  } else if (signVal) {
    const int32_t* p = (const int32_t*)data;
    int32_t*       v = (int32_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint32_t* p = (const uint32_t*)data;
    uint32_t*       v = (uint32_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
470

H
Haojun Liao 已提交
471 472 473 474 475
/*
 * Fold rows [start, start + numOfRows) of a NULL-free 64-bit column into
 * pBuf->v as a running minimum (isMinFunc) or maximum. There is no SIMD path
 * for 64-bit integers; the scan is always scalar.
 *
 *   data    - base address of the column values
 *   signVal - true for signed ordering, false for unsigned
 *
 * When pBuf->assign is false the result is seeded from the first row of the
 * range (not from row 0 of the column, which may lie outside the range).
 * pBuf->assign is set on return, matching the narrower integer handlers.
 */
static void handleInt64Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  if (!pBuf->assign) {
    pBuf->v = ((const int64_t*)data)[start];
  }

  if (signVal) {
    const int64_t* p = (const int64_t*)data;
    int64_t*       v = &pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint64_t* p = (const uint64_t*)data;
    uint64_t*       v = (uint64_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
497

H
Haojun Liao 已提交
498 499 500
/*
 * Fold rows [start, start + numOfRows) of a NULL-free float column into the
 * float stored at the start of pBuf->v, as a running minimum (isMinFunc) or
 * maximum.
 *
 * When pBuf->assign is false the result is seeded from the first row of the
 * range; otherwise the stored value takes part in the comparison, so results
 * accumulate across successive blocks. pBuf->assign is set on return. The
 * caller must pass numOfRows > 0.
 */
static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc) {
  float* pData = (float*)pCol->pData;
  float* val = (float*)&pBuf->v;

  if (!pBuf->assign) {
    *val = pData[start];
  }

  if (tsAVXEnable && tsSIMDBuiltins) {
    // AVX version to speedup the loop; scan only the requested range and merge
    // the block result with the running value instead of overwriting it
    const float blockVal = floatVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    if (isMinFunc ? (blockVal < *val) : (blockVal > *val)) {
      *val = blockVal;
    }
  } else if (isMinFunc) {  // min
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}

/*
 * Fold rows [start, start + numOfRows) of a NULL-free double column into the
 * double stored in pBuf->v, as a running minimum (isMinFunc) or maximum.
 *
 * When pBuf->assign is false the result is seeded from the first row of the
 * range; otherwise the stored value takes part in the comparison, so results
 * accumulate across successive blocks. pBuf->assign is set on return. The
 * caller must pass numOfRows > 0.
 */
static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc) {
  double* pData = (double*)pCol->pData;
  double* val = (double*)&pBuf->v;

  if (!pBuf->assign) {
    *val = pData[start];
  }

  if (tsAVXEnable && tsSIMDBuiltins) {
    // AVX version to speedup the loop; scan only the requested range and merge
    // the block result with the running value instead of overwriting it
    const double blockVal = doubleVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    if (isMinFunc ? (blockVal < *val) : (blockVal > *val)) {
      *val = blockVal;
    }
  } else if (isMinFunc) {  // min
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = start; i < start + numOfRows; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
557

H
Haojun Liao 已提交
558 559 560 561 562 563
/*
 * Locate the first row in [start, start + num) whose stored bytes equal the
 * first pCol->info.bytes bytes at `tval`.
 *
 * The comparison is a raw memcmp on the column cell, so `tval` must point at a
 * value laid out exactly like the column's element type.
 *
 * Returns the row index, or -1 when no row matches (the real data behind the
 * block SMA is not present in the input).
 */
static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) {
  const int32_t end = start + num;

  // the data is loaded, not only the block SMA value
  for (int32_t i = start; i < end; ++i) {
    const char* p = colDataGetData(pCol, i);
    if (memcmp(tval, p, pCol->info.bytes) == 0) {
      return i;
    }
  }

  // if reach here means real data of block SMA is not set in pCtx->input.
  return -1;
}
H
Haojun Liao 已提交
570

H
Haojun Liao 已提交
571 572 573 574 575 576 577 578 579
/*
 * Scan rows [i, end) of pCol, skipping NULL rows, and fold every value into
 * pBuf->v as a running minimum (isMinFunc) or maximum. The scan is delegated
 * to __COMPARE_ACQUIRED_MIN / __COMPARE_ACQUIRED_MAX with the element type
 * that matches pCol->info.type; those macros also refresh pBuf->tuplePos via
 * updateTupleData() when pCtx has subsidiary columns.
 *
 * pBuf->v must already hold a valid seed of the column's type. Column types
 * other than the numeric ones listed below are ignored.
 */
static void doExtractVal(SColumnInfoData* pCol, int32_t i, int32_t end, SqlFunctionCtx* pCtx, SMinmaxResInfo* pBuf,
                         bool isMinFunc) {
/* Run one typed scan: view the column data and pBuf->v as `_type` and hand
 * them to the comparison macro `_cmp`. */
#define MINMAX_SCAN_COLUMN(_cmp, _type)                                               \
  do {                                                                                \
    const _type* pData = (const _type*)pCol->pData;                                   \
    _cmp(i, end, pCol->nullbitmap, pData, pCtx, *(_type*)&(pBuf->v), &pBuf->tuplePos) \
  } while (0)

  if (isMinFunc) {
    switch (pCol->info.type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, int8_t);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, int16_t);
        break;
      case TSDB_DATA_TYPE_INT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, int32_t);
        break;
      case TSDB_DATA_TYPE_BIGINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, int64_t);
        break;
      case TSDB_DATA_TYPE_UTINYINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, uint8_t);
        break;
      case TSDB_DATA_TYPE_USMALLINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, uint16_t);
        break;
      case TSDB_DATA_TYPE_UINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, uint32_t);
        break;
      case TSDB_DATA_TYPE_UBIGINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, uint64_t);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, float);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MIN, double);
        break;
      default:
        break;
    }
  } else {
    switch (pCol->info.type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, int8_t);
        break;
      case TSDB_DATA_TYPE_SMALLINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, int16_t);
        break;
      case TSDB_DATA_TYPE_INT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, int32_t);
        break;
      case TSDB_DATA_TYPE_BIGINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, int64_t);
        break;
      case TSDB_DATA_TYPE_UTINYINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, uint8_t);
        break;
      case TSDB_DATA_TYPE_USMALLINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, uint16_t);
        break;
      case TSDB_DATA_TYPE_UINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, uint32_t);
        break;
      case TSDB_DATA_TYPE_UBIGINT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, uint64_t);
        break;
      case TSDB_DATA_TYPE_FLOAT:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, float);
        break;
      case TSDB_DATA_TYPE_DOUBLE:
        MINMAX_SCAN_COLUMN(__COMPARE_ACQUIRED_MAX, double);
        break;
      default:
        break;
    }
  }

#undef MINMAX_SCAN_COLUMN
}

/*
 * Fold the current input block of pCtx into the running MIN (isMinFunc != 0)
 * or MAX (isMinFunc == 0) kept in the function's SMinmaxResInfo buffer.
 *
 * Three paths are taken:
 *   1. Block SMA is set: only the pre-aggregated min/max of the block is
 *      compared with the running value.
 *   2. The column has NULLs, the block is short (< 32 rows) or subsidiary
 *      columns must be tracked: a row-by-row scan that skips NULL rows.
 *   3. Otherwise: a typed, NULL-free scan (SIMD when enabled).
 *
 * When subsidiary columns are present, the tuple of the row holding the new
 * extreme is saved into pBuf->tuplePos; if a block yields no value at all, the
 * tuple of its first row is saved once into pBuf->nullTuplePos.
 *
 * Returns a positive number when the block contributed at least one non-NULL
 * value and 0 otherwise. On path 1 this is the exact non-NULL count, on path 2
 * it is 1, on path 3 it is the row count.
 */
int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
  int32_t    numOfElems = 0;
  const bool wantMin = (isMinFunc != 0);  // normalised so the comparisons below do not rely on isMinFunc being 0/1

  SInputColumnInfoData* pInput = &pCtx->input;

  SColumnInfoData* pCol = pInput->pData[0];
  int32_t          type = pCol->info.type;

  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SMinmaxResInfo*      pBuf = GET_ROWCELL_INTERBUF(pResInfo);
  pBuf->type = type;

  if (IS_NULL_TYPE(type)) {
    numOfElems = 0;
    goto _over;
  }

  // data in current data block are qualified to the query
  if (pInput->colDataSMAIsSet) {
    SColumnDataAgg* pAgg = pInput->pColumnDataAgg[0];

    numOfElems = pInput->numOfRows - pAgg->numOfNull;
    ASSERT(pInput->numOfRows == pInput->totalRows && numOfElems >= 0);

    if (numOfElems == 0) {
      goto _over;
    }

    void* tval = NULL;
    if (wantMin) {
      tval = &pAgg->min;
    } else {
      tval = &pAgg->max;
    }

    // set when pBuf->v is (re)written from the SMA value, i.e. when the
    // subsidiary tuple has to follow
    bool replaced = false;

    if (!pBuf->assign) {
      if (type == TSDB_DATA_TYPE_FLOAT) {
        GET_FLOAT_VAL(&pBuf->v) = GET_DOUBLE_VAL(tval);
      } else {
        pBuf->v = *(int64_t*)tval;
      }
      replaced = true;
    } else if (IS_SIGNED_NUMERIC_TYPE(type)) {
      int64_t prev = 0;
      GET_TYPED_DATA(prev, int64_t, type, &pBuf->v);

      int64_t val = GET_INT64_VAL(tval);
      if ((prev < val) != wantMin) {
        *(int64_t*)&pBuf->v = val;
        replaced = true;
      }
    } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
      uint64_t prev = 0;
      GET_TYPED_DATA(prev, uint64_t, type, &pBuf->v);

      uint64_t val = GET_UINT64_VAL(tval);
      if ((prev < val) != wantMin) {
        *(uint64_t*)&pBuf->v = val;
        replaced = true;
      }
    } else if (type == TSDB_DATA_TYPE_DOUBLE) {
      double prev = 0;
      GET_TYPED_DATA(prev, double, type, &pBuf->v);

      double val = GET_DOUBLE_VAL(tval);
      if ((prev < val) != wantMin) {
        *(double*)&pBuf->v = val;
        replaced = true;
      }
    } else if (type == TSDB_DATA_TYPE_FLOAT) {
      float prev = 0;
      GET_TYPED_DATA(prev, float, type, &pBuf->v);

      float val = GET_DOUBLE_VAL(tval);
      if ((prev < val) != wantMin) {
        *(float*)&pBuf->v = val;
        replaced = true;
      }
    }

    // Only move the subsidiary tuple when the extreme itself moved. The row
    // index is kept as int32_t, the type findRowIndex() returns.
    if (replaced && pCtx->subsidiaries.num > 0) {
      int32_t index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
      if (index >= 0) {
        pBuf->tuplePos = saveTupleData(pCtx, index, pCtx->pSrcBlock, NULL);
      }
    }

    pBuf->assign = true;
    return numOfElems;
  }

  int32_t start = pInput->startRowIndex;
  int32_t numOfRows = pInput->numOfRows;
  int32_t end = start + numOfRows;

  if (pCol->hasNull || numOfRows < 32 || pCtx->subsidiaries.num > 0) {
    int32_t i = findFirstValPosition(pCol, start, numOfRows);

    if (i >= end) {
      // every row of this block is NULL
      ASSERT(numOfElems == 0);
      goto _over;
    }

    if (!pBuf->assign) {
      // seed the running value from the first non-NULL row
      memcpy(&pBuf->v, pCol->pData + (pCol->info.bytes * i), pCol->info.bytes);

      if (pCtx->subsidiaries.num > 0) {
        pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
      }
      pBuf->assign = true;
    }

    // The block holds at least one non-NULL row. Report that even when an
    // earlier block already seeded the result, otherwise this block would be
    // treated as all-NULL at _over.
    numOfElems = 1;

    doExtractVal(pCol, i, end, pCtx, pBuf, wantMin);
  } else {
    numOfElems = numOfRows;

    switch (pCol->info.type) {
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, wantMin, true);
        break;
      }
      case TSDB_DATA_TYPE_SMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, wantMin, true);
        break;
      }
      case TSDB_DATA_TYPE_INT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, wantMin, true);
        break;
      }
      case TSDB_DATA_TYPE_BIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, wantMin, true);
        break;
      }
      case TSDB_DATA_TYPE_UTINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, wantMin, false);
        break;
      }
      case TSDB_DATA_TYPE_USMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, wantMin, false);
        break;
      }
      case TSDB_DATA_TYPE_UINT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, wantMin, false);
        break;
      }
      case TSDB_DATA_TYPE_UBIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, wantMin, false);
        break;
      }
      case TSDB_DATA_TYPE_FLOAT: {
        handleFloatCol(pCol, start, numOfRows, pBuf, wantMin);
        break;
      }
      case TSDB_DATA_TYPE_DOUBLE: {
        handleDoubleCol(pCol, start, numOfRows, pBuf, wantMin);
        break;
      }
      default:
        break;
    }

    pBuf->assign = true;
  }

_over:
  if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved) {
    pBuf->nullTuplePos = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, NULL);
    pBuf->nullTupleSaved = true;
  }

  return numOfElems;
}