tminmax.c 29.1 KB
Newer Older
H
Haojun Liao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "builtinsimpl.h"
#include "function.h"
#include "tdatablock.h"
#include "tfunctionInt.h"
#include "tglobal.h"

H
Haojun Liao 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Walks rows from `i` up to (but excluding) `end`. Rows for which colDataIsNull_f(bm, i) holds are
 * ignored; for every other row, `val` is replaced by `_data[i]` when that element is strictly larger.
 * Each time `val` is replaced and `ctx` has at least one subsidiary, updateTupleData() is invoked for
 * row `i` with `pos`. `i` is advanced in place by the macro.
 * Expands to a bare loop statement, so call sites do not need a trailing semicolon.
 */
#define __COMPARE_ACQUIRED_MAX(i, end, bm, _data, ctx, val, pos) \
  while (i < (end)) {                                            \
    if (!(colDataIsNull_f(bm, i)) && (val) < (_data)[i]) {       \
      (val) = (_data)[i];                                        \
      if ((ctx)->subsidiaries.num > 0) {                         \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);        \
      }                                                          \
    }                                                            \
    ++i;                                                         \
  }

/*
 * Walks rows from `i` up to (but excluding) `end`. Rows for which colDataIsNull_f(bm, i) holds are
 * ignored; for every other row, `val` is replaced by `_data[i]` when that element is strictly smaller.
 * Each time `val` is replaced and `ctx` has at least one subsidiary, updateTupleData() is invoked for
 * row `i` with `pos`. `i` is advanced in place by the macro.
 * Expands to a bare loop statement, so call sites do not need a trailing semicolon.
 */
#define __COMPARE_ACQUIRED_MIN(i, end, bm, _data, ctx, val, pos) \
  while (i < (end)) {                                            \
    if (!(colDataIsNull_f(bm, i)) && (val) > (_data)[i]) {       \
      (val) = (_data)[i];                                        \
      if ((ctx)->subsidiaries.num > 0) {                         \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);        \
      }                                                          \
    }                                                            \
    ++i;                                                         \
  }

/*
 * Lowers `val` to the smallest element of `_data` within the index window [start, end).
 * `val` must already hold a valid seed; it is only overwritten by strictly smaller elements.
 * No null bitmap is consulted.
 */
#define __COMPARE_EXTRACT_MIN(start, end, val, _data) \
  for (int32_t i = (start); i < (end); ++i) {         \
    if (!((_data)[i] < (val))) {                      \
      continue;                                       \
    }                                                 \
    (val) = (_data)[i];                               \
  }

/*
 * Raises `val` to the largest element of `_data` within the index window [start, end).
 * `val` must already hold a valid seed; it is only overwritten by strictly larger elements.
 * No null bitmap is consulted.
 */
#define __COMPARE_EXTRACT_MAX(start, end, val, _data) \
  for (int32_t i = (start); i < (end); ++i) {         \
    if (!((_data)[i] > (val))) {                      \
      continue;                                       \
    }                                                 \
    (val) = (_data)[i];                               \
  }

64 65
/**
 * Splits a run of `numOfRows` elements into 256-bit vector chunks plus a scalar tail.
 *
 * @param numOfRows  number of elements to process
 * @param bytes      size of one element in bytes
 * @param remainder  out: elements left over after the last full vector
 * @param rounds     out: number of full vectors contained in the input
 * @param width      out: number of elements that fit into one 256-bit vector
 *
 * If `bytes` is not in [1, 32] no element fits a vector: `*width` and `*rounds` are set to 0 and
 * `*remainder` to `numOfRows`, instead of dividing by zero.
 */
static void calculateRounds(int32_t numOfRows, int32_t bytes, int32_t* remainder, int32_t* rounds, int32_t* width) {
  const int32_t bitWidth = 256;
  const int32_t vecBytes = bitWidth >> 3u;

  if (bytes <= 0 || bytes > vecBytes) {
    *width = 0;
    *rounds = 0;
    *remainder = numOfRows;
    return;
  }

  *width = vecBytes / bytes;
  *remainder = numOfRows % (*width);
  *rounds = numOfRows / (*width);
}

H
Haojun Liao 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
/*
 * Reduces a vector accumulator plus a scalar tail to a single maximum stored in `_v`.
 *   _first  : array view of the vector lanes; elements [0] and [1] are read unconditionally
 *   _sec    : first element of the scalar tail
 *   _width  : number of lanes in `_first`
 *   _remain : number of tail elements in `_sec`
 *   _v      : lvalue receiving the result
 * The lane loop starts at k = 1, so lane 1 is compared twice. Comparisons are carried out in whatever
 * type `_v` and the element types promote to, so `_v` should have the signedness of the data.
 * Expands to several statements (not wrapped in do/while), so it must not be used as the sole body of
 * an unbraced if/else.
 */
#define EXTRACT_MAX_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = TMAX((_first)[0], (_first)[1]);                   \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMAX((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_v) < (_sec)[j]) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }

/*
 * Reduces a vector accumulator plus a scalar tail to a single minimum stored in `_v`.
 *   _first  : array view of the vector lanes; elements [0] and [1] are read unconditionally
 *   _sec    : first element of the scalar tail
 *   _width  : number of lanes in `_first`
 *   _remain : number of tail elements in `_sec`
 *   _v      : lvalue receiving the result
 * The lane loop starts at k = 1, so lane 1 is compared twice. Comparisons are carried out in whatever
 * type `_v` and the element types promote to, so `_v` should have the signedness of the data.
 * Expands to several statements (not wrapped in do/while), so it must not be used as the sole body of
 * an unbraced if/else.
 */
#define EXTRACT_MIN_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = TMIN((_first)[0], (_first)[1]);                   \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMIN((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_v) > (_sec)[j]) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }
L
Liu Jicong 已提交
95

H
Haojun Liao 已提交
96
/**
 * Returns the minimum or maximum of a contiguous array of 8-bit integers using 256-bit AVX2 loads.
 * No null bitmap is consulted; every element in the range takes part in the comparison.
 *
 * The first vector is loaded unconditionally, so `numOfRows` must cover at least one full vector
 * (32 elements). When the file is built without __AVX2__ the vector body is compiled out and 0 is
 * returned.
 *
 * @param pData      first element to scan
 * @param numOfRows  number of elements to scan
 * @param isMinFunc  true to compute the minimum, false for the maximum
 * @param signVal    true if the elements are int8_t, false if they are uint8_t
 * @return the extreme value; for unsigned input the uint8_t bit pattern is returned as int8_t
 */
static int8_t i8VectorCmpAVX2(const void* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int8_t        v = 0;
  const int8_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int8_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
  p += width;

  if (!isMinFunc) {  // max function
    if (signVal) {
      // vector 0 is already in initVal, so only vectors 1..rounds-1 are left to fold in
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epi8(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail
      const int8_t* q = (const int8_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epu8(initVal, next);
        p += width;
      }

      // reduce with an unsigned accumulator and tail so that values >= 128 compare correctly
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* tail = (const uint8_t*)p;
      uint8_t        uv = 0;
      EXTRACT_MAX_VAL(q, tail, width, remain, uv)
      v = (int8_t)uv;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epi8(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail
      const int8_t* q = (const int8_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epu8(initVal, next);
        p += width;
      }

      // reduce with an unsigned accumulator and tail so that values >= 128 compare correctly
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* tail = (const uint8_t*)p;
      uint8_t        uv = 0;
      EXTRACT_MIN_VAL(q, tail, width, remain, uv)
      v = (int8_t)uv;
    }
  }
#endif

  return v;
}

H
Haojun Liao 已提交
157
/**
 * Returns the minimum or maximum of a contiguous array of 16-bit integers using 256-bit AVX2 loads.
 * No null bitmap is consulted; every element in the range takes part in the comparison.
 *
 * The first vector is loaded unconditionally, so `numOfRows` must cover at least one full vector
 * (16 elements). When the file is built without __AVX2__ the vector body is compiled out and 0 is
 * returned.
 *
 * @param pData      first element to scan
 * @param numOfRows  number of elements to scan
 * @param isMinFunc  true to compute the minimum, false for the maximum
 * @param signVal    true if the elements are int16_t, false if they are uint16_t
 * @return the extreme value; for unsigned input the uint16_t bit pattern is returned as int16_t
 */
static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int16_t        v = 0;
  const int16_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int16_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
  p += width;

  if (!isMinFunc) {  // max function
    if (signVal) {
      // vector 0 is already in initVal, so only vectors 1..rounds-1 are left to fold in
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epi16(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail
      const int16_t* q = (const int16_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epu16(initVal, next);
        p += width;
      }

      // reduce with an unsigned accumulator and tail so that values >= 32768 compare correctly
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* tail = (const uint16_t*)p;
      uint16_t        uv = 0;
      EXTRACT_MAX_VAL(q, tail, width, remain, uv)
      v = (int16_t)uv;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epi16(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail
      const int16_t* q = (const int16_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        // unsigned lanes need the epu16 variant; epi16 would order values >= 32768 as negative
        initVal = _mm256_min_epu16(initVal, next);
        p += width;
      }

      // reduce with an unsigned accumulator and tail so that values >= 32768 compare correctly
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* tail = (const uint16_t*)p;
      uint16_t        uv = 0;
      EXTRACT_MIN_VAL(q, tail, width, remain, uv)
      v = (int16_t)uv;
    }
  }
#endif

  return v;
}

H
Haojun Liao 已提交
220
/**
 * Returns the minimum or maximum of a contiguous array of 32-bit integers using 256-bit AVX2 loads.
 * No null bitmap is consulted; every element in the range takes part in the comparison.
 *
 * The first vector is loaded unconditionally, so `numOfRows` must cover at least one full vector
 * (8 elements). When the file is built without __AVX2__ the vector body is compiled out and 0 is
 * returned.
 *
 * @param pData      first element to scan
 * @param numOfRows  number of elements to scan
 * @param isMinFunc  true to compute the minimum, false for the maximum
 * @param signVal    true if the elements are int32_t, false if they are uint32_t
 * @return the extreme value; for unsigned input the uint32_t bit pattern is returned as int32_t
 */
static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int32_t        v = 0;
  const int32_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int32_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((const __m256i*)p);
  p += width;

  if (!isMinFunc) {  // max function
    if (signVal) {
      // vector 0 is already in initVal, so only vectors 1..rounds-1 are left to fold in
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_max_epi32(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail
      const int32_t* q = (const int32_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        // unsigned lanes need the epu32 variant; epi32 would order values >= 2^31 as negative
        initVal = _mm256_max_epu32(initVal, next);
        p += width;
      }

      // reduce with an unsigned accumulator and tail so that values >= 2^31 compare correctly
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* tail = (const uint32_t*)p;
      uint32_t        uv = 0;
      EXTRACT_MAX_VAL(q, tail, width, remain, uv)
      v = (int32_t)uv;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epi32(initVal, next);
        p += width;
      }

      // reduce the lanes and the scalar tail
      const int32_t* q = (const int32_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((const __m256i*)p);
        initVal = _mm256_min_epu32(initVal, next);
        p += width;
      }

      // reduce with an unsigned accumulator and tail so that values >= 2^31 compare correctly
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* tail = (const uint32_t*)p;
      uint32_t        uv = 0;
      EXTRACT_MIN_VAL(q, tail, width, remain, uv)
      v = (int32_t)uv;
    }
  }
#endif

  return v;
}

282
/**
 * Returns the minimum or maximum of a contiguous float array using 256-bit AVX loads.
 * No null bitmap is consulted. The first vector is loaded unconditionally. When the file is built
 * without __AVX__ the vector body is compiled out and 0 is returned.
 *
 * @param pData      first element to scan
 * @param numOfRows  number of elements to scan
 * @param isMinFunc  true to compute the minimum, false for the maximum
 */
static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
  float        v = 0;
  const float* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(float), &remain, &rounds, &width);

#if __AVX__
  // vector 0 seeds the accumulator; the loops below fold in vectors 1..rounds-1
  __m256 acc = _mm256_loadu_ps(p);
  p += width;

  if (isMinFunc) {
    for (int32_t vecIdx = 1; vecIdx < rounds; ++vecIdx) {
      __m256 chunk = _mm256_loadu_ps(p);
      acc = _mm256_min_ps(acc, chunk);
      p += width;
    }

    // fold the lanes, then the scalar tail that p now points at
    const float* lanes = (const float*)&acc;
    EXTRACT_MIN_VAL(lanes, p, width, remain, v)
  } else {
    for (int32_t vecIdx = 1; vecIdx < rounds; ++vecIdx) {
      __m256 chunk = _mm256_loadu_ps(p);
      acc = _mm256_max_ps(acc, chunk);
      p += width;
    }

    // fold the lanes, then the scalar tail that p now points at
    const float* lanes = (const float*)&acc;
    EXTRACT_MAX_VAL(lanes, p, width, remain, v)
  }
#endif

  return v;
}

/**
 * Returns the minimum or maximum of a contiguous double array using 256-bit AVX loads.
 * No null bitmap is consulted. The first vector is loaded unconditionally. When the file is built
 * without __AVX__ the vector body is compiled out and 0 is returned.
 *
 * @param pData      first element to scan
 * @param numOfRows  number of elements to scan
 * @param isMinFunc  true to compute the minimum, false for the maximum
 */
static double doubleVectorCmpAVX(const double* pData, int32_t numOfRows, bool isMinFunc) {
  double        v = 0;
  const double* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(double), &remain, &rounds, &width);

#if __AVX__
  // vector 0 seeds the accumulator; the loops below fold in vectors 1..rounds-1
  __m256d acc = _mm256_loadu_pd(p);
  p += width;

  if (isMinFunc) {
    for (int32_t vecIdx = 1; vecIdx < rounds; ++vecIdx) {
      __m256d chunk = _mm256_loadu_pd(p);
      acc = _mm256_min_pd(acc, chunk);
      p += width;
    }

    // fold the lanes, then the scalar tail that p now points at
    const double* lanes = (const double*)&acc;
    EXTRACT_MIN_VAL(lanes, p, width, remain, v)
  } else {
    for (int32_t vecIdx = 1; vecIdx < rounds; ++vecIdx) {
      __m256d chunk = _mm256_loadu_pd(p);
      acc = _mm256_max_pd(acc, chunk);
      p += width;
    }

    // fold the lanes, then the scalar tail that p now points at
    const double* lanes = (const double*)&acc;
    EXTRACT_MAX_VAL(lanes, p, width, remain, v)
  }
#endif

  return v;
}

H
Haojun Liao 已提交
358
static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, int32_t numOfRows) {
359
  int32_t i = start;
L
Liu Jicong 已提交
360

H
Haojun Liao 已提交
361
  while (i < (start + numOfRows) && (colDataIsNull_f(pCol->nullbitmap, i) == true)) {
362 363 364 365 366 367
    i += 1;
  }

  return i;
}

H
Haojun Liao 已提交
368 369 370
/**
 * Updates pBuf->v with the min/max of rows [start, start + numOfRows) of an 8-bit integer column and
 * marks the buffer as assigned. No null bitmap is consulted.
 *
 * @param data       base of the column data (row 0)
 * @param start      first row of the window
 * @param numOfRows  number of rows in the window
 * @param pBuf       result buffer; pBuf->v holds the running value, pBuf->assign whether it is valid
 * @param isMinFunc  true for min, false for max
 * @param signVal    true for int8_t data, false for uint8_t data
 */
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                          bool signVal) {
  // AVX2 version to speedup the loop
  if (tsAVX2Enable && tsSIMDBuiltins) {
    // scan the same window as the scalar path: begin at row `start`, not at row 0
    // NOTE(review): this branch assigns pBuf->v without consulting pBuf->assign, so a value kept from an
    // earlier call is replaced rather than merged - confirm this is intended.
    pBuf->v = i8VectorCmpAVX2((const int8_t*)data + start, numOfRows, isMinFunc, signVal);
  } else {
    if (!pBuf->assign) {
      // seed from the first row of the window so rows before `start` never influence the result
      pBuf->v = ((const int8_t*)data)[start];
    }

    if (signVal) {
      const int8_t* p = (const int8_t*)data;
      int8_t*       v = (int8_t*)&pBuf->v;

      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
      } else {
        __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
      }
    } else {
      const uint8_t* p = (const uint8_t*)data;
      uint8_t*       v = (uint8_t*)&pBuf->v;

      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
      } else {
        __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
      }
    }
  }

  pBuf->assign = true;
}

H
Haojun Liao 已提交
402 403 404
/**
 * Updates pBuf->v with the min/max of rows [start, start + numOfRows) of a 16-bit integer column and
 * marks the buffer as assigned. No null bitmap is consulted.
 *
 * @param data       base of the column data (row 0)
 * @param start      first row of the window
 * @param numOfRows  number of rows in the window
 * @param pBuf       result buffer; pBuf->v holds the running value, pBuf->assign whether it is valid
 * @param isMinFunc  true for min, false for max
 * @param signVal    true for int16_t data, false for uint16_t data
 */
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  // AVX2 version to speedup the loop
  if (tsAVX2Enable && tsSIMDBuiltins) {
    // scan the same window as the scalar path: begin at row `start`, not at row 0
    // NOTE(review): this branch assigns pBuf->v without consulting pBuf->assign, so a value kept from an
    // earlier call is replaced rather than merged - confirm this is intended.
    pBuf->v = i16VectorCmpAVX2((const int16_t*)data + start, numOfRows, isMinFunc, signVal);
  } else {
    if (!pBuf->assign) {
      // seed from the first row of the window so rows before `start` never influence the result
      pBuf->v = ((const int16_t*)data)[start];
    }

    if (signVal) {
      const int16_t* p = (const int16_t*)data;
      int16_t*       v = (int16_t*)&pBuf->v;

      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
      } else {
        __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
      }
    } else {
      const uint16_t* p = (const uint16_t*)data;
      uint16_t*       v = (uint16_t*)&pBuf->v;

      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
      } else {
        __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
      }
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
435

H
Haojun Liao 已提交
436 437 438
/**
 * Updates pBuf->v with the min/max of rows [start, start + numOfRows) of a 32-bit integer column and
 * marks the buffer as assigned. No null bitmap is consulted.
 *
 * @param data       base of the column data (row 0)
 * @param start      first row of the window
 * @param numOfRows  number of rows in the window
 * @param pBuf       result buffer; pBuf->v holds the running value, pBuf->assign whether it is valid
 * @param isMinFunc  true for min, false for max
 * @param signVal    true for int32_t data, false for uint32_t data
 */
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  // AVX2 version to speedup the loop
  if (tsAVX2Enable && tsSIMDBuiltins) {
    // scan the same window as the scalar path: begin at row `start`, not at row 0
    // NOTE(review): this branch assigns pBuf->v without consulting pBuf->assign, so a value kept from an
    // earlier call is replaced rather than merged - confirm this is intended.
    pBuf->v = i32VectorCmpAVX2((const int32_t*)data + start, numOfRows, isMinFunc, signVal);
  } else {
    if (!pBuf->assign) {
      // seed from the first row of the window so rows before `start` never influence the result
      pBuf->v = ((const int32_t*)data)[start];
    }

    if (signVal) {
      const int32_t* p = (const int32_t*)data;
      int32_t*       v = (int32_t*)&pBuf->v;

      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
      } else {
        __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
      }
    } else {
      const uint32_t* p = (const uint32_t*)data;
      uint32_t*       v = (uint32_t*)&pBuf->v;

      if (isMinFunc) {
        __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
      } else {
        __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
      }
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
469

H
Haojun Liao 已提交
470 471 472 473 474
/**
 * Updates pBuf->v with the min/max of rows [start, start + numOfRows) of a 64-bit integer column and
 * marks the buffer as assigned. Scalar only; no null bitmap is consulted.
 *
 * @param data       base of the column data (row 0)
 * @param start      first row of the window
 * @param numOfRows  number of rows in the window
 * @param pBuf       result buffer; pBuf->v holds the running value, pBuf->assign whether it is valid
 * @param isMinFunc  true for min, false for max
 * @param signVal    true for int64_t data, false for uint64_t data
 */
static void handleInt64Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  if (!pBuf->assign) {
    // seed from the first row of the window so rows before `start` never influence the result
    pBuf->v = ((const int64_t*)data)[start];
  }

  if (signVal) {
    const int64_t* p = (const int64_t*)data;
    int64_t*       v = &pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint64_t* p = (const uint64_t*)data;
    uint64_t*       v = (uint64_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  // same contract as the 8/16/32-bit and floating-point handlers: the buffer now holds a valid value
  pBuf->assign = true;
}
H
Haojun Liao 已提交
496

L
Liu Jicong 已提交
497 498
/**
 * Updates pBuf->v (viewed as float) with the min/max of rows [start, start + numOfRows) of a float
 * column and marks the buffer as assigned. No null bitmap is consulted.
 *
 * @param pCol       column whose pData holds the float values
 * @param start      first row of the window
 * @param numOfRows  number of rows in the window
 * @param pBuf       result buffer; pBuf->v holds the running value, pBuf->assign whether it is valid
 * @param isMinFunc  true for min, false for max
 */
static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                           bool isMinFunc) {
  float* pData = (float*)pCol->pData;
  float* val = (float*)&pBuf->v;

  // AVX version to speedup the loop
  if (tsAVXEnable && tsSIMDBuiltins) {
    // scan the same window as the scalar path: begin at row `start`, not at row 0
    // NOTE(review): this branch assigns *val without consulting pBuf->assign, so a value kept from an
    // earlier call is replaced rather than merged - confirm this is intended.
    *val = floatVectorCmpAVX(pData + start, numOfRows, isMinFunc);
  } else {
    if (!pBuf->assign) {
      // seed from the first row of the window so rows before `start` never influence the result
      *val = pData[start];
    }

    if (isMinFunc) {  // min
      for (int32_t i = start; i < start + numOfRows; ++i) {
        if (*val > pData[i]) {
          *val = pData[i];
        }
      }
    } else {  // max
      for (int32_t i = start; i < start + numOfRows; ++i) {
        if (*val < pData[i]) {
          *val = pData[i];
        }
      }
    }
  }

  pBuf->assign = true;
}

L
Liu Jicong 已提交
528 529
/**
 * Updates pBuf->v (viewed as double) with the min/max of rows [start, start + numOfRows) of a double
 * column and marks the buffer as assigned. No null bitmap is consulted.
 *
 * @param pCol       column whose pData holds the double values
 * @param start      first row of the window
 * @param numOfRows  number of rows in the window
 * @param pBuf       result buffer; pBuf->v holds the running value, pBuf->assign whether it is valid
 * @param isMinFunc  true for min, false for max
 */
static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                            bool isMinFunc) {
  double* pData = (double*)pCol->pData;
  double* val = (double*)&pBuf->v;

  // AVX version to speedup the loop
  if (tsAVXEnable && tsSIMDBuiltins) {
    // scan the same window as the scalar path: begin at row `start`, not at row 0
    // NOTE(review): this branch assigns *val without consulting pBuf->assign, so a value kept from an
    // earlier call is replaced rather than merged - confirm this is intended.
    *val = (double)doubleVectorCmpAVX(pData + start, numOfRows, isMinFunc);
  } else {
    if (!pBuf->assign) {
      // seed from the first row of the window so rows before `start` never influence the result
      *val = pData[start];
    }

    if (isMinFunc) {  // min
      for (int32_t i = start; i < start + numOfRows; ++i) {
        if (*val > pData[i]) {
          *val = pData[i];
        }
      }
    } else {  // max
      for (int32_t i = start; i < start + numOfRows; ++i) {
        if (*val < pData[i]) {
          *val = pData[i];
        }
      }
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
558

H
Haojun Liao 已提交
559 560 561 562 563 564
/**
 * Searches rows [start, start + num) of pCol for the first cell whose leading pCol->info.bytes bytes
 * are byte-for-byte equal to those at `tval`.
 *
 * @return the matching row index, or -1 when no row in the window matches
 */
static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) {
  const int32_t stop = num + start;
  int32_t       row = start;

  // the data is loaded, not only the block SMA value
  while (row < stop) {
    char* cell = colDataGetData(pCol, row);
    if (memcmp(tval, cell, pCol->info.bytes) == 0) {
      return row;
    }
    ++row;
  }

  // reaching this point means the real data behind the block SMA is not set in pCtx->input
  return -1;
}
H
Haojun Liao 已提交
571

H
Haojun Liao 已提交
572 573 574 575 576 577 578 579 580
/**
 * Scans rows [i, end) of pCol and folds them into the running min/max kept in pBuf->v, interpreting
 * both the column data and pBuf->v according to pCol->info.type. The per-row work (null check, compare,
 * subsidiary tuple update through &pBuf->tuplePos) is done by __COMPARE_ACQUIRED_MIN / _MAX.
 * Column types not listed in the switch are left untouched.
 *
 * @param pCol       column to scan
 * @param i          first row to examine
 * @param end        one past the last row to examine
 * @param pCtx       function context, forwarded to the compare macros
 * @param pBuf       result buffer holding the running value and tuple position
 * @param isMinFunc  true for min, false for max
 */
static void doExtractVal(SColumnInfoData* pCol, int32_t i, int32_t end, SqlFunctionCtx* pCtx, SMinmaxResInfo* pBuf,
                         bool isMinFunc) {
// Runs the min or max scan with both the column data and pBuf->v viewed as `_type`.
#define DO_EXTRACT_TYPED(_type)                                                                           \
  do {                                                                                                    \
    const _type* pData = (const _type*)pCol->pData;                                                       \
    if (isMinFunc) {                                                                                      \
      __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, pData, pCtx, *(_type*)&(pBuf->v), &pBuf->tuplePos) \
    } else {                                                                                              \
      __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, pData, pCtx, *(_type*)&(pBuf->v), &pBuf->tuplePos) \
    }                                                                                                     \
  } while (0)

  switch (pCol->info.type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:
      DO_EXTRACT_TYPED(int8_t);
      break;

    case TSDB_DATA_TYPE_SMALLINT:
      DO_EXTRACT_TYPED(int16_t);
      break;

    case TSDB_DATA_TYPE_INT:
      DO_EXTRACT_TYPED(int32_t);
      break;

    case TSDB_DATA_TYPE_BIGINT:
      DO_EXTRACT_TYPED(int64_t);
      break;

    case TSDB_DATA_TYPE_UTINYINT:
      DO_EXTRACT_TYPED(uint8_t);
      break;

    case TSDB_DATA_TYPE_USMALLINT:
      DO_EXTRACT_TYPED(uint16_t);
      break;

    case TSDB_DATA_TYPE_UINT:
      DO_EXTRACT_TYPED(uint32_t);
      break;

    case TSDB_DATA_TYPE_UBIGINT:
      DO_EXTRACT_TYPED(uint64_t);
      break;

    case TSDB_DATA_TYPE_FLOAT:
      DO_EXTRACT_TYPED(float);
      break;

    case TSDB_DATA_TYPE_DOUBLE:
      DO_EXTRACT_TYPED(double);
      break;

    default:
      break;
  }

#undef DO_EXTRACT_TYPED
}

G
Ganlin Zhao 已提交
703
int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc, int32_t* nElems) {
H
Haojun Liao 已提交
704 705 706 707 708 709 710 711 712 713 714 715 716 717
  int32_t numOfElems = 0;

  SInputColumnInfoData* pInput = &pCtx->input;
  SColumnDataAgg*       pAgg = pInput->pColumnDataAgg[0];

  SColumnInfoData* pCol = pInput->pData[0];
  int32_t          type = pCol->info.type;

  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SMinmaxResInfo*      pBuf = GET_ROWCELL_INTERBUF(pResInfo);
  pBuf->type = type;

  if (IS_NULL_TYPE(type)) {
    numOfElems = 0;
718
    goto _over;
H
Haojun Liao 已提交
719 720 721
  }

  // data in current data block are qualified to the query
H
Haojun Liao 已提交
722
  if (pInput->colDataSMAIsSet) {
H
Haojun Liao 已提交
723
    numOfElems = pInput->numOfRows - pAgg->numOfNull;
H
Haojun Liao 已提交
724

H
Haojun Liao 已提交
725
    if (numOfElems == 0) {
H
Haojun Liao 已提交
726
      goto _over;
H
Haojun Liao 已提交
727 728 729 730 731 732 733 734 735 736 737 738
    }

    void*   tval = NULL;
    int16_t index = 0;

    if (isMinFunc) {
      tval = &pInput->pColumnDataAgg[0]->min;
    } else {
      tval = &pInput->pColumnDataAgg[0]->max;
    }

    if (!pBuf->assign) {
G
Ganlin Zhao 已提交
739 740 741
      if (type == TSDB_DATA_TYPE_FLOAT) {
        GET_FLOAT_VAL(&pBuf->v) = GET_DOUBLE_VAL(tval);
      } else {
G
Ganlin Zhao 已提交
742
        pBuf->v = GET_INT64_VAL(tval);
G
Ganlin Zhao 已提交
743
      }
744

H
Haojun Liao 已提交
745 746 747
      if (pCtx->subsidiaries.num > 0) {
        index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
        if (index >= 0) {
G
Ganlin Zhao 已提交
748 749 750 751
          int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
          if (code != TSDB_CODE_SUCCESS) {
            return code;
          }
H
Haojun Liao 已提交
752 753 754 755 756 757 758 759 760
        }
      }
    } else {
      if (IS_SIGNED_NUMERIC_TYPE(type)) {
        int64_t prev = 0;
        GET_TYPED_DATA(prev, int64_t, type, &pBuf->v);

        int64_t val = GET_INT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
761
          GET_INT64_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
762 763 764
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
G
Ganlin Zhao 已提交
765 766 767 768
              int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
              if (code != TSDB_CODE_SUCCESS) {
                return code;
              }
H
Haojun Liao 已提交
769 770 771 772 773 774 775 776 777
            }
          }
        }
      } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
        uint64_t prev = 0;
        GET_TYPED_DATA(prev, uint64_t, type, &pBuf->v);

        uint64_t val = GET_UINT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
778
          GET_UINT64_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
779 780 781
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
G
Ganlin Zhao 已提交
782 783 784 785
              int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
              if (code != TSDB_CODE_SUCCESS) {
                return code;
              }
H
Haojun Liao 已提交
786 787 788 789 790 791 792 793 794
            }
          }
        }
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        double prev = 0;
        GET_TYPED_DATA(prev, double, type, &pBuf->v);

        double val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
795
          GET_DOUBLE_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
796 797 798
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
G
Ganlin Zhao 已提交
799 800 801 802
              int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
              if (code != TSDB_CODE_SUCCESS) {
                return code;
              }
H
Haojun Liao 已提交
803 804 805 806 807 808 809 810 811
            }
          }
        }
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        float prev = 0;
        GET_TYPED_DATA(prev, float, type, &pBuf->v);

        float val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
812
          GET_FLOAT_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
813 814 815 816 817
        }

        if (pCtx->subsidiaries.num > 0) {
          index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
          if (index >= 0) {
G
Ganlin Zhao 已提交
818 819 820 821
            int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
            if (code != TSDB_CODE_SUCCESS) {
              return code;
            }
H
Haojun Liao 已提交
822 823 824 825 826 827
          }
        }
      }
    }

    pBuf->assign = true;
G
Ganlin Zhao 已提交
828
    return TSDB_CODE_SUCCESS;
H
Haojun Liao 已提交
829 830 831 832
  }

  int32_t start = pInput->startRowIndex;
  int32_t numOfRows = pInput->numOfRows;
H
Haojun Liao 已提交
833
  int32_t end = start + numOfRows;
H
Haojun Liao 已提交
834

H
Haojun Liao 已提交
835 836
  if (pCol->hasNull || numOfRows < 32 || pCtx->subsidiaries.num > 0) {
    int32_t i = findFirstValPosition(pCol, start, numOfRows);
H
Haojun Liao 已提交
837

H
Haojun Liao 已提交
838
    if ((i < end) && (!pBuf->assign)) {
839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
      char* p = pCol->pData + pCol->info.bytes * i;

      switch (pCol->info.type) {
        case TSDB_DATA_TYPE_DOUBLE:
        case TSDB_DATA_TYPE_UBIGINT:
        case TSDB_DATA_TYPE_BIGINT:
          pBuf->v = *(int64_t*)p;
          break;
        case TSDB_DATA_TYPE_UINT:
        case TSDB_DATA_TYPE_INT:
          pBuf->v = *(int32_t*)p;
          break;
        case TSDB_DATA_TYPE_USMALLINT:
        case TSDB_DATA_TYPE_SMALLINT:
          pBuf->v = *(int16_t*)p;
          break;
        case TSDB_DATA_TYPE_BOOL:
        case TSDB_DATA_TYPE_UTINYINT:
        case TSDB_DATA_TYPE_TINYINT:
          pBuf->v = *(int8_t*)p;
          break;
        case TSDB_DATA_TYPE_FLOAT: {
          *(float*)&pBuf->v = *(float*)p;
          break;
        }
        default:
          memcpy(&pBuf->v, p, pCol->info.bytes);
          break;
      }
H
Haojun Liao 已提交
868

H
Haojun Liao 已提交
869
      if (pCtx->subsidiaries.num > 0) {
G
Ganlin Zhao 已提交
870 871 872 873
        int32_t code = saveTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
        if (code != TSDB_CODE_SUCCESS) {
          return code;
        }
H
Haojun Liao 已提交
874
      }
H
Haojun Liao 已提交
875 876 877
      pBuf->assign = true;
      numOfElems = 1;
    }
H
Haojun Liao 已提交
878

H
Haojun Liao 已提交
879
    if (i >= end) {
H
Haojun Liao 已提交
880
      goto _over;
H
Haojun Liao 已提交
881
    }
H
Haojun Liao 已提交
882

H
Haojun Liao 已提交
883 884 885
    doExtractVal(pCol, i, end, pCtx, pBuf, isMinFunc);
  } else {
    numOfElems = numOfRows;
H
Haojun Liao 已提交
886

H
Haojun Liao 已提交
887
    switch (pCol->info.type) {
H
Haojun Liao 已提交
888 889 890 891
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
892
      }
H
Haojun Liao 已提交
893 894 895
      case TSDB_DATA_TYPE_SMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
896
      }
H
Haojun Liao 已提交
897 898 899
      case TSDB_DATA_TYPE_INT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
900
      }
H
Haojun Liao 已提交
901 902 903
      case TSDB_DATA_TYPE_BIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
904
      }
H
Haojun Liao 已提交
905 906 907 908 909 910 911
      case TSDB_DATA_TYPE_UTINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_USMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
H
Haojun Liao 已提交
912
      }
H
Haojun Liao 已提交
913
      case TSDB_DATA_TYPE_UINT: {
H
Haojun Liao 已提交
914
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
H
Haojun Liao 已提交
915 916 917
        break;
      }
      case TSDB_DATA_TYPE_UBIGINT: {
H
Haojun Liao 已提交
918
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
H
Haojun Liao 已提交
919 920 921 922 923 924 925 926 927 928 929
        break;
      }
      case TSDB_DATA_TYPE_FLOAT: {
        handleFloatCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
      case TSDB_DATA_TYPE_DOUBLE: {
        handleDoubleCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
    }
930 931

    pBuf->assign = true;
H
Haojun Liao 已提交
932
  }
H
Haojun Liao 已提交
933

H
Haojun Liao 已提交
934 935
_over:
  if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved) {
G
Ganlin Zhao 已提交
936 937 938 939
    int32_t code = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pBuf->nullTuplePos);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
H
Haojun Liao 已提交
940
    pBuf->nullTupleSaved = true;
H
Haojun Liao 已提交
941 942
  }

G
Ganlin Zhao 已提交
943 944
  *nElems = numOfElems;
  return TSDB_CODE_SUCCESS;
L
Liu Jicong 已提交
945
}