tminmax.c 28.3 KB
Newer Older
H
Haojun Liao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "builtinsimpl.h"
#include "function.h"
#include "tdatablock.h"
#include "tfunctionInt.h"
#include "tglobal.h"

H
Haojun Liao 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
// Walk the caller's cursor `i` up to `end`, raising `val` to the largest entry of `_data`
// whose row is not flagged NULL in bitmap `bm`. Each time `val` is replaced and the context
// carries subsidiary columns, updateTupleData() is invoked for that row with `pos`.
#define __COMPARE_ACQUIRED_MAX(i, end, bm, _data, ctx, val, pos)   \
  for (; i < (end); ++i) {                                         \
    if (!colDataIsNull_f(bm, i) && ((val) < (_data)[i])) {         \
      (val) = (_data)[i];                                          \
      if ((ctx)->subsidiaries.num > 0) {                           \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);          \
      }                                                            \
    }                                                              \
  }

// Walk the caller's cursor `i` up to `end`, lowering `val` to the smallest entry of `_data`
// whose row is not flagged NULL in bitmap `bm`. Each time `val` is replaced and the context
// carries subsidiary columns, updateTupleData() is invoked for that row with `pos`.
#define __COMPARE_ACQUIRED_MIN(i, end, bm, _data, ctx, val, pos)   \
  for (; i < (end); ++i) {                                         \
    if (!colDataIsNull_f(bm, i) && ((val) > (_data)[i])) {         \
      (val) = (_data)[i];                                          \
      if ((ctx)->subsidiaries.num > 0) {                           \
        updateTupleData((ctx), i, (ctx)->pSrcBlock, pos);          \
      }                                                            \
    }                                                              \
  }

// Lower `val` to the smallest element of `_data` in the index range [start, end).
// No NULL bitmap is consulted; `val` keeps its incoming value if nothing is smaller.
#define __COMPARE_EXTRACT_MIN(start, end, val, _data)   \
  for (int32_t i = (start); i < (end); ++i) {           \
    if ((_data)[i] < (val)) {                           \
      (val) = (_data)[i];                               \
    }                                                   \
  }

// Raise `val` to the largest element of `_data` in the index range [start, end).
// No NULL bitmap is consulted; `val` keeps its incoming value if nothing is larger.
#define __COMPARE_EXTRACT_MAX(start, end, val, _data)   \
  for (int32_t i = (start); i < (end); ++i) {           \
    if ((_data)[i] > (val)) {                           \
      (val) = (_data)[i];                               \
    }                                                   \
  }

64 65
// Split `numOfRows` elements of `bytes` bytes each into whole 256-bit vectors plus a tail.
//   *width     - how many elements fit into one 256-bit vector
//   *remainder - elements left over after the whole vectors
//   *rounds    - number of whole vectors
static void calculateRounds(int32_t numOfRows, int32_t bytes, int32_t* remainder, int32_t* rounds, int32_t* width) {
  const int32_t vectorBytes = 256 >> 3u;  // 256 bits expressed in bytes
  const int32_t lanes = vectorBytes / bytes;

  *width = lanes;
  *remainder = numOfRows % lanes;
  *rounds = numOfRows / lanes;
}

H
Haojun Liao 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
// Reduce to a single maximum in `_v`: first across the `_width` lanes in `_first`,
// then across the `_remain` trailing scalars in `_sec`.
// Expands to bare statements (not one expression), so it must be used as a statement on its own.
#define EXTRACT_MAX_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = (_first)[0];                                      \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMAX((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_sec)[j] > (_v)) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }

// Reduce to a single minimum in `_v`: first across the `_width` lanes in `_first`,
// then across the `_remain` trailing scalars in `_sec`.
// Expands to bare statements (not one expression), so it must be used as a statement on its own.
#define EXTRACT_MIN_VAL(_first, _sec, _width, _remain, _v) \
  (_v) = (_first)[0];                                      \
  for (int32_t k = 1; k < (_width); ++k) {                 \
    (_v) = TMIN((_v), (_first)[k]);                        \
  }                                                        \
                                                           \
  for (int32_t j = 0; j < (_remain); ++j) {                \
    if ((_sec)[j] < (_v)) {                                \
      (_v) = (_sec)[j];                                    \
    }                                                      \
  }
L
Liu Jicong 已提交
95

H
Haojun Liao 已提交
96
// Min/max of `numOfRows` contiguous 8-bit values starting at `pData`, using 256-bit AVX2
// lane-wise min/max followed by a scalar reduction of the lanes and of the trailing elements.
//   isMinFunc - true selects min, false selects max
//   signVal   - true compares as int8_t, false compares as uint8_t
// Returns the winning value's 8 bits as int8_t (an unsigned result shares the same bit pattern).
// When built without __AVX2__ the vector body is compiled out and 0 is returned.
// The first vector is loaded unconditionally, so the input must hold at least one full vector.
static int8_t i8VectorCmpAVX2(const void* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int8_t        v = 0;
  const int8_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int8_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((__m256i*)p);
  p += width;

  // NOTE: every loop below starts at 1 because the first of the `rounds` vectors is already in
  // initVal; starting at 0 would read one vector past the data and misplace the tail pointer.
  if (!isMinFunc) {  // max function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_max_epi8(initVal, next);
        p += width;
      }

      const int8_t* q = (const int8_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_max_epu8(initVal, next);
        p += width;
      }

      // reduce in an unsigned accumulator and read the tail as unsigned, otherwise values
      // above INT8_MAX would be compared as negative numbers
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* tail = (const uint8_t*)p;
      uint8_t        u = 0;
      EXTRACT_MAX_VAL(q, tail, width, remain, u)
      v = (int8_t)u;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_min_epi8(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result
      const int8_t* q = (const int8_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_min_epu8(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result, in the unsigned domain
      const uint8_t* q = (const uint8_t*)&initVal;
      const uint8_t* tail = (const uint8_t*)p;
      uint8_t        u = 0;
      EXTRACT_MIN_VAL(q, tail, width, remain, u)
      v = (int8_t)u;
    }
  }
#endif

  return v;
}

H
Haojun Liao 已提交
157
// Min/max of `numOfRows` contiguous 16-bit values starting at `pData`, using 256-bit AVX2
// lane-wise min/max followed by a scalar reduction of the lanes and of the trailing elements.
//   isMinFunc - true selects min, false selects max
//   signVal   - true compares as int16_t, false compares as uint16_t
// Returns the winning value's 16 bits as int16_t (an unsigned result shares the same bit pattern).
// When built without __AVX2__ the vector body is compiled out and 0 is returned.
// The first vector is loaded unconditionally, so the input must hold at least one full vector.
static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int16_t        v = 0;
  const int16_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int16_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((__m256i*)p);
  p += width;

  // NOTE: every loop below starts at 1 because the first of the `rounds` vectors is already in
  // initVal; starting at 0 would read one vector past the data and misplace the tail pointer.
  if (!isMinFunc) {  // max function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_max_epi16(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result
      const int16_t* q = (const int16_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_max_epu16(initVal, next);
        p += width;
      }

      // reduce in an unsigned accumulator and read the tail as unsigned, otherwise values
      // above INT16_MAX would be compared as negative numbers
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* tail = (const uint16_t*)p;
      uint16_t        u = 0;
      EXTRACT_MAX_VAL(q, tail, width, remain, u)
      v = (int16_t)u;
    }

  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_min_epi16(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result
      const int16_t* q = (const int16_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        // unsigned data needs the unsigned lane-wise min (epu16), not the signed one
        initVal = _mm256_min_epu16(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result, in the unsigned domain
      const uint16_t* q = (const uint16_t*)&initVal;
      const uint16_t* tail = (const uint16_t*)p;
      uint16_t        u = 0;
      EXTRACT_MIN_VAL(q, tail, width, remain, u)
      v = (int16_t)u;
    }
  }
#endif

  return v;
}

H
Haojun Liao 已提交
220
// Min/max of `numOfRows` contiguous 32-bit values starting at `pData`, using 256-bit AVX2
// lane-wise min/max followed by a scalar reduction of the lanes and of the trailing elements.
//   isMinFunc - true selects min, false selects max
//   signVal   - true compares as int32_t, false compares as uint32_t
// Returns the winning value's 32 bits as int32_t (an unsigned result shares the same bit pattern).
// When built without __AVX2__ the vector body is compiled out and 0 is returned.
// The first vector is loaded unconditionally, so the input must hold at least one full vector.
static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc, bool signVal) {
  int32_t        v = 0;
  const int32_t* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(int32_t), &remain, &rounds, &width);

#if __AVX2__
  __m256i next;
  __m256i initVal = _mm256_lddqu_si256((__m256i*)p);
  p += width;

  // NOTE: every loop below starts at 1 because the first of the `rounds` vectors is already in
  // initVal; starting at 0 would read one vector past the data and misplace the tail pointer.
  if (!isMinFunc) {  // max function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_max_epi32(initVal, next);
        p += width;
      }

      // compare the lanes and the tail to get the final result
      const int32_t* q = (const int32_t*)&initVal;
      EXTRACT_MAX_VAL(q, p, width, remain, v)
    } else {  // unsigned value
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        // unsigned data needs the unsigned lane-wise max (epu32), not the signed one
        initVal = _mm256_max_epu32(initVal, next);
        p += width;
      }

      // reduce in an unsigned accumulator and read the tail as unsigned, otherwise values
      // above INT32_MAX in the tail would be compared as negative numbers
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* tail = (const uint32_t*)p;
      uint32_t        u = 0;
      EXTRACT_MAX_VAL(q, tail, width, remain, u)
      v = (int32_t)u;
    }
  } else {  // min function
    if (signVal) {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_min_epi32(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result
      const int32_t* q = (const int32_t*)&initVal;
      EXTRACT_MIN_VAL(q, p, width, remain, v)
    } else {
      for (int32_t i = 1; i < rounds; ++i) {
        next = _mm256_lddqu_si256((__m256i*)p);
        initVal = _mm256_min_epu32(initVal, next);
        p += width;
      }

      // reduce the lanes and the tail to the final result, in the unsigned domain
      const uint32_t* q = (const uint32_t*)&initVal;
      const uint32_t* tail = (const uint32_t*)p;
      uint32_t        u = 0;
      EXTRACT_MIN_VAL(q, tail, width, remain, u)
      v = (int32_t)u;
    }
  }
#endif

  return v;
}

282
// Min (isMinFunc == true) or max of `numOfRows` contiguous floats starting at `pData`.
// The first 256-bit vector seeds the accumulator, the remaining whole vectors are folded in
// lane-wise, and finally the lanes plus the trailing elements are reduced to one value.
// When built without __AVX__ the vector body is compiled out and 0 is returned.
static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
  float        v = 0;
  const float* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(float), &remain, &rounds, &width);

#if __AVX__

  __m256 acc = _mm256_loadu_ps(p);
  p += width;

  // vector #0 is already in acc, so fold in vectors 1 .. rounds-1
  for (int32_t r = 1; r < rounds; ++r) {
    __m256 cur = _mm256_loadu_ps(p);
    if (isMinFunc) {
      acc = _mm256_min_ps(acc, cur);
    } else {
      acc = _mm256_max_ps(acc, cur);
    }
    p += width;
  }

  // p now points at the trailing elements that did not fill a whole vector
  const float* lanes = (const float*)&acc;
  if (isMinFunc) {
    EXTRACT_MIN_VAL(lanes, p, width, remain, v)
  } else {
    EXTRACT_MAX_VAL(lanes, p, width, remain, v)
  }
#endif

  return v;
}

// Min (isMinFunc == true) or max of `numOfRows` contiguous doubles starting at `pData`.
// The first 256-bit vector seeds the accumulator, the remaining whole vectors are folded in
// lane-wise, and finally the lanes plus the trailing elements are reduced to one value.
// When built without __AVX__ the vector body is compiled out and 0 is returned.
static double doubleVectorCmpAVX(const double* pData, int32_t numOfRows, bool isMinFunc) {
  double        v = 0;
  const double* p = pData;

  int32_t width, remain, rounds;
  calculateRounds(numOfRows, sizeof(double), &remain, &rounds, &width);

#if __AVX__

  __m256d acc = _mm256_loadu_pd(p);
  p += width;

  // vector #0 is already in acc, so fold in vectors 1 .. rounds-1
  for (int32_t r = 1; r < rounds; ++r) {
    __m256d cur = _mm256_loadu_pd(p);
    if (isMinFunc) {
      acc = _mm256_min_pd(acc, cur);
    } else {
      acc = _mm256_max_pd(acc, cur);
    }
    p += width;
  }

  // reduce the lanes and the trailing elements to the final result
  const double* lanes = (const double*)&acc;
  if (isMinFunc) {
    EXTRACT_MIN_VAL(lanes, p, width, remain, v)
  } else {
    EXTRACT_MAX_VAL(lanes, p, width, remain, v)
  }
#endif

  return v;
}

H
Haojun Liao 已提交
358
static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, int32_t numOfRows) {
359
  int32_t i = start;
L
Liu Jicong 已提交
360

H
Haojun Liao 已提交
361
  while (i < (start + numOfRows) && (colDataIsNull_f(pCol->nullbitmap, i) == true)) {
362 363 364 365 366 367
    i += 1;
  }

  return i;
}

H
Haojun Liao 已提交
368 369 370
// Fold the 8-bit rows [start, start + numOfRows) of `data` into the running min/max kept in
// pBuf->v, then mark the buffer as assigned.
//   isMinFunc - true for min, false for max
//   signVal   - true to compare as int8_t, false to compare as uint8_t
// When the SIMD switches are on, the rows are first reduced to one value by i8VectorCmpAVX2 and
// that single value is merged into pBuf->v; otherwise every row is merged directly. Either way
// an already assigned pBuf->v (result of earlier blocks) takes part in the comparison, and the
// rows are taken starting at `start`.
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                          bool signVal) {
  // candidates to merge: by default the raw rows of this block
  const void* src = data;
  int32_t     from = start;
  int32_t     to = start + numOfRows;

  // AVX2 version to speedup the loop: collapse the block into one candidate value
  int8_t blockVal = 0;
  if (tsAVX2Enable && tsSIMDBuiltins) {
    blockVal = i8VectorCmpAVX2((const int8_t*)data + start, numOfRows, isMinFunc, signVal);
    src = &blockVal;
    from = 0;
    to = 1;
  }

  if (!pBuf->assign) {
    pBuf->v = ((const int8_t*)src)[from];
  }

  if (signVal) {
    const int8_t* p = (const int8_t*)src;
    int8_t*       v = (int8_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(from, to, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(from, to, *v, p);
    }
  } else {
    const uint8_t* p = (const uint8_t*)src;
    uint8_t*       v = (uint8_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(from, to, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(from, to, *v, p);
    }
  }

  pBuf->assign = true;
}

H
Haojun Liao 已提交
402 403 404
// Fold the 16-bit rows [start, start + numOfRows) of `data` into the running min/max kept in
// pBuf->v, then mark the buffer as assigned.
//   isMinFunc - true for min, false for max
//   signVal   - true to compare as int16_t, false to compare as uint16_t
// When the SIMD switches are on, the rows are first reduced to one value by i16VectorCmpAVX2 and
// that single value is merged into pBuf->v; otherwise every row is merged directly. Either way
// an already assigned pBuf->v (result of earlier blocks) takes part in the comparison, and the
// rows are taken starting at `start`.
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  // candidates to merge: by default the raw rows of this block
  const void* src = data;
  int32_t     from = start;
  int32_t     to = start + numOfRows;

  // AVX2 version to speedup the loop: collapse the block into one candidate value
  int16_t blockVal = 0;
  if (tsAVX2Enable && tsSIMDBuiltins) {
    blockVal = i16VectorCmpAVX2((const int16_t*)data + start, numOfRows, isMinFunc, signVal);
    src = &blockVal;
    from = 0;
    to = 1;
  }

  if (!pBuf->assign) {
    pBuf->v = ((const int16_t*)src)[from];
  }

  if (signVal) {
    const int16_t* p = (const int16_t*)src;
    int16_t*       v = (int16_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(from, to, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(from, to, *v, p);
    }
  } else {
    const uint16_t* p = (const uint16_t*)src;
    uint16_t*       v = (uint16_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(from, to, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(from, to, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
435

H
Haojun Liao 已提交
436 437 438
// Fold the 32-bit rows [start, start + numOfRows) of `data` into the running min/max kept in
// pBuf->v, then mark the buffer as assigned.
//   isMinFunc - true for min, false for max
//   signVal   - true to compare as int32_t, false to compare as uint32_t
// When the SIMD switches are on, the rows are first reduced to one value by i32VectorCmpAVX2 and
// that single value is merged into pBuf->v; otherwise every row is merged directly. Either way
// an already assigned pBuf->v (result of earlier blocks) takes part in the comparison, and the
// rows are taken starting at `start`.
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  // candidates to merge: by default the raw rows of this block
  const void* src = data;
  int32_t     from = start;
  int32_t     to = start + numOfRows;

  // AVX2 version to speedup the loop: collapse the block into one candidate value
  int32_t blockVal = 0;
  if (tsAVX2Enable && tsSIMDBuiltins) {
    blockVal = i32VectorCmpAVX2((const int32_t*)data + start, numOfRows, isMinFunc, signVal);
    src = &blockVal;
    from = 0;
    to = 1;
  }

  if (!pBuf->assign) {
    pBuf->v = ((const int32_t*)src)[from];
  }

  if (signVal) {
    const int32_t* p = (const int32_t*)src;
    int32_t*       v = (int32_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(from, to, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(from, to, *v, p);
    }
  } else {
    const uint32_t* p = (const uint32_t*)src;
    uint32_t*       v = (uint32_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(from, to, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(from, to, *v, p);
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
469

H
Haojun Liao 已提交
470 471 472
// Fold the 64-bit rows [start, start + numOfRows) of `data` into the running min/max kept in
// pBuf->v, then mark the buffer as assigned.
//   isMinFunc - true for min, false for max
//   signVal   - true to compare as int64_t, false to compare as uint64_t
// There is no SIMD path for 64-bit integers; the rows are always scanned one by one.
static void handleInt64Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
                           bool signVal) {
  // seed the running value from the first row only when nothing has been recorded yet
  if (!pBuf->assign) {
    pBuf->v = ((const int64_t*)data)[start];
  }

  if (signVal) {
    const int64_t* p = (const int64_t*)data;
    int64_t*       v = &pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  } else {
    const uint64_t* p = (const uint64_t*)data;
    uint64_t*       v = (uint64_t*)&pBuf->v;

    if (isMinFunc) {
      __COMPARE_EXTRACT_MIN(start, start + numOfRows, *v, p);
    } else {
      __COMPARE_EXTRACT_MAX(start, start + numOfRows, *v, p);
    }
  }

  // like the other handle*Col helpers: record that pBuf->v now holds a result, so the next
  // block is merged into it rather than re-seeding it from that block's first row
  pBuf->assign = true;
}
H
Haojun Liao 已提交
496

L
Liu Jicong 已提交
497 498
// Fold the float rows [start, start + numOfRows) of column `pCol` into the running min/max
// stored (as a float) in pBuf->v, then mark the buffer as assigned.
// When the SIMD switches are on, the rows are first reduced to one value by floatVectorCmpAVX
// and that single value is merged into the running result; otherwise every row is merged
// directly. Either way an already assigned result from earlier blocks takes part in the
// comparison, and the rows are taken starting at `start`.
static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                           bool isMinFunc) {
  const float* pData = (const float*)pCol->pData;
  float*       val = (float*)&pBuf->v;

  // candidates to merge: by default the raw rows of this block
  int32_t from = start;
  int32_t to = start + numOfRows;

  // AVX version to speedup the loop: collapse the block into one candidate value
  float blockVal = 0;
  if (tsAVXEnable && tsSIMDBuiltins) {
    blockVal = floatVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    pData = &blockVal;
    from = 0;
    to = 1;
  }

  if (!pBuf->assign) {
    *val = pData[from];
  }

  if (isMinFunc) {  // min
    for (int32_t i = from; i < to; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = from; i < to; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}

L
Liu Jicong 已提交
528 529
// Fold the double rows [start, start + numOfRows) of column `pCol` into the running min/max
// stored (as a double) in pBuf->v, then mark the buffer as assigned.
// When the SIMD switches are on, the rows are first reduced to one value by doubleVectorCmpAVX
// and that single value is merged into the running result; otherwise every row is merged
// directly. Either way an already assigned result from earlier blocks takes part in the
// comparison, and the rows are taken starting at `start`.
static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf,
                            bool isMinFunc) {
  const double* pData = (const double*)pCol->pData;
  double*       val = (double*)&pBuf->v;

  // candidates to merge: by default the raw rows of this block
  int32_t from = start;
  int32_t to = start + numOfRows;

  // AVX version to speedup the loop: collapse the block into one candidate value
  double blockVal = 0;
  if (tsAVXEnable && tsSIMDBuiltins) {
    blockVal = doubleVectorCmpAVX(pData + start, numOfRows, isMinFunc);
    pData = &blockVal;
    from = 0;
    to = 1;
  }

  if (!pBuf->assign) {
    *val = pData[from];
  }

  if (isMinFunc) {  // min
    for (int32_t i = from; i < to; ++i) {
      if (*val > pData[i]) {
        *val = pData[i];
      }
    }
  } else {  // max
    for (int32_t i = from; i < to; ++i) {
      if (*val < pData[i]) {
        *val = pData[i];
      }
    }
  }

  pBuf->assign = true;
}
H
Haojun Liao 已提交
558

H
Haojun Liao 已提交
559 560 561 562 563 564
// Locate the first row in [start, start + num) whose stored bytes equal the
// pCol->info.bytes bytes at `tval`. Returns that row index, or -1 when no row matches.
static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) {
  const int32_t stop = num + start;
  int32_t       row = start;

  // the data is loaded, not only the block SMA value
  while (row < stop) {
    char* cell = colDataGetData(pCol, row);
    if (memcmp((void*)tval, cell, pCol->info.bytes) == 0) {
      return row;
    }
    ++row;
  }

  // reaching here means the real data behind the block SMA is not set in pCtx->input
  return -1;
}
H
Haojun Liao 已提交
571

H
Haojun Liao 已提交
572 573 574 575 576 577 578 579 580
// Scan rows [i, end) of `pCol`, skipping NULL rows, and fold them into the running min
// (isMinFunc == true) or max held in pBuf->v, interpreted according to the column type.
// The __COMPARE_ACQUIRED_* macros also call updateTupleData() with &pBuf->tuplePos whenever
// the running value changes and the context has subsidiary columns.
// Column types not listed below are left untouched.
static void doExtractVal(SColumnInfoData* pCol, int32_t i, int32_t end, SqlFunctionCtx* pCtx, SMinmaxResInfo* pBuf,
                         bool isMinFunc) {
  switch (pCol->info.type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT: {
      const int8_t* vals = (const int8_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(int8_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(int8_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_SMALLINT: {
      const int16_t* vals = (const int16_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(int16_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(int16_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_INT: {
      const int32_t* vals = (const int32_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(int32_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(int32_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_BIGINT: {
      // pBuf->v is used directly here, without a narrowing view
      const int64_t* vals = (const int64_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, (pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, (pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_UTINYINT: {
      const uint8_t* vals = (const uint8_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(uint8_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(uint8_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_USMALLINT: {
      const uint16_t* vals = (const uint16_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(uint16_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(uint16_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_UINT: {
      const uint32_t* vals = (const uint32_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(uint32_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(uint32_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_UBIGINT: {
      const uint64_t* vals = (const uint64_t*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(uint64_t*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(uint64_t*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_FLOAT: {
      const float* vals = (const float*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(float*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(float*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    case TSDB_DATA_TYPE_DOUBLE: {
      const double* vals = (const double*)pCol->pData;
      if (isMinFunc) {
        __COMPARE_ACQUIRED_MIN(i, end, pCol->nullbitmap, vals, pCtx, *(double*)&(pBuf->v), &pBuf->tuplePos)
      } else {
        __COMPARE_ACQUIRED_MAX(i, end, pCol->nullbitmap, vals, pCtx, *(double*)&(pBuf->v), &pBuf->tuplePos)
      }
      break;
    }

    default:
      // any other column type: nothing to do
      break;
  }
}

G
Ganlin Zhao 已提交
703
int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc, int32_t* nElems) {
H
Haojun Liao 已提交
704 705 706 707 708 709 710 711 712 713 714 715 716
  int32_t numOfElems = 0;

  SInputColumnInfoData* pInput = &pCtx->input;
  SColumnDataAgg*       pAgg = pInput->pColumnDataAgg[0];

  SColumnInfoData* pCol = pInput->pData[0];
  int32_t          type = pCol->info.type;

  SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
  SMinmaxResInfo*      pBuf = GET_ROWCELL_INTERBUF(pResInfo);
  pBuf->type = type;

  if (IS_NULL_TYPE(type)) {
717
    goto _over;
H
Haojun Liao 已提交
718 719 720
  }

  // data in current data block are qualified to the query
H
Haojun Liao 已提交
721
  if (pInput->colDataSMAIsSet) {
H
Haojun Liao 已提交
722 723
    numOfElems = pInput->numOfRows - pAgg->numOfNull;
    if (numOfElems == 0) {
H
Haojun Liao 已提交
724
      goto _over;
H
Haojun Liao 已提交
725 726 727
    }

    int16_t index = 0;
728
    void*   tval = (isMinFunc) ? &pInput->pColumnDataAgg[0]->min : &pInput->pColumnDataAgg[0]->max;
H
Haojun Liao 已提交
729 730

    if (!pBuf->assign) {
G
Ganlin Zhao 已提交
731 732 733
      if (type == TSDB_DATA_TYPE_FLOAT) {
        GET_FLOAT_VAL(&pBuf->v) = GET_DOUBLE_VAL(tval);
      } else {
G
Ganlin Zhao 已提交
734
        pBuf->v = GET_INT64_VAL(tval);
G
Ganlin Zhao 已提交
735
      }
736

H
Haojun Liao 已提交
737 738 739
      if (pCtx->subsidiaries.num > 0) {
        index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
        if (index >= 0) {
G
Ganlin Zhao 已提交
740 741 742 743
          int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
          if (code != TSDB_CODE_SUCCESS) {
            return code;
          }
H
Haojun Liao 已提交
744 745 746 747 748 749 750 751 752
        }
      }
    } else {
      if (IS_SIGNED_NUMERIC_TYPE(type)) {
        int64_t prev = 0;
        GET_TYPED_DATA(prev, int64_t, type, &pBuf->v);

        int64_t val = GET_INT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
753
          GET_INT64_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
754 755 756
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
G
Ganlin Zhao 已提交
757 758 759 760
              int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
              if (code != TSDB_CODE_SUCCESS) {
                return code;
              }
H
Haojun Liao 已提交
761 762 763 764 765 766 767 768 769
            }
          }
        }
      } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
        uint64_t prev = 0;
        GET_TYPED_DATA(prev, uint64_t, type, &pBuf->v);

        uint64_t val = GET_UINT64_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
770
          GET_UINT64_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
771 772 773
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
G
Ganlin Zhao 已提交
774 775 776 777
              int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
              if (code != TSDB_CODE_SUCCESS) {
                return code;
              }
H
Haojun Liao 已提交
778 779 780 781 782 783 784 785 786
            }
          }
        }
      } else if (type == TSDB_DATA_TYPE_DOUBLE) {
        double prev = 0;
        GET_TYPED_DATA(prev, double, type, &pBuf->v);

        double val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
787
          GET_DOUBLE_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
788 789 790
          if (pCtx->subsidiaries.num > 0) {
            index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
            if (index >= 0) {
G
Ganlin Zhao 已提交
791 792 793 794
              int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
              if (code != TSDB_CODE_SUCCESS) {
                return code;
              }
H
Haojun Liao 已提交
795 796 797 798 799 800 801 802 803
            }
          }
        }
      } else if (type == TSDB_DATA_TYPE_FLOAT) {
        float prev = 0;
        GET_TYPED_DATA(prev, float, type, &pBuf->v);

        float val = GET_DOUBLE_VAL(tval);
        if ((prev < val) ^ isMinFunc) {
G
Ganlin Zhao 已提交
804
          GET_FLOAT_VAL(&pBuf->v) = val;
H
Haojun Liao 已提交
805 806 807 808 809
        }

        if (pCtx->subsidiaries.num > 0) {
          index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval);
          if (index >= 0) {
G
Ganlin Zhao 已提交
810 811 812 813
            int32_t code = saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos);
            if (code != TSDB_CODE_SUCCESS) {
              return code;
            }
H
Haojun Liao 已提交
814 815 816 817 818
          }
        }
      }
    }

819
    numOfElems = 1;
H
Haojun Liao 已提交
820
    pBuf->assign = true;
821
    goto _over;
H
Haojun Liao 已提交
822 823 824 825
  }

  int32_t start = pInput->startRowIndex;
  int32_t numOfRows = pInput->numOfRows;
H
Haojun Liao 已提交
826
  int32_t end = start + numOfRows;
H
Haojun Liao 已提交
827

H
Haojun Liao 已提交
828 829
  if (pCol->hasNull || numOfRows < 32 || pCtx->subsidiaries.num > 0) {
    int32_t i = findFirstValPosition(pCol, start, numOfRows);
H
Haojun Liao 已提交
830

H
Haojun Liao 已提交
831 832
    if ((i < end) && (!pBuf->assign)) {
      memcpy(&pBuf->v, pCol->pData + (pCol->info.bytes * i), pCol->info.bytes);
H
Haojun Liao 已提交
833

H
Haojun Liao 已提交
834
      if (pCtx->subsidiaries.num > 0) {
G
Ganlin Zhao 已提交
835 836 837 838
        int32_t code = saveTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
        if (code != TSDB_CODE_SUCCESS) {
          return code;
        }
H
Haojun Liao 已提交
839
      }
H
Haojun Liao 已提交
840 841 842
      pBuf->assign = true;
      numOfElems = 1;
    }
H
Haojun Liao 已提交
843

H
Haojun Liao 已提交
844
    if (i >= end) {
H
Haojun Liao 已提交
845
      goto _over;
H
Haojun Liao 已提交
846
    }
H
Haojun Liao 已提交
847

H
Haojun Liao 已提交
848 849 850
    doExtractVal(pCol, i, end, pCtx, pBuf, isMinFunc);
  } else {
    numOfElems = numOfRows;
H
Haojun Liao 已提交
851

H
Haojun Liao 已提交
852
    switch (pCol->info.type) {
H
Haojun Liao 已提交
853 854 855 856
      case TSDB_DATA_TYPE_BOOL:
      case TSDB_DATA_TYPE_TINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
857
      }
H
Haojun Liao 已提交
858 859 860
      case TSDB_DATA_TYPE_SMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
861
      }
H
Haojun Liao 已提交
862 863 864
      case TSDB_DATA_TYPE_INT: {
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
865
      }
H
Haojun Liao 已提交
866 867 868
      case TSDB_DATA_TYPE_BIGINT: {
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, true);
        break;
H
Haojun Liao 已提交
869
      }
H
Haojun Liao 已提交
870 871 872 873 874 875 876
      case TSDB_DATA_TYPE_UTINYINT: {
        handleInt8Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
      }
      case TSDB_DATA_TYPE_USMALLINT: {
        handleInt16Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
        break;
H
Haojun Liao 已提交
877
      }
H
Haojun Liao 已提交
878
      case TSDB_DATA_TYPE_UINT: {
H
Haojun Liao 已提交
879
        handleInt32Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
H
Haojun Liao 已提交
880 881 882
        break;
      }
      case TSDB_DATA_TYPE_UBIGINT: {
H
Haojun Liao 已提交
883
        handleInt64Col(pCol->pData, start, numOfRows, pBuf, isMinFunc, false);
H
Haojun Liao 已提交
884 885 886 887 888 889 890 891 892 893 894
        break;
      }
      case TSDB_DATA_TYPE_FLOAT: {
        handleFloatCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
      case TSDB_DATA_TYPE_DOUBLE: {
        handleDoubleCol(pCol, start, numOfRows, pBuf, isMinFunc);
        break;
      }
    }
895 896

    pBuf->assign = true;
H
Haojun Liao 已提交
897
  }
H
Haojun Liao 已提交
898

H
Haojun Liao 已提交
899 900
_over:
  if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved) {
G
Ganlin Zhao 已提交
901 902 903 904
    int32_t code = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pBuf->nullTuplePos);
    if (code != TSDB_CODE_SUCCESS) {
      return code;
    }
H
Haojun Liao 已提交
905
    pBuf->nullTupleSaved = true;
H
Haojun Liao 已提交
906 907
  }

G
Ganlin Zhao 已提交
908 909
  *nElems = numOfElems;
  return TSDB_CODE_SUCCESS;
L
Liu Jicong 已提交
910
}