ob_partition_merge_policy.cpp 63.7 KB
Newer Older
W
wangzelin.wzl 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */

#define USING_LOG_PREFIX STORAGE
#include "ob_partition_merge_policy.h"
#include "share/ob_debug_sync_point.h"
#include "share/ob_debug_sync.h"
#include "share/ob_force_print_log.h"
#include "share/rc/ob_tenant_base.h"
19
#include "storage/memtable/ob_memtable.h"
W
wangzelin.wzl 已提交
20 21
#include "storage/tablet/ob_tablet.h"
#include "storage/tablet/ob_tablet_table_store.h"
22
#include "storage/tablet/ob_table_store_util.h"
W
wangzelin.wzl 已提交
23 24
#include "storage/ob_storage_schema.h"
#include "storage/ob_storage_struct.h"
25
#include "storage/ob_tenant_tablet_stat_mgr.h"
W
wangzelin.wzl 已提交
26 27 28
#include "storage/compaction/ob_compaction_diagnose.h"
#include "storage/compaction/ob_tenant_compaction_progress.h"
#include "observer/omt/ob_tenant_config_mgr.h"
29
#include "share/scn.h"
O
obdev 已提交
30
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
W
wangzelin.wzl 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

using namespace oceanbase;
using namespace common;
using namespace share;
using namespace storage;
using namespace blocksstable;
using namespace memtable;
using namespace share::schema;
using namespace blocksstable;

namespace oceanbase
{
namespace compaction
{

// Table-selection routines, one slot per merge type (MERGE_TYPE_MAX slots).
// The entries MUST keep the same order as the ObMergeType enumerators.
// NOTE(review): presumably the array is indexed by the merge type value -
// confirm against the header before reordering or adding an entry.
// The medium routine is registered for the last two slots.
ObPartitionMergePolicy::GetMergeTables
ObPartitionMergePolicy::get_merge_tables[MERGE_TYPE_MAX] = {
  ObPartitionMergePolicy::get_minor_merge_tables,
  ObPartitionMergePolicy::get_hist_minor_merge_tables,
  ObAdaptiveMergePolicy::get_meta_merge_tables,
  ObPartitionMergePolicy::get_mini_merge_tables,
  ObPartitionMergePolicy::get_medium_merge_tables,
  ObPartitionMergePolicy::get_medium_merge_tables,
};


int ObPartitionMergePolicy::get_neighbour_freeze_info(
    const int64_t snapshot_version,
    const ObITable *last_major,
    ObTenantFreezeInfoMgr::NeighbourFreezeInfo &freeze_info)
{
  int ret = OB_SUCCESS;
63 64 65 66 67 68 69 70 71 72 73
  if (OB_FAIL(MTL(ObTenantFreezeInfoMgr *)->get_neighbour_major_freeze(snapshot_version, freeze_info))) {
    if (OB_ENTRY_NOT_EXIST == ret) {
      LOG_WARN("Failed to get freeze info, use snapshot_gc_ts instead", K(ret), K(snapshot_version));
      ret = OB_SUCCESS;
      freeze_info.reset();
      freeze_info.next.freeze_version = INT64_MAX;
      if (OB_NOT_NULL(last_major)) {
        freeze_info.prev.freeze_version = last_major->get_snapshot_version();
      }
    } else {
      LOG_WARN("Failed to get neighbour major freeze info", K(ret), K(snapshot_version));
W
wangzelin.wzl 已提交
74
    }
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
  }
  return ret;
}

// Collect the tables taking part in a medium/major merge: the last major
// sstable (the base) followed by the minor sstables starting from the first
// one whose upper_trans_version reaches the base snapshot version.
//
// @param param   merge parameters; merge_type_ must be a major merge type
// @param ls      log stream, forwarded to get_multi_version_start()
// @param tablet  tablet whose table store is inspected
// @param result  [out] reset first, then filled with handle, versions and
//                schema version of the base table
// @return OB_SUCCESS; OB_INVALID_ARGUMENT for an invalid table store / param /
//         merge type; OB_ENTRY_NOT_EXIST when there is no major sstable;
//         OB_NO_NEED_MERGE when the base snapshot already reaches
//         param.merge_version_; OB_ERR_UNEXPECTED for a null minor table.
int ObPartitionMergePolicy::get_medium_merge_tables(
    const ObGetMergeTablesParam &param,
    ObLS &ls,
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  ObSSTable *base_table = nullptr;
  const ObTabletTableStore &table_store = tablet.get_table_store();
  result.reset();
  result.merge_version_ = param.merge_version_;
  result.suggest_merge_type_ = param.merge_type_;
  DEBUG_SYNC(BEFORE_GET_MAJOR_MGERGE_TABLES);

  if (OB_UNLIKELY(!table_store.is_valid() || !param.is_valid() || !is_major_merge_type(param.merge_type_))) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("get invalid argument", K(ret), K(table_store), K(param));
  } else if (OB_ISNULL(base_table = static_cast<ObSSTable*>(
                 table_store.get_major_sstables().get_boundary_table(true/*last*/)))) {
    ret = OB_ENTRY_NOT_EXIST;
    LOG_ERROR("major sstable not exist", K(ret), K(table_store));
  } else if (OB_FAIL(base_table->get_frozen_schema_version(result.base_schema_version_))) {
    LOG_WARN("failed to get frozen schema version", K(ret));
  } else if (OB_FAIL(result.handle_.add_table(base_table))) {
    LOG_WARN("failed to add base_table to result", K(ret));
  } else if (base_table->get_snapshot_version() >= param.merge_version_) {
    ret = OB_NO_NEED_MERGE;
    LOG_INFO("medium merge already finished", K(ret), KPC(base_table), K(result));
  } else {
    const ObSSTableArray &minor_tables = table_store.get_minor_sstables();
    // becomes true at the first minor sstable not covered by the base table;
    // that table and every later one is added to the result
    bool start_add_table_flag = false;
    for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count_; ++i) {
      if (OB_ISNULL(minor_tables[i])) {
        ret = OB_ERR_UNEXPECTED;
        LOG_ERROR("table must not null", K(ret), K(i), K(minor_tables));
      } else {
        // TODO: add right boundary for major
        if (!start_add_table_flag
            && minor_tables[i]->get_upper_trans_version() >= base_table->get_snapshot_version()) {
          start_add_table_flag = true;
        }
        if (start_add_table_flag && OB_FAIL(result.handle_.add_table(minor_tables[i]))) {
          LOG_WARN("failed to add table", K(ret));
        }
      }
    }
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(result.handle_.check_continues(nullptr))) {
      LOG_WARN("failed to check continues for major merge", K(ret));
    }
  }

  if (OB_SUCC(ret)) {
    if (OB_ISNULL(base_table)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("get unexpected null base table", K(ret), K(tablet));
    } else {
      result.version_range_.base_version_ = base_table->get_upper_trans_version();
      result.version_range_.multi_version_start_ = tablet.get_multi_version_start();
      result.version_range_.snapshot_version_ = param.merge_version_;
      if (OB_FAIL(get_multi_version_start(param.merge_type_, ls, tablet, result.version_range_))) {
        LOG_WARN("failed to get multi version_start", K(ret));
      } else {
        result.read_base_version_ = base_table->get_snapshot_version();
        result.create_snapshot_version_ = base_table->get_meta().get_basic_meta().create_snapshot_version_;
      }
    }
  }
  return ret;
}

int ObPartitionMergePolicy::get_mini_merge_tables(
    const ObGetMergeTablesParam &param,
148
    ObLS &ls,
W
wangzelin.wzl 已提交
149 150 151 152
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
153

W
wangzelin.wzl 已提交
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
  ObTenantFreezeInfoMgr::NeighbourFreezeInfo freeze_info;
  int64_t merge_inc_base_version = tablet.get_snapshot_version();
  const ObMergeType merge_type = param.merge_type_;
  const ObTabletTableStore &table_store = tablet.get_table_store();
  ObSEArray<ObTableHandleV2, MAX_MEMSTORE_CNT> memtable_handles;
  result.reset();
  DEBUG_SYNC(BEFORE_GET_MINOR_MGERGE_TABLES);

  if (MINI_MERGE != merge_type) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid args", K(ret), K(merge_type));
  } else if (OB_UNLIKELY(nullptr == tablet.get_memtable_mgr() || !table_store.is_valid())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("get unexpected null memtable mgr from tablet or invalid table store", K(ret), K(tablet), K(table_store));
  } else if (table_store.get_minor_sstables().count() >= MAX_SSTABLE_CNT_IN_STORAGE) {
    ret = OB_SIZE_OVERFLOW;
O
obdev 已提交
170 171
    LOG_ERROR("Too many sstables, delay mini merge until sstable count falls below MAX_SSTABLE_CNT",
              K(ret), K(table_store), K(tablet));
W
wangzelin.wzl 已提交
172
    // add compaction diagnose info
173
    ObPartitionMergePolicy::diagnose_table_count_unsafe(MINI_MERGE, tablet);
W
wangzelin.wzl 已提交
174 175
  } else if (OB_FAIL(tablet.get_memtable_mgr()->get_all_memtables(memtable_handles))) {
    LOG_WARN("failed to get all memtables from memtable mgr", K(ret));
O
obdev 已提交
176 177 178
  } else if (OB_FAIL(get_neighbour_freeze_info(merge_inc_base_version,
                                               table_store.get_major_sstables().get_boundary_table(true),
                                               freeze_info))) {
W
wangzelin.wzl 已提交
179
    LOG_WARN("failed to get next major freeze", K(ret), K(merge_inc_base_version), K(table_store));
180
  } else if (OB_FAIL(find_mini_merge_tables(param, freeze_info, ls, tablet, memtable_handles, result))) {
W
wangzelin.wzl 已提交
181 182 183 184
    if (OB_NO_NEED_MERGE != ret) {
      LOG_WARN("failed to find mini merge tables", K(ret), K(freeze_info));
    }
  } else if (!result.update_tablet_directly_
185
      && OB_FAIL(deal_with_minor_result(merge_type, ls, tablet, result))) {
W
wangzelin.wzl 已提交
186 187
    LOG_WARN("failed to deal with minor merge result", K(ret));
  }
188

W
wangzelin.wzl 已提交
189 190 191 192 193 194
  return ret;
}

// Walk the memtable handles in order and collect the frozen memtables a mini
// merge should dump, filling result.handle_, result.scn_range_ and
// result.version_range_.
//
// @param param             merge parameters (merge type is copied to result)
// @param freeze_info       next.freeze_version bounds the collected snapshot
// @param ls                log stream (not used by the visible code path)
// @param tablet            tablet providing clog checkpoint scn / tablet meta
// @param memtable_handles  candidate memtables
// @param result            [out] reset first, then filled
// @return OB_SUCCESS; OB_ERR_SYS for a null memtable; OB_NO_NEED_MERGE when
//         nothing has to be dumped; otherwise the error of add_table() or
//         refine_mini_merge_result().
int ObPartitionMergePolicy::find_mini_merge_tables(
    const ObGetMergeTablesParam &param,
    const ObTenantFreezeInfoMgr::NeighbourFreezeInfo &freeze_info,
    ObLS &ls,
    const storage::ObTablet &tablet,
    ObIArray<ObTableHandleV2> &memtable_handles,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  result.reset();
  // TODO: @dengzhi.ldz, remove max_snapshot_version, merge all frozen memtables
  // Keep max_snapshot_version currently because major merge must be done step by step
  int64_t max_snapshot_version = freeze_info.next.freeze_version;
  const SCN &clog_checkpoint_scn = tablet.get_clog_checkpoint_scn();

  // Freezing in the restart phase may not satisfy end >= last_max_sstable,
  // so the memtable cannot be filtered by scn;
  // all frozen memtables have to be taken out instead.
  ObIMemtable *memtable = nullptr;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  bool need_update_snapshot_version = false;

  for (int64_t i = 0; OB_SUCC(ret) && i < memtable_handles.count(); ++i) {
    if (OB_ISNULL(memtable = static_cast<ObIMemtable *>(memtable_handles.at(i).get_table()))) {
      ret = OB_ERR_SYS;
      LOG_ERROR("memtable must not null", K(ret), K(tablet));
    } else if (OB_UNLIKELY(memtable->is_active_memtable())) {
      LOG_DEBUG("skip active memtable", K(i), KPC(memtable), K(memtable_handles));
      break;
    } else if (!memtable->can_be_minor_merged()) {
      FLOG_INFO("memtable cannot mini merge now", K(ret), K(i), KPC(memtable), K(max_snapshot_version), K(memtable_handles), K(param));
      break;
    } else if (memtable->get_end_scn() <= clog_checkpoint_scn) {
      // memtable is already covered by the clog checkpoint
      if (tablet_id.is_special_merge_tablet()
          || memtable->get_snapshot_version() <= tablet.get_tablet_meta().snapshot_version_) {
        LOG_DEBUG("memtable wait to release", K(param), KPC(memtable));
        continue;
      }
      // its snapshot version is newer than the tablet meta: keep the memtable
      // so that the tablet snapshot version can be pushed forward
      need_update_snapshot_version = true;
    } else if (result.handle_.get_count() > 0) {
      if (result.scn_range_.end_scn_ < memtable->get_start_scn()) {
        FLOG_INFO("scn range  not continues, reset previous minor merge tables",
                  "last_end_scn", result.scn_range_.end_scn_, KPC(memtable), K(tablet));
        // mini merge always use the oldest memtable to dump
        break;
      } else if (memtable->get_snapshot_version() > max_snapshot_version) {
        // This judgment is only to try to prevent cross-major mini merge,
        // but when the result is empty, it can still be added
        LOG_INFO("max_snapshot_version is reached, no need find more tables",
                 K(max_snapshot_version), KPC(memtable));
        break;
      }
    }

    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(result.handle_.add_table(memtable))) {
      LOG_WARN("Failed to add memtable", KPC(memtable), K(ret));
    } else {
      // update end_scn/snapshot_version; start_scn comes from the first table
      if (1 == result.handle_.get_count()) {
        result.scn_range_.start_scn_ = memtable->get_start_scn();
      }
      result.scn_range_.end_scn_ = memtable->get_end_scn();
      result.version_range_.snapshot_version_ = MAX(memtable->get_snapshot_version(), result.version_range_.snapshot_version_);
    }
  } // end for

  if (OB_SUCC(ret)) {
    result.suggest_merge_type_ = param.merge_type_;
    result.version_range_.multi_version_start_ = tablet.get_multi_version_start();
    if (result.handle_.empty()) {
      ret = OB_NO_NEED_MERGE;
    } else if (result.scn_range_.end_scn_ <= clog_checkpoint_scn) {
      if (!need_update_snapshot_version) {
        ret = OB_NO_NEED_MERGE;
      } else {
        result.update_tablet_directly_ = true;
        result.version_range_.multi_version_start_ = 0; // set multi_version_start to pass tablet::init check
        LOG_INFO("meet empty force freeze memtable, could update tablet directly", K(ret), K(result));
      }
    } else if (OB_FAIL(refine_mini_merge_result(tablet, result))) {
      if (OB_NO_NEED_MERGE != ret) {
        LOG_WARN("failed to refine mini merge result", K(ret), K(tablet_id));
      }
    }

    // the collected snapshot crossed the next freeze point: ask for a major
    if (OB_SUCC(ret) && result.version_range_.snapshot_version_ > max_snapshot_version) {
      result.schedule_major_ = true;
    }
  }

  return ret;
}

int ObPartitionMergePolicy::deal_with_minor_result(
    const ObMergeType &merge_type,
289
    ObLS &ls,
W
wangzelin.wzl 已提交
290 291 292 293 294 295 296
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  if (result.handle_.empty()) {
    ret = OB_NO_NEED_MERGE;
    LOG_INFO("no need to minor merge", K(ret), K(merge_type), K(result));
297
  } else if (OB_UNLIKELY(!result.scn_range_.is_valid())) {
W
wangzelin.wzl 已提交
298 299
    ret = OB_INVALID_ARGUMENT;
    LOG_ERROR("Invalid argument to check result", K(ret), K(result));
300
  } else if (OB_FAIL(result.handle_.check_continues(&result.scn_range_))) {
W
wangzelin.wzl 已提交
301
    LOG_WARN("failed to check continues", K(ret), K(result));
302 303
  } else if (OB_FAIL(get_multi_version_start(merge_type, ls, tablet, result.version_range_))) {
    LOG_WARN("failed to get kept multi_version_start", K(ret), K(merge_type), K(tablet));
W
wangzelin.wzl 已提交
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
  } else {
    result.schema_version_ = tablet.get_storage_schema().schema_version_;
    if (MINI_MERGE == merge_type) {
      ObITable *table = NULL;
      result.base_schema_version_ = result.schema_version_;
      for (int64_t i = 0; OB_SUCC(ret) && i < result.handle_.get_count(); ++i) {
        if (OB_ISNULL(table = result.handle_.get_table(i)) || !table->is_memtable()) {
          ret = OB_ERR_SYS;
          LOG_ERROR("get unexpected table", KPC(table), K(ret));
        } else {
          result.schema_version_ = MAX(result.schema_version_, reinterpret_cast<ObIMemtable *>(table)->get_max_schema_version());
        }
      }
    } else {
      if (OB_FAIL(result.handle_.get_table(0)->get_frozen_schema_version(result.base_schema_version_))) {
        LOG_WARN("failed to get frozen schema version", K(ret), K(result));
      }
    }
322 323 324 325 326 327 328 329 330
    if (OB_SUCC(ret)) {
      result.version_range_.base_version_ = 0;
      if (OB_SUCC(ret) && !is_mini_merge(merge_type)) {
        const ObTabletTableStore &table_store = tablet.get_table_store();
        if (OB_FAIL(table_store.get_recycle_version(result.version_range_.multi_version_start_, result.version_range_.base_version_))) {
          LOG_WARN("Fail to get table store recycle version", K(ret), K(result.version_range_), K(table_store));
        }
      }
    }
W
wangzelin.wzl 已提交
331 332 333 334
  }
  return ret;
}

335
int ObPartitionMergePolicy::get_minor_merge_tables(
W
wangzelin.wzl 已提交
336
    const ObGetMergeTablesParam &param,
337
    ObLS &ls,
W
wangzelin.wzl 已提交
338 339 340 341 342 343 344 345 346 347 348
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  int64_t min_snapshot_version = 0;
  int64_t max_snapshot_version = 0;
  const ObMergeType merge_type = param.merge_type_;
  result.reset();
  DEBUG_SYNC(BEFORE_GET_MINOR_MGERGE_TABLES);

  // no need to distinguish data tablet and tx tablet, all minor tables included
349
  if (OB_UNLIKELY(!is_minor_merge(merge_type))) {
W
wangzelin.wzl 已提交
350 351
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("get invalid arguments", K(ret), K(merge_type));
352
  } else if (tablet.is_ls_inner_tablet()) {
W
wangzelin.wzl 已提交
353 354 355 356 357 358 359
    min_snapshot_version = 0;
    max_snapshot_version = INT64_MAX;
  } else if (OB_FAIL(get_boundary_snapshot_version(tablet, min_snapshot_version, max_snapshot_version))) {
    LOG_WARN("fail to calculate boundary version", K(ret));
  }

  if (OB_FAIL(ret)) {
360 361 362 363 364 365 366 367 368
  } else if (OB_FAIL(find_minor_merge_tables(param,
                                             min_snapshot_version,
                                             max_snapshot_version,
                                             ls,
                                             tablet,
                                             result))) {
    if (OB_NO_NEED_MERGE != ret) {
      LOG_WARN("failed to get minor merge tables", K(ret), K(max_snapshot_version));
    }
W
wangzelin.wzl 已提交
369
  }
370

W
wangzelin.wzl 已提交
371 372 373 374 375 376
  return ret;
}

int ObPartitionMergePolicy::get_boundary_snapshot_version(
    const ObTablet &tablet,
    int64_t &min_snapshot,
377 378
    int64_t &max_snapshot,
    const bool check_table_cnt)
W
wangzelin.wzl 已提交
379 380 381 382 383 384 385
{
  int ret = OB_SUCCESS;
  int64_t merge_inc_base_version = tablet.get_snapshot_version();
  ObTenantFreezeInfoMgr::NeighbourFreezeInfo freeze_info;
  const ObTabletTableStore &table_store = tablet.get_table_store();
  ObITable *last_major_table = table_store.get_major_sstables().get_boundary_table(true);

386 387 388 389
  if (OB_UNLIKELY(tablet.is_ls_inner_tablet())) {
    ret = OB_NOT_SUPPORTED;
    LOG_WARN("not supported for special tablet", K(ret), K(tablet));
  } else if (OB_UNLIKELY(!table_store.is_valid())) {
W
wangzelin.wzl 已提交
390 391 392 393 394 395
    ret = OB_ERR_SYS;
    LOG_WARN("table store not valid", K(ret), K(table_store));
  } else if (OB_FAIL(get_neighbour_freeze_info(merge_inc_base_version,
                                               last_major_table,
                                               freeze_info))) {
    LOG_WARN("failed to get freeze info", K(ret), K(merge_inc_base_version), K(table_store));
396
  } else if (check_table_cnt && table_store.get_table_count() >= OB_UNSAFE_TABLE_CNT) {
W
wangzelin.wzl 已提交
397 398 399 400 401 402 403
    max_snapshot = INT64_MAX;
    if (table_store.get_table_count() >= OB_EMERGENCY_TABLE_CNT) {
      min_snapshot = 0;
    } else if (OB_NOT_NULL(last_major_table)) {
      min_snapshot = last_major_table->get_snapshot_version();
    }
  } else {
404 405 406 407 408
    if (OB_NOT_NULL(last_major_table)) {
      min_snapshot = max(last_major_table->get_snapshot_version(), freeze_info.prev.freeze_version);
    } else {
      min_snapshot = freeze_info.prev.freeze_version;
    }
409
    max_snapshot = freeze_info.next.freeze_version;
410 411 412 413 414 415 416 417 418 419

    int64_t max_medium_scn = 0;
    if (OB_FAIL(tablet.get_max_medium_snapshot(max_medium_scn))) {
      LOG_WARN("failed to get medium from memtables", K(ret));
    } else {
      min_snapshot = max(min_snapshot, max_medium_scn);
    }
    LOG_DEBUG("get boundary snapshot", K(ret), "tablet_id", tablet.get_tablet_meta().tablet_id_, K(table_store), K(min_snapshot), K(max_snapshot),
        K(tablet.get_medium_compaction_info_list()), K(max_medium_scn), KPC(last_major_table),
        K(freeze_info));
W
wangzelin.wzl 已提交
420 421 422 423
  }
  return ret;
}

424
int ObPartitionMergePolicy::find_minor_merge_tables(
W
wangzelin.wzl 已提交
425 426 427
    const ObGetMergeTablesParam &param,
    const int64_t min_snapshot_version,
    const int64_t max_snapshot_version,
428
    ObLS &ls,
W
wangzelin.wzl 已提交
429 430 431 432 433 434 435
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  result.reset_handle_and_range();
  const ObTabletTableStore &table_store = tablet.get_table_store();
  ObTablesHandleArray minor_tables;
436
  int64_t minor_compact_trigger = DEFAULT_MINOR_COMPACT_TRIGGER;
W
wangzelin.wzl 已提交
437 438 439 440 441 442 443 444 445

  if (OB_UNLIKELY(!table_store.is_valid())) {
    ret = OB_ERR_SYS;
    LOG_WARN("unexpected table store", K(ret), K(table_store));
  } else if (OB_FAIL(table_store.get_mini_minor_sstables(minor_tables))) {
    LOG_WARN("failed to get mini minor sstables", K(ret), K(table_store));
  } else {
    ObSSTable *table = nullptr;
    bool found_greater = false;
446 447 448 449 450 451 452 453
    {
      omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
      if (tenant_config.is_valid()) {
        minor_compact_trigger = tenant_config->minor_compact_trigger;
      }
    }

    ObSEArray<ObSSTable*, 16> minor_merge_candidates;
W
wangzelin.wzl 已提交
454
    for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.get_count(); ++i) {
455 456 457
      if (OB_ISNULL(table = static_cast<ObSSTable *>(minor_tables.get_table(i)))) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("table must not null", K(ret), K(i), K(table_store));
W
wangzelin.wzl 已提交
458 459 460 461
      } else if (!found_greater && table->get_upper_trans_version() <= min_snapshot_version) {
        continue;
      } else {
        found_greater = true;
462 463 464 465 466 467 468
        if (0 == minor_merge_candidates.count()) {
        } else if (is_history_minor_merge(param.merge_type_) && table->get_upper_trans_version() > max_snapshot_version) {
          break;
        } else if (table_store.get_table_count() < OB_UNSAFE_TABLE_CNT &&
            table->get_max_merged_trans_version() > max_snapshot_version) {
          LOG_INFO("max_snapshot_version reached, stop find more tables", K(param), K(max_snapshot_version), KPC(table));
          break;
W
wangzelin.wzl 已提交
469
        }
470 471
        if (OB_FAIL(minor_merge_candidates.push_back(table))) {
          LOG_WARN("failed to add table", K(ret));
W
wangzelin.wzl 已提交
472 473
        }
      }
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
    }

    int64_t left_border = 0;
    int64_t right_border = minor_merge_candidates.count();
    if (OB_FAIL(ret)) {
    } else if (MINOR_MERGE != param.merge_type_) {
    } else if (OB_FAIL(refine_minor_merge_tables(tablet, minor_merge_candidates, left_border, right_border))) {
      LOG_WARN("failed to adjust mini minor merge tables", K(ret));
    }

    for (int64_t i = left_border; OB_SUCC(ret) && i < right_border; ++i) {
      ObSSTable *table = minor_merge_candidates.at(i);
      if (result.handle_.get_count() > 0 && result.scn_range_.end_scn_ < table->get_start_scn()) {
        LOG_INFO("log ts not continues, reset previous minor merge tables",
                "last_end_log_ts", result.scn_range_.end_scn_, KPC(table));
        result.reset_handle_and_range();
      }
      if (OB_FAIL(result.handle_.add_table(table))) {
        LOG_WARN("Failed to add table", K(ret), KPC(table));
      } else {
        if (1 == result.handle_.get_count()) {
          result.scn_range_.start_scn_ = table->get_start_scn();
        }
        result.scn_range_.end_scn_ = table->get_end_scn();
      }
    }
W
wangzelin.wzl 已提交
500 501 502 503
  }

  if (OB_SUCC(ret)) {
    result.suggest_merge_type_ = param.merge_type_;
504 505 506 507 508
    result.version_range_.snapshot_version_ = tablet.get_snapshot_version();
    if (OB_FAIL(refine_minor_merge_result(minor_compact_trigger, result))) {
      if (OB_NO_NEED_MERGE != ret) {
        LOG_WARN("failed to refine_minor_merge_result", K(ret));
      }
W
wangzelin.wzl 已提交
509
    } else {
510
      if (OB_FAIL(deal_with_minor_result(param.merge_type_, ls, tablet, result))) {
W
wangzelin.wzl 已提交
511 512 513
        LOG_WARN("Failed to deal with minor merge result", K(ret), K(param), K(result));
      } else {
        FLOG_INFO("succeed to get minor merge tables", K(min_snapshot_version), K(max_snapshot_version),
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
            K(result), K(tablet));
      }
    }
  } else if (OB_NO_NEED_MERGE == ret && table_store.get_minor_sstables().count() >= DIAGNOSE_TABLE_CNT_IN_STORAGE) {
      ADD_SUSPECT_INFO(MINOR_MERGE,
                       tablet.get_tablet_meta().ls_id_,
                       tablet.get_tablet_meta().tablet_id_,
                       "can't schedule minor merge.",
                       K(min_snapshot_version), K(max_snapshot_version),
                       "mini_sstable_cnt", table_store.get_minor_sstables().count());
  }
  return ret;
}

// Narrow the candidate range [left_border, right_border) of a minor merge
// with respect to the META_MAJOR sstable of the tablet.
//
// The leading candidates whose upper_trans_version does not exceed the meta
// sstable's snapshot version are counted as "covered". If they make up at
// least half of the candidates, only the covered prefix is kept; otherwise the
// covered prefix is skipped.
//
// @param tablet        tablet providing tablet id and meta sstable
// @param merge_tables  candidate sstables in order
// @param left_border   [out] first index to merge
// @param right_border  [out] one past the last index to merge
// @return OB_SUCCESS, or OB_ERR_UNEXPECTED when a candidate is null (the
//         borders then stay at 0 / merge_tables.count()).
int ObPartitionMergePolicy::refine_minor_merge_tables(
    const ObTablet &tablet,
    const ObIArray<ObSSTable *> &merge_tables,
    int64_t &left_border,
    int64_t &right_border)
{
  int ret = OB_SUCCESS;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  ObITable *meta_table = tablet.get_table_store().get_extend_sstable(ObTabletTableStore::META_MAJOR);
  int64_t covered_by_meta_table_cnt = 0;
  left_border = 0;
  right_border = merge_tables.count();

  // nothing is counted for special merge tablets, for fewer than two
  // candidates, or when no meta sstable exists
  const bool need_count_covered = !tablet_id.is_special_merge_tablet()
                                  && merge_tables.count() >= 2
                                  && nullptr != meta_table;
  if (need_count_covered) {
    // no need meta merge, but exist meta_sstable
    for (int64_t i = 0; OB_SUCC(ret) && i < merge_tables.count(); ++i) {
      if (OB_ISNULL(merge_tables.at(i))) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("get unexpected null table", K(ret), K(i), K(merge_tables));
      } else if (merge_tables.at(i)->get_upper_trans_version() > meta_table->get_snapshot_version()) {
        break;
      } else {
        ++covered_by_meta_table_cnt;
      }
    }
  }

  if (OB_SUCC(ret)) {
    if (covered_by_meta_table_cnt * 2 >= merge_tables.count()) {
      right_border = covered_by_meta_table_cnt;
    } else {
      left_border = covered_by_meta_table_cnt;
    }
  }
  return ret;
}

int ObPartitionMergePolicy::get_hist_minor_merge_tables(
    const ObGetMergeTablesParam &param,
569
    ObLS &ls,
W
wangzelin.wzl 已提交
570 571 572 573 574 575 576 577
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  const ObMergeType merge_type = param.merge_type_;
  int64_t max_snapshot_version = 0;
  result.reset();

578
  if (OB_UNLIKELY(!is_history_minor_merge(merge_type))) {
W
wangzelin.wzl 已提交
579 580 581 582 583 584
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid args", K(ret), K(merge_type));
  } else if (OB_FAIL(deal_hist_minor_merge(tablet, max_snapshot_version))) {
    if (OB_NO_NEED_MERGE != ret) {
      LOG_WARN("failed to deal hist minor merge", K(ret));
    }
585 586 587 588 589
  } else if (OB_FAIL(find_minor_merge_tables(param, 0/*min_snapshot*/,
      max_snapshot_version, ls, tablet, result))) {
    if (OB_NO_NEED_MERGE != ret) {
      LOG_WARN("failed to get minor tables for hist minor merge", K(ret));
    }
W
wangzelin.wzl 已提交
590 591 592 593
  }
  return ret;
}

594
int ObPartitionMergePolicy::deal_hist_minor_merge(
W
wangzelin.wzl 已提交
595
    const ObTablet &tablet,
596
    int64_t &max_snapshot_version)
W
wangzelin.wzl 已提交
597 598 599
{
  int ret = OB_SUCCESS;
  const ObTabletTableStore &table_store = tablet.get_table_store();
600 601 602
  const int64_t hist_threshold = cal_hist_minor_merge_threshold();
  ObITable *first_major_table = nullptr;
  max_snapshot_version = 0;
W
wangzelin.wzl 已提交
603

604
  if (!table_store.is_valid()) {
W
wangzelin.wzl 已提交
605
    ret = OB_ERR_UNEXPECTED;
606 607 608 609 610 611 612 613 614 615
    LOG_ERROR("get unexpected invalid table store", K(ret), K(table_store));
  } else if (table_store.get_minor_sstables().count_ < hist_threshold) {
    ret = OB_NO_NEED_MERGE;
  } else if (OB_ISNULL(first_major_table = table_store.get_major_sstables().get_boundary_table(false))) {
    // index table during building, need compat with continuous multi version
    if (0 == (max_snapshot_version = MTL(ObTenantFreezeInfoMgr*)->get_latest_frozen_version())) {
      // no freeze info found, wait normal mini minor to free sstable
      ret = OB_NO_NEED_MERGE;
      LOG_WARN("No freeze range to do hist minor merge for buiding index", K(ret), K(table_store));
    }
W
wangzelin.wzl 已提交
616
  } else {
617 618 619 620 621 622 623 624 625
    ObTenantFreezeInfoMgr::NeighbourFreezeInfo freeze_info;
    ObSEArray<ObTenantFreezeInfoMgr::FreezeInfo, 8> freeze_infos;
    if (OB_FAIL(MTL(ObTenantFreezeInfoMgr *)->get_freeze_info_behind_major_snapshot(
                first_major_table->get_snapshot_version(),
                freeze_infos))) {
      if (OB_ENTRY_NOT_EXIST == ret) {
        ret = OB_NO_NEED_MERGE;
      } else {
        LOG_WARN("Failed to get freeze infos behind major version", K(ret), KPC(first_major_table));
W
wangzelin.wzl 已提交
626
      }
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
    } else if (freeze_infos.count() <= 1) {
      // only one major freeze found, wait normal mini minor to reduce table count
      ret = OB_NO_NEED_MERGE;
      LOG_DEBUG("No enough freeze range to do hist minor merge", K(ret), K(freeze_infos));
    } else {
      int64_t table_cnt = 0;
      int64_t min_minor_version = 0;
      int64_t max_minor_version = 0;
      if (OB_FAIL(get_boundary_snapshot_version(tablet, min_minor_version, max_minor_version))) {
        LOG_WARN("fail to calculate boundary version", K(ret));
      } else {
        ObSSTable *table = nullptr;
        const ObSSTableArray &minor_tables = table_store.get_minor_sstables();
        for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count_; ++i) {
          if (OB_ISNULL(table = static_cast<ObSSTable*>(minor_tables[i]))) {
            ret = OB_ERR_SYS;
            LOG_ERROR("table must not null", K(ret), K(i), K(table_store));
          } else if (table->get_upper_trans_version() <= min_minor_version) {
            table_cnt++;
          } else {
            break;
          }
W
wangzelin.wzl 已提交
649 650
        }

651 652 653 654 655 656
        if (OB_SUCC(ret)) {
          if (1 < table_cnt) {
            max_snapshot_version = min_minor_version;
          } else {
            ret = OB_NO_NEED_MERGE;
          }
W
wangzelin.wzl 已提交
657 658 659 660 661 662 663
        }
      }
    }
  }
  return ret;
}

664 665 666 667 668 669
// Asks ObCompactionDiagnoseMgr::diagnose_dag for the dag of `merge_type` on (ls_id, tablet_id).
// When the call succeeds and the returned progress is valid, the progress is appended to `buf`
// under the key "minor_merge_progress".
// OB_HASH_NOT_EXIST from the lookup is treated as "no such dag" and reported as OB_SUCCESS;
// any other error is logged and returned.
int ObPartitionMergePolicy::diagnose_minor_dag(
    ObMergeType merge_type,
    const ObLSID ls_id,
    const ObTabletID tablet_id,
    char *buf,
    const int64_t buf_len)
{
  int ret = OB_SUCCESS;
  ObTabletMergeExecuteDag dag;
  ObDiagnoseTabletCompProgress progress;
  if (OB_FAIL(ObCompactionDiagnoseMgr::diagnose_dag(
          merge_type, ls_id, tablet_id, ObVersion::MIN_VERSION, dag, progress))) {
    if (OB_HASH_NOT_EXIST == ret) {
      // no minor merge dag, nothing to report
      ret = OB_SUCCESS;
    } else {
      LOG_WARN("failed to init dag", K(ret), K(ls_id), K(tablet_id));
    }
  } else if (progress.is_valid()) {
    // a dag exists, expose its progress in the diagnose buffer
    ADD_COMPACTION_INFO_PARAM(buf, buf_len, "minor_merge_progress", progress);
  }
  return ret;
}

int ObPartitionMergePolicy::diagnose_table_count_unsafe(
694
    const ObMergeType merge_type,
W
wangzelin.wzl 已提交
695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
    const storage::ObTablet &tablet)
{
  int ret = OB_SUCCESS;
  int tmp_ret = OB_SUCCESS;
  const int64_t buf_len = ObScheduleSuspectInfoMgr::EXTRA_INFO_LEN;
  char tmp_str[buf_len] = "\0";
  const ObStorageSchema &storage_schema = tablet.get_storage_schema();
  const ObTabletTableStore &table_store = tablet.get_table_store();
  const ObSSTableArray &major_tables = table_store.get_major_sstables();
  const ObSSTableArray &minor_tables = table_store.get_minor_sstables();

  // add sstable info
  const int64_t major_table_count = major_tables.count_;
  const int64_t minor_table_count = minor_tables.count_;
  ADD_COMPACTION_INFO_PARAM(tmp_str, buf_len, K(major_table_count), K(minor_table_count));

  if (OB_TMP_FAIL(ObCompactionDiagnoseMgr::check_system_compaction_config(tmp_str, buf_len))) {
    LOG_WARN("failed to check system compaction config", K(tmp_ret), K(tmp_str));
  }

  // check min_reserved_snapshot
  const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  int64_t min_reserved_snapshot = 0;
  int64_t min_merged_snapshot = INT64_MAX;
  ObString snapshot_from_str;
  const ObSSTable *major_sstable = static_cast<const ObSSTable *>(major_tables.get_boundary_table(false/*last*/));
  if (OB_ISNULL(major_sstable)) {
    const ObSSTable *minor_sstable = static_cast<const ObSSTable *>(minor_tables.get_boundary_table(false/*last*/));
724
    ADD_COMPACTION_INFO_PARAM(tmp_str, buf_len, "no major sstable. first_minor_start_scn = ", minor_sstable->get_start_scn());
W
wangzelin.wzl 已提交
725 726 727 728 729 730 731 732 733 734 735 736
  } else if (FALSE_IT(min_merged_snapshot = major_sstable->get_snapshot_version())) {
  } else if (OB_FAIL(MTL_CALL_FREEZE_INFO_MGR(diagnose_min_reserved_snapshot,
      tablet_id,
      min_merged_snapshot,
      min_reserved_snapshot,
      snapshot_from_str))) {
    LOG_WARN("failed to get min reserved snapshot", K(ret), K(tablet_id));
  } else if (snapshot_from_str.length() > 0) {
    ADD_COMPACTION_INFO_PARAM(tmp_str, buf_len, "snapstho_type", snapshot_from_str, K(min_reserved_snapshot));
  }

  // check have minor merge DAG
737
  if (OB_TMP_FAIL(diagnose_minor_dag(MINOR_MERGE, ls_id, tablet_id, tmp_str, buf_len))) {
W
wangzelin.wzl 已提交
738 739
    LOG_WARN("failed to diagnose minor dag", K(tmp_ret), K(ls_id), K(tablet_id), K(tmp_str));
  }
740
  if (OB_TMP_FAIL(diagnose_minor_dag(HISTORY_MINOR_MERGE, ls_id, tablet_id, tmp_str, buf_len))) {
W
wangzelin.wzl 已提交
741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
    LOG_WARN("failed to diagnose history minor dag", K(tmp_ret), K(ls_id), K(tablet_id), K(tmp_str));
  }

  // add suspect info
  ADD_SUSPECT_INFO(merge_type, ls_id, tablet_id,
      "sstable count is not safe", "extra_info", tmp_str);
  return ret;
}

int ObPartitionMergePolicy::refine_mini_merge_result(
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  ObITable *last_table = nullptr;
  const ObTabletTableStore &table_store = tablet.get_table_store();

  if (OB_UNLIKELY(!table_store.is_valid())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("Table store not valid", K(ret), K(table_store));
  } else if (OB_ISNULL(last_table = table_store.get_minor_sstables().get_boundary_table(true/*last*/))) {
    // no minor sstable, skip to cut memtable's boundary
763
  } else if (result.scn_range_.start_scn_ > last_table->get_end_scn()) {
W
wangzelin.wzl 已提交
764
    ret = OB_ERR_UNEXPECTED;
O
obdev 已提交
765 766
    LOG_ERROR("Unexpected uncontinuous scn_range in mini merge",
              K(ret), K(result), KPC(last_table), K(table_store), K(tablet));
767
  } else if (result.scn_range_.start_scn_ < last_table->get_end_scn()
W
wangzelin.wzl 已提交
768
      && !tablet.get_tablet_meta().tablet_id_.is_special_merge_tablet()) {
O
obdev 已提交
769
    // fix start_scn to make scn_range continuous in migrate phase for issue 42832934
770
    if (result.scn_range_.end_scn_ <= last_table->get_end_scn()) {
O
obdev 已提交
771
      ret = OB_ERR_UNEXPECTED;
O
obdev 已提交
772 773
      LOG_WARN("No need mini merge memtable which is covered by existing sstable",
               K(ret), K(result), KPC(last_table), K(table_store), K(tablet));
W
wangzelin.wzl 已提交
774
    } else {
775
      result.scn_range_.start_scn_ = last_table->get_end_scn();
O
obdev 已提交
776
      FLOG_INFO("Fix mini merge result scn range", K(ret), K(result), KPC(last_table), K(table_store), K(tablet));
W
wangzelin.wzl 已提交
777 778 779 780 781
    }
  }
  return ret;
}

782 783 784
int ObPartitionMergePolicy::refine_minor_merge_result(
    const int64_t minor_compact_trigger,
    ObGetMergeTablesResult &result)
W
wangzelin.wzl 已提交
785 786 787
{
  int ret = OB_SUCCESS;
  ObMergeType &merge_type = result.suggest_merge_type_;
788
  if (result.handle_.get_count() <= MAX(minor_compact_trigger, 1)) {
789 790 791 792
    ret = OB_NO_NEED_MERGE;
    LOG_DEBUG("minor refine, no need to do minor merge", K(result));
    result.handle_.reset();
  } else if (OB_UNLIKELY(!is_minor_merge_type(merge_type))) {
W
wangzelin.wzl 已提交
793 794
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("Unexpected merge type to refine merge tables", K(result), K(ret));
795 796
  } else if (0 == minor_compact_trigger || result.handle_.get_count() >= OB_UNSAFE_TABLE_CNT) {
    // no refine
W
wangzelin.wzl 已提交
797 798 799 800
  } else {
    ObSEArray<ObITable *, MAX_SSTABLE_CNT_IN_STORAGE> mini_tables;
    ObITable *table = NULL;
    ObSSTable *sstable = NULL;
801 802 803
    int64_t large_sstable_cnt = 0;
    int64_t large_sstable_row_cnt = 0;
    int64_t mini_sstable_row_cnt = 0;
W
wangzelin.wzl 已提交
804 805 806 807
    for (int64_t i = 0; OB_SUCC(ret) && i < result.handle_.get_count(); ++i) {
      if (OB_ISNULL(table = result.handle_.get_table(i)) || !table->is_minor_sstable()) {
        ret = OB_ERR_SYS;
        LOG_ERROR("get unexpected table", KP(table), K(ret));
808 809 810 811 812 813 814 815 816 817 818 819 820 821
      } else if (FALSE_IT(sstable = reinterpret_cast<ObSSTable*>(table))) {
      } else {
        if (sstable->get_meta().get_basic_meta().row_count_ > OB_LARGE_MINOR_SSTABLE_ROW_COUNT) { // large sstable
          ++large_sstable_cnt;
          large_sstable_row_cnt += sstable->get_meta().get_basic_meta().row_count_;
          if (mini_tables.count() > minor_compact_trigger) {
            break;
          } else {
            mini_tables.reset();
            continue;
          }
        } else {
          mini_sstable_row_cnt += sstable->get_meta().get_basic_meta().row_count_;
        }
W
wangzelin.wzl 已提交
822 823 824
        if (OB_FAIL(mini_tables.push_back(table))) {
          LOG_WARN("Failed to push mini minor table into array", K(ret));
        }
825 826 827 828 829 830 831 832
      }
    } // end of for

    int64_t size_amplification_factor = OB_DEFAULT_COMPACTION_AMPLIFICATION_FACTOR;
    {
      omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
      if (tenant_config.is_valid()) {
        size_amplification_factor = tenant_config->_minor_compaction_amplification_factor;
W
wangzelin.wzl 已提交
833 834
      }
    }
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
    if (OB_FAIL(ret)) {
    } else if (large_sstable_cnt > 1
        || mini_tables.count() <= minor_compact_trigger
        || mini_sstable_row_cnt > (large_sstable_row_cnt * size_amplification_factor / 100)) {
      // no refine, use current result to compaction
    } else if (mini_tables.count() != result.handle_.get_count()) {
      // reset the merge result, mini sstable merge into a new mini sstable
      result.reset_handle_and_range();
      for (int64_t i = 0; OB_SUCC(ret) && i < mini_tables.count(); i++) {
        ObITable *table = mini_tables.at(i);
        if (OB_UNLIKELY(0 != i && table->get_start_scn() != result.scn_range_.end_scn_)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unexepcted table array", K(ret), K(i), KPC(table), K(mini_tables));
        } else if (OB_FAIL(result.handle_.add_table(table))) {
          LOG_WARN("Failed to add table to minor merge result", KPC(table), K(ret));
        } else {
          if (1 == result.handle_.get_count()) {
            result.scn_range_.start_scn_ = table->get_start_scn();
W
wangzelin.wzl 已提交
853
          }
854
          result.scn_range_.end_scn_ = table->get_end_scn();
W
wangzelin.wzl 已提交
855 856
        }
      }
857 858 859
      if (OB_SUCC(ret)) {
        LOG_INFO("minor refine, mini minor merge sstable refine info", K(result));
      }
W
wangzelin.wzl 已提交
860 861 862 863 864
    }
  }
  return ret;
}

865 866
// call this func means have serialized medium compaction clog = medium_snapshot
int ObPartitionMergePolicy::check_need_medium_merge(
O
obdev 已提交
867
    ObLS &ls,
868 869 870
    storage::ObTablet &tablet,
    const int64_t medium_snapshot,
    bool &need_merge,
O
obdev 已提交
871 872
    bool &can_merge,
    bool &need_force_freeze)
W
wangzelin.wzl 已提交
873
{
874 875 876 877 878 879 880 881 882
  int ret = OB_SUCCESS;
  need_merge = false;
  can_merge = false;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  ObTabletTableStore &table_store = tablet.get_table_store();
  ObITable *last_major = table_store.get_major_sstables().get_boundary_table(true/*last*/);
  const bool is_tablet_data_status_complete = tablet.get_tablet_meta().ha_status_.is_data_status_complete();
  if (nullptr == last_major) {
    // no major, no medium
W
wangzelin.wzl 已提交
883
  } else {
884 885 886
    need_merge = last_major->get_snapshot_version() < medium_snapshot;
    if (need_merge
        && is_tablet_data_status_complete
O
obdev 已提交
887
        && ObTenantTabletScheduler::check_weak_read_ts_ready(medium_snapshot, ls)) {
O
obdev 已提交
888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911
      can_merge = tablet.get_snapshot_version() >= medium_snapshot;
      if (!can_merge) {
        ObTabletMemtableMgr *memtable_mgr = nullptr;
        ObTableHandleV2 memtable_handle;
        memtable::ObMemtable *last_frozen_memtable = nullptr;
        if (OB_ISNULL(memtable_mgr = static_cast<ObTabletMemtableMgr *>(tablet.get_memtable_mgr()))) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("memtable mgr is unexpected null", K(ret), K(tablet));
        } else if (OB_FAIL(memtable_mgr->get_last_frozen_memtable(memtable_handle))) {
          if (OB_ENTRY_NOT_EXIST == ret) { // no frozen memtable, need force freeze
            need_force_freeze = true;
            ret = OB_SUCCESS;
          } else {
            LOG_WARN("failed to get last frozen memtable", K(ret));
          }
        } else if (OB_FAIL(memtable_handle.get_data_memtable(last_frozen_memtable))) {
          LOG_WARN("failed to get last frozen memtable", K(ret));
        } else {
          need_force_freeze = last_frozen_memtable->get_snapshot_version() < medium_snapshot;
          if (!need_force_freeze) {
            LOG_INFO("tablet no need force freeze", K(ret), K(tablet_id), K(medium_snapshot), KPC(last_frozen_memtable));
          }
        }
      }
912
    }
W
wangzelin.wzl 已提交
913
  }
914 915 916 917 918 919

  if (need_merge && !can_merge && REACH_TENANT_TIME_INTERVAL(60L * 1000L * 1000L)) {
    LOG_INFO("check_need_medium_merge", K(ret), "ls_id", tablet.get_tablet_meta().ls_id_, K(tablet_id),
             K(need_merge), K(can_merge), K(medium_snapshot), K(is_tablet_data_status_complete));
    ADD_SUSPECT_INFO(MAJOR_MERGE, tablet.get_tablet_meta().ls_id_, tablet_id,
                     "need major merge but can't merge now",
O
obdev 已提交
920
                     K(medium_snapshot), K(is_tablet_data_status_complete), K(need_force_freeze),
921 922 923
                     "max_serialized_medium_scn", tablet.get_tablet_meta().max_serialized_medium_scn_);
  }
  return ret;
W
wangzelin.wzl 已提交
924 925 926 927 928 929 930 931 932 933 934 935
}

int64_t ObPartitionMergePolicy::cal_hist_minor_merge_threshold()
{
  int64_t compact_trigger = DEFAULT_MINOR_COMPACT_TRIGGER;
  omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
  if (tenant_config.is_valid()) {
    compact_trigger = tenant_config->minor_compact_trigger;
  }
  return MIN((1 + compact_trigger) * OB_HIST_MINOR_FACTOR, MAX_TABLE_CNT_IN_STORAGE / 2);
}

936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996
// Fills result_version_range.multi_version_start_ for a merge on `tablet`.
//
//  - ls inner tablet: multi_version_start_ is set to INT64_MAX and nothing else is done.
//  - otherwise the expected value comes from ObTablet::get_kept_multi_version_start. If that
//    fails for a mini merge, or with OB_TENANT_NOT_EXIST, the tablet's own multi_version_start
//    is used and the error is cleared; any other failure is returned.
//  - the expected value is applied only when it is not smaller than the current
//    multi_version_start_, and it is capped at snapshot_version_.
int ObPartitionMergePolicy::get_multi_version_start(
    const ObMergeType merge_type,
    ObLS &ls,
    const ObTablet &tablet,
    ObVersionRange &result_version_range)
{
  int ret = OB_SUCCESS;
  int64_t expect_multi_version_start = 0;
  if (tablet.is_ls_inner_tablet()) {
    result_version_range.multi_version_start_ = INT64_MAX;
  } else {
    if (OB_FAIL(ObTablet::get_kept_multi_version_start(ls, tablet, expect_multi_version_start))) {
      if (is_mini_merge(merge_type) || OB_TENANT_NOT_EXIST == ret) {
        expect_multi_version_start = tablet.get_multi_version_start();
        FLOG_INFO("failed to get multi_version_start, use multi_version_start on tablet", K(ret),
            K(merge_type), K(expect_multi_version_start));
        ret = OB_SUCCESS; // clear errno
      } else {
        LOG_WARN("failed to get kept multi_version_start", K(ret),
            "tablet_id", tablet.get_tablet_meta().tablet_id_);
      }
    }

    if (OB_SUCC(ret)) {
      // update multi_version_start
      if (expect_multi_version_start < result_version_range.multi_version_start_) {
        LOG_WARN("cannot reserve multi_version_start", "multi_version_start", result_version_range.multi_version_start_,
                 K(expect_multi_version_start));
      } else if (expect_multi_version_start < result_version_range.snapshot_version_) {
        result_version_range.multi_version_start_ = expect_multi_version_start;
        LOG_DEBUG("succ reserve multi_version_start", "multi_version_start", result_version_range.multi_version_start_,
                  K(expect_multi_version_start));
      } else {
        result_version_range.multi_version_start_ = result_version_range.snapshot_version_;
        LOG_DEBUG("no need keep multi version", "multi_version_start", result_version_range.multi_version_start_,
                  K(expect_multi_version_start));
      }
    }
  }
  return ret;
}


// Appends `table` to result.handle_ and extends result.scn_range_ to cover it.
// Rejects a null table (OB_INVALID_ARGUMENT) and, when the handle already holds tables, a table
// whose start scn lies beyond the current end of the range (OB_ERR_UNEXPECTED).
// The first table added also defines the start of the range.
int add_table_with_check(ObGetMergeTablesResult &result, ObITable *table)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(table)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", K(ret), KP(table));
  } else if (OB_UNLIKELY(!result.handle_.empty()
      && table->get_start_scn() > result.scn_range_.end_scn_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("log ts range is not continues", K(ret), K(result), KPC(table));
  } else if (OB_FAIL(result.handle_.add_table(table))) {
    LOG_WARN("failed to add table", K(ret), KPC(table));
  } else {
    // the range always ends at the newest table; it starts at the very first one
    result.scn_range_.end_scn_ = table->get_end_scn();
    if (1 == result.handle_.get_count()) {
      result.scn_range_.start_scn_ = table->get_start_scn();
    }
  }
  return ret;
}

O
obdev 已提交
997 998 999 1000 1001
int ObPartitionMergePolicy::generate_input_result_array(
    const ObGetMergeTablesResult &input_result,
    ObMinorExecuteRangeMgr &minor_range_mgr,
    int64_t &fixed_input_table_cnt,
    ObIArray<ObGetMergeTablesResult> &input_result_array)
1002 1003
{
  int ret = OB_SUCCESS;
O
obdev 已提交
1004 1005 1006 1007 1008 1009 1010 1011 1012
  fixed_input_table_cnt = 0;
  input_result_array.reset();
  ObGetMergeTablesResult tmp_result;

  if (minor_range_mgr.exe_range_array_.empty()) {
    if (OB_FAIL(input_result_array.push_back(input_result))) {
      LOG_WARN("failed to add input result", K(ret), K(input_result));
    } else {
      fixed_input_table_cnt += input_result.handle_.get_count();
1013
    }
O
obdev 已提交
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
  } else if (OB_FAIL(tmp_result.copy_basic_info(input_result))) {
    LOG_WARN("failed to copy basic info", K(ret), K(input_result));
  } else {
    const ObIArray<ObITable *> &table_array = input_result.handle_.get_tables();
    ObITable *table = nullptr;
    for (int64_t idx = 0; OB_SUCC(ret) && idx < table_array.count(); ++idx) {
      if (OB_ISNULL(table = table_array.at(idx))) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("table is unexpected null", K(ret), K(idx), K(table_array));
      } else if (minor_range_mgr.in_execute_range(table)) {
        if (tmp_result.handle_.get_count() < 2) {
        } else if (OB_FAIL(input_result_array.push_back(tmp_result))) {
          LOG_WARN("failed to add tmp result", K(ret), K(tmp_result));
        } else {
          fixed_input_table_cnt += tmp_result.handle_.get_count();
1029
        }
O
obdev 已提交
1030 1031 1032 1033
        tmp_result.handle_.reset();
        tmp_result.scn_range_.reset();
      } else if (OB_FAIL(add_table_with_check(tmp_result, table))) {
        LOG_WARN("failed to add table into result", K(ret), K(tmp_result), KPC(table));
1034 1035
      }
    }
O
obdev 已提交
1036 1037 1038 1039 1040 1041 1042

    if (OB_FAIL(ret) || tmp_result.handle_.get_count() < 2) {
    } else if (OB_FAIL(input_result_array.push_back(tmp_result))) {
      LOG_WARN("failed to add tmp result", K(ret), K(tmp_result));
    } else {
      fixed_input_table_cnt += tmp_result.handle_.get_count();
    }
1043 1044 1045 1046
  }
  return ret;
}

O
obdev 已提交
1047 1048
int ObPartitionMergePolicy::split_parallel_minor_range(
    const int64_t table_count_threshold,
1049 1050 1051 1052
    const ObGetMergeTablesResult &input_result,
    ObIArray<ObGetMergeTablesResult> &parallel_result)
{
  int ret = OB_SUCCESS;
O
obdev 已提交
1053 1054 1055 1056 1057
  const int64_t input_table_cnt = input_result.handle_.get_count();
  ObGetMergeTablesResult tmp_result;
  if (input_table_cnt < table_count_threshold) {
    // if there are no running minor dags, then the input_table_cnt must be greater than threshold.
  } else if (input_table_cnt < OB_MINOR_PARALLEL_SSTABLE_CNT_TRIGGER) {
1058
    if (OB_FAIL(parallel_result.push_back(input_result))) {
O
obdev 已提交
1059
      LOG_WARN("failed to add input result", K(ret), K(input_result));
1060
    }
O
obdev 已提交
1061 1062
  } else if (OB_FAIL(tmp_result.copy_basic_info(input_result))) {
    LOG_WARN("failed to copy basic info", K(ret), K(input_result));
1063
  } else {
O
obdev 已提交
1064 1065
    const int64_t parallel_dag_cnt = MAX(1, input_table_cnt / OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG);
    const int64_t parallel_table_cnt = input_table_cnt / parallel_dag_cnt;
1066 1067
    const ObIArray<ObITable *> &table_array = input_result.handle_.get_tables();
    ObITable *table = nullptr;
O
obdev 已提交
1068 1069 1070 1071 1072 1073 1074 1075

    int64_t start = 0;
    int64_t end = 0;
    for (int64_t seq = 0; OB_SUCC(ret) && seq < parallel_dag_cnt; ++seq) {
      start = parallel_table_cnt * seq;
      end = (parallel_dag_cnt - 1 == seq) ? table_array.count() : end + parallel_table_cnt;
      for (int64_t i = start; OB_SUCC(ret) && i < end; ++i) {
        if (OB_ISNULL(table = table_array.at(i))) {
1076
          ret = OB_ERR_UNEXPECTED;
O
obdev 已提交
1077
          LOG_WARN("table is unexpected null", K(ret), K(i), K(table_array));
1078 1079 1080
        } else if (OB_FAIL(add_table_with_check(tmp_result, table))) {
          LOG_WARN("failed to add table into result", K(ret), K(tmp_result), KPC(table));
        }
O
obdev 已提交
1081 1082 1083 1084
      }
      if (OB_FAIL(ret)) {
      } else if (OB_FAIL(parallel_result.push_back(tmp_result))) {
        LOG_WARN("failed to add tmp result", K(ret), K(tmp_result));
1085
      } else {
O
obdev 已提交
1086
        LOG_DEBUG("success to push result", K(ret), K(tmp_result), K(parallel_result));
O
obdev 已提交
1087 1088
        tmp_result.handle_.reset();
        tmp_result.scn_range_.reset();
1089
      }
O
obdev 已提交
1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128
    }
  }
  return ret;
}

// Produces the list of minor merge results that can be scheduled in parallel.
//
// Returns OB_NO_NEED_MERGE when the input is not a minor merge, when it holds fewer tables than
// minor_compact_trigger, or when fewer than minor_compact_trigger tables remain after removing
// the tables covered by executing minor ranges. Otherwise every remaining group is passed to
// split_parallel_minor_range.
int ObPartitionMergePolicy::generate_parallel_minor_interval(
    const int64_t minor_compact_trigger,
    const ObGetMergeTablesResult &input_result,
    ObMinorExecuteRangeMgr &minor_range_mgr,
    ObIArray<ObGetMergeTablesResult> &parallel_result)
{
  int ret = OB_SUCCESS;
  parallel_result.reset();
  ObSEArray<ObGetMergeTablesResult, 2> input_result_array;
  int64_t fixed_input_table_cnt = 0;

  if (!storage::is_minor_merge(input_result.suggest_merge_type_)
      || input_result.handle_.get_count() < minor_compact_trigger) {
    ret = OB_NO_NEED_MERGE;
  } else if (OB_FAIL(generate_input_result_array(input_result, minor_range_mgr, fixed_input_table_cnt, input_result_array))) {
    LOG_WARN("failed to generate input result into array", K(ret), K(input_result));
  } else if (fixed_input_table_cnt < minor_compact_trigger) {
    // the quantity of table that should be merged is smaller than trigger, wait for the existing minor tasks to finish.
    ret = OB_NO_NEED_MERGE;
  } else {
    /*
     * When existing minor dag, we should ensure that the quantity of tables per parallel dag is a reasonable value:
     * 1. If compact_trigger is small, minor merge should be easier to schedule, we should lower the threshold;
     * 2. If compact_trigger is big, we should upper the threshold to prevent the creation of dag frequently.
     */
    const int64_t table_count_threshold = minor_range_mgr.exe_range_array_.empty()
                                        ? minor_compact_trigger
                                        : MIN(OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG / 2, minor_compact_trigger * 2);
    for (int64_t i = 0; OB_SUCC(ret) && i < input_result_array.count(); ++i) {
      if (OB_FAIL(split_parallel_minor_range(table_count_threshold, input_result_array.at(i), parallel_result))) {
        LOG_WARN("failed to split parallel minor range", K(ret), K(input_result_array.at(i)));
      }
    }
  }
  return ret;
}


/*************************************** ObMinorExecuteRangeMgr ***************************************/
bool compareScnRange(share::ObScnRange &a, share::ObScnRange &b)
{
  return a.end_scn_ < b.end_scn_;
}

// Loads into exe_range_array_ the ranges that the tenant dag scheduler reports for minor dags
// on (ls_id, tablet_id), then sorts and validates them through sort_ranges().
int ObMinorExecuteRangeMgr::get_merge_ranges(
    const ObLSID &ls_id,
    const ObTabletID &tablet_id)
{
  int ret = OB_SUCCESS;

  // lookup key for the scheduler
  ObTabletMergeDagParam param;
  param.ls_id_ = ls_id;
  param.tablet_id_ = tablet_id;
  param.merge_type_ = MINOR_MERGE;
  param.merge_version_ = ObVersion::MIN_VERSION;
  param.for_diagnose_ = true;

  if (OB_FAIL(MTL(ObTenantDagScheduler*)->get_minor_exe_dag_info(param, exe_range_array_))) {
    LOG_WARN("failed to get minor exe dag info", K(ret));
  } else {
    if (OB_FAIL(sort_ranges())) {
      LOG_WARN("failed to sort ranges", K(ret), K(param));
    }
  }
  return ret;
}

int ObMinorExecuteRangeMgr::sort_ranges()
{
  int ret = OB_SUCCESS;
  std::sort(exe_range_array_.begin(), exe_range_array_.end(), compareScnRange);
  for (int i = 1; OB_SUCC(ret) && i < exe_range_array_.count(); ++i) {
    if (OB_UNLIKELY(!exe_range_array_.at(i).is_valid()
        || (exe_range_array_.at(i - 1).start_scn_.get_val_for_tx() > 0 // except meta major merge range
            && exe_range_array_.at(i).start_scn_.get_val_for_tx() > 0
            && exe_range_array_.at(i).start_scn_ < exe_range_array_.at(i - 1).end_scn_))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected minor ranges", K(ret), K(i), K(exe_range_array_));
    }
  }
  return ret;
}

// Returns true when the end scn of `table` lies in (start_scn_, end_scn_] of any range in
// exe_range_array_. A null table or an empty range array yields false.
bool ObMinorExecuteRangeMgr::in_execute_range(const ObITable *table) const
{
  bool bret = false;
  if (OB_NOT_NULL(table)) {
    // stop at the first range that contains the table's end scn
    for (int i = 0; !bret && i < exe_range_array_.count(); ++i) {
      if (table->get_end_scn() <= exe_range_array_.at(i).end_scn_
          && table->get_end_scn() > exe_range_array_.at(i).start_scn_) {
        LOG_DEBUG("in execute range", KPC(table), K(i), K(exe_range_array_.at(i)));
        bret = true;
      }
    }
  }
  return bret;
}


/*************************************** ObAdaptiveMergePolicy ***************************************/
// Display names of the adaptive merge reasons, indexed by the reason's integer value.
// merge_reason_to_str() statically asserts that this table has exactly INVALID_REASON entries,
// so a new reason needs a new entry here in the same position.
const char * ObAdaptiveMergeReasonStr[] = {
  "NONE",
  "LOAD_DATA_SCENE",
  "TOMBSTONE_SCENE",
  "INEFFICIENT_QUERY",
  "FREQUENT_WRITE"
};

// Maps an adaptive merge reason value to its display name.
// Values outside [NONE, INVALID_REASON) yield "invalid_merge_reason".
const char* ObAdaptiveMergePolicy::merge_reason_to_str(const int64_t merge_reason)
{
  STATIC_ASSERT(static_cast<int64_t>(INVALID_REASON) == ARRAYSIZEOF(ObAdaptiveMergeReasonStr),
                "adaptive merge reason str len is mismatch");
  const char *str = "invalid_merge_reason";
  if (merge_reason >= NONE && merge_reason < INVALID_REASON) {
    str = ObAdaptiveMergeReasonStr[merge_reason];
  }
  return str;
}

// A reason is valid when it lies strictly between NONE and INVALID_REASON.
bool ObAdaptiveMergePolicy::is_valid_merge_reason(const AdaptiveMergeReason &reason)
{
  const bool above_none = reason > AdaptiveMergeReason::NONE;
  const bool below_invalid = reason < AdaptiveMergeReason::INVALID_REASON;
  return above_none && below_invalid;
}

// Selects the tables for a META_MAJOR_MERGE on `tablet` and fills `result`.
//
// @param param   merge parameters; param.merge_type_ must be META_MAJOR_MERGE
// @param ls      log stream, forwarded to get_multi_version_start
// @param tablet  tablet whose table store is inspected
// @param result  out: reset first, then filled with tables, version range and schema versions
// @return OB_SUCCESS on success
//         OB_ERR_SYS           table store is not valid
//         OB_INVALID_ARGUMENT  merge type is not META_MAJOR_MERGE
//         OB_NO_NEED_MERGE     propagated from find_meta_major_tables (not logged as a warning)
//         other errors from the continuity check, get_multi_version_start or
//         get_frozen_schema_version
int ObAdaptiveMergePolicy::get_meta_merge_tables(
    const ObGetMergeTablesParam &param,
    ObLS &ls,
    const ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  const ObMergeType merge_type = param.merge_type_;
  const ObTabletTableStore &table_store = tablet.get_table_store();
  const ObStorageSchema &storage_schema = tablet.get_storage_schema();
  result.reset();

  // Each step runs only if all previous ones succeeded; FALSE_IT steps are plain assignments.
  if (OB_UNLIKELY(!table_store.is_valid())) {
    ret = OB_ERR_SYS;
    LOG_WARN("table store not valid", K(ret), K(table_store));
  } else if (OB_UNLIKELY(META_MAJOR_MERGE != merge_type)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid args", K(ret), K(merge_type));
  } else if (OB_FAIL(find_meta_major_tables(tablet, result))) {
    // OB_NO_NEED_MERGE is an expected outcome and stays silent
    if (OB_NO_NEED_MERGE != ret) {
      LOG_WARN("Failed to find minor merge tables", K(ret));
    }
  } else if (OB_FAIL(result.handle_.check_continues(nullptr))) {
    LOG_WARN("failed to check continues", K(ret), K(result));
  } else if (FALSE_IT(result.schema_version_ = storage_schema.schema_version_)) {
  } else if (FALSE_IT(result.suggest_merge_type_ = META_MAJOR_MERGE)) {
  } else if (FALSE_IT(result.version_range_.snapshot_version_ =
      MIN(tablet.get_snapshot_version(), result.version_range_.snapshot_version_))) {
    // choose version should less than tablet::snapshot
  } else if (OB_FAIL(ObPartitionMergePolicy::get_multi_version_start(
      param.merge_type_, ls, tablet, result.version_range_))) {
    LOG_WARN("failed to get multi version_start", K(ret));
  } else if (OB_FAIL(result.handle_.get_table(0)->get_frozen_schema_version(result.base_schema_version_))) {
    // the first table in the handle provides the base schema version
    LOG_WARN("failed to get frozen schema version", K(ret), K(result));
  } else {
    FLOG_INFO("succeed to get meta major merge tables", K(result), K(table_store));
  }
  return ret;
}

int ObAdaptiveMergePolicy::find_meta_major_tables(
    const storage::ObTablet &tablet,
    ObGetMergeTablesResult &result)
{
  int ret = OB_SUCCESS;
  int64_t min_snapshot = 0;
  int64_t max_snapshot = 0;
  int64_t base_row_cnt = 0;
  int64_t inc_row_cnt = 0;
  int64_t tx_determ_table_cnt = 0;
  const ObTabletTableStore &table_store = tablet.get_table_store();
  ObITable *last_major = table_store.get_major_sstables().get_boundary_table(true);
  ObITable *last_minor = table_store.get_minor_sstables().get_boundary_table(true);
  ObITable *base_table = table_store.get_extend_sstable(ObTabletTableStore::META_MAJOR);
  const ObSSTableArray &minor_tables = table_store.get_minor_sstables();

  if (!table_store.is_valid()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ObTabletTableStore is not valid", K(ret), K(table_store));
  } else if (nullptr == last_minor || nullptr == last_major) {
    ret = OB_NO_NEED_MERGE;
    LOG_WARN("no minor/major sstable to do meta major merge", K(ret), KPC(last_minor), KPC(last_major));
  } else if (OB_FAIL(ObPartitionMergePolicy::get_boundary_snapshot_version(
      tablet, min_snapshot, max_snapshot, false/*check_table_cnt*/))) {
    if (OB_NO_NEED_MERGE != ret) {
      LOG_WARN("Failed to find meta merge base table", K(ret), KPC(last_major), KPC(last_major), KPC(base_table));
    }
  } else if (FALSE_IT(base_table = nullptr == base_table ? last_major : base_table)) {
  } else if (base_table->get_snapshot_version() < min_snapshot || max_snapshot != INT64_MAX) {
    // max_snapshot == INT64_MAX means there's no next freeze_info
    ret = OB_NO_NEED_MERGE;
    LOG_DEBUG("no need meta merge when the tablet is doing major merge", K(ret), K(min_snapshot), K(max_snapshot), KPC(base_table));
  } else if (OB_FAIL(add_meta_merge_result(base_table, result, true/*update_snapshot*/))) {
    LOG_WARN("failed to add base table to meta merge result", K(ret), KPC(base_table), K(result));
  } else {
    ++tx_determ_table_cnt; // inc for base_table
    bool found_undeterm_table = false;
    base_row_cnt = static_cast<ObSSTable *>(base_table)->get_meta().get_row_count();
    ObITable *table = nullptr;
    for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count(); ++i) {
      if (OB_ISNULL(table = minor_tables[i]) || !table->is_multi_version_minor_sstable()) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("get unexpected table", K(ret), K(i), K(table_store));
      } else if (table->get_upper_trans_version() <= base_table->get_snapshot_version()) {
        // skip minor sstable which has been merged
        continue;
      } else if (!found_undeterm_table && table->is_trans_state_deterministic()) {
        ++tx_determ_table_cnt;
        inc_row_cnt += static_cast<ObSSTable *>(table)->get_meta().get_row_count();
      } else {
        found_undeterm_table = true;
      }

      if (FAILEDx(add_meta_merge_result(table, result, !found_undeterm_table))) {
        LOG_WARN("failed to add minor table to meta merge result", K(ret));
      }
    } // end of for
O
obdev 已提交
1319

1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331
    if (OB_FAIL(ret)) {
    } else if (tx_determ_table_cnt < 2) {
      ret = OB_NO_NEED_MERGE;
      LOG_INFO("no enough table for meta merge", K(ret), K(result), K(table_store));
    } else if (inc_row_cnt < TRANS_STATE_DETERM_ROW_CNT_THRESHOLD
      || inc_row_cnt < INC_ROW_COUNT_PERCENTAGE_THRESHOLD * base_row_cnt) {
      ret = OB_NO_NEED_MERGE;
      LOG_INFO("found sstable could merge is not enough", K(ret), K(inc_row_cnt), K(base_row_cnt));
    } else if (result.version_range_.snapshot_version_ < tablet.get_multi_version_start()) {
      ret = OB_NO_NEED_MERGE;
      LOG_INFO("chosen snapshot is abandoned", K(ret), K(result), K(tablet.get_multi_version_start()));
    }
O
obdev 已提交
1332 1333 1334 1335 1336 1337 1338 1339 1340

#ifdef ERRSIM
  ret = OB_E(EventTable::EN_SCHEDULE_MEDIUM_COMPACTION) ret;
  if (OB_FAIL(ret) && tablet.get_tablet_meta().tablet_id_.id() > ObTabletID::MIN_USER_TABLET_ID) {
    FLOG_INFO("set schedule medium with errsim", "tablet_id", tablet.get_tablet_meta().tablet_id_);
    ret = OB_SUCCESS;
  }
#endif

  }
  return ret;
}

// Resolve the base table and the incremental version used by a meta merge.
//
// @param last_major_table   latest major sstable, may be null
// @param last_minor_table   latest minor sstable, may be null
// @param meta_base_table    [in,out] candidate base table supplied by the caller; when it is
//                           null and last_major_table is given, it is set to last_major_table
// @param merge_inc_version  [out] only written when at least one of the two tables is non-null
// @return OB_ERR_UNEXPECTED  the supplied base table's snapshot is not newer than the major's
//         OB_NO_NEED_MERGE   no base table could be chosen, or merge_inc_version is <= 0
//         OB_SUCCESS         otherwise
int ObAdaptiveMergePolicy::find_base_table_and_inc_version(
    ObITable *last_major_table,
    ObITable *last_minor_table,
    ObITable *&meta_base_table,
    int64_t &merge_inc_version)
{
  int ret = OB_SUCCESS;
  const bool has_major = (nullptr != last_major_table);
  const bool has_minor = (nullptr != last_minor_table);

  // step 1: choose the meta base table
  if (!has_major) {
    // no major sstable to compare with, keep whatever the caller passed in
  } else if (nullptr == meta_base_table) {
    meta_base_table = last_major_table;
  } else if (OB_UNLIKELY(meta_base_table->get_snapshot_version() <= last_major_table->get_snapshot_version())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("meta major table covered by major", K(ret), KPC(meta_base_table), KPC(last_major_table));
  }

  if (OB_SUCC(ret)) {
    // step 2: choose the meta merge inc version from whichever tables exist
    if (has_major && has_minor) {
      merge_inc_version = MAX(last_major_table->get_snapshot_version(), last_minor_table->get_max_merged_trans_version());
    } else if (has_major) {
      merge_inc_version = last_major_table->get_snapshot_version();
    } else if (has_minor) {
      merge_inc_version = last_minor_table->get_max_merged_trans_version();
    }

    // step 3: without a base table or a positive inc version there is nothing to merge
    if (nullptr == meta_base_table || merge_inc_version <= 0) {
      ret = OB_NO_NEED_MERGE;
      LOG_WARN("cannot meta merge with null base table or inc version", K(ret), K(meta_base_table), K(merge_inc_version));
    }
  }
  return ret;
}

// Append one table to the meta merge result and refresh the result's version info.
//
// @param table                 table to add, must not be null
// @param result                [in,out] merge result; the table is added to result.handle_
// @param update_snapshot_flag  for tables that are neither meta major nor major: whether the
//                              snapshot/multi-version start and end_scn_ may be advanced
// @return OB_INVALID_ARGUMENT when table is null, the error of add_table() when it fails,
//         OB_SUCCESS otherwise
int ObAdaptiveMergePolicy::add_meta_merge_result(
    ObITable *table,
    ObGetMergeTablesResult &result,
    const bool update_snapshot_flag)
{
  int ret = OB_SUCCESS;

  if (OB_ISNULL(table)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("get invalid argument", K(ret), KPC(table));
  } else if (OB_FAIL(result.handle_.add_table(table))) {
    LOG_WARN("failed to add table", K(ret), KPC(table));
  } else {
    const bool is_base_table = table->is_meta_major_sstable() || table->is_major_sstable();
    if (is_base_table) {
      // a (meta) major sstable restarts the version range at its own snapshot
      const int64_t base_snapshot = table->get_snapshot_version();
      result.version_range_.base_version_ = 0;
      result.version_range_.multi_version_start_ = base_snapshot;
      result.version_range_.snapshot_version_ = base_snapshot;
      result.create_snapshot_version_ =
          static_cast<ObSSTable *>(table)->get_meta().get_basic_meta().create_snapshot_version_;
    } else if (update_snapshot_flag) {
      // the snapshot never moves backwards: keep the larger of the current and the table's version
      const int64_t merged_version = table->get_max_merged_trans_version();
      const int64_t new_snapshot = MAX(result.version_range_.snapshot_version_, merged_version);
      result.version_range_.multi_version_start_ = new_snapshot;
      result.version_range_.snapshot_version_ = new_snapshot;
      result.scn_range_.end_scn_ = table->get_end_scn();
    }
  }
  return ret;
}

// Work out why (if at all) the tablet should be scheduled for an adaptive merge.
//
// @param tablet  tablet to inspect; its ls_id_ / tablet_id_ are used to look up the tablet stat
// @param reason  [out] reset to NONE on entry, then set by the first check that fires
// @return OB_SUCCESS when the tablet stat was fetched; otherwise the error returned by
//         get_latest_tablet_stat(), including OB_HASH_NOT_EXIST. Failures of the individual
//         checks are only logged through tmp_ret and are never returned to the caller.
int ObAdaptiveMergePolicy::get_adaptive_merge_reason(
    const ObTablet &tablet,
    AdaptiveMergeReason &reason)
{
  int ret = OB_SUCCESS;
  int tmp_ret = OB_SUCCESS;
  const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  ObTabletStat tablet_stat;
  reason = AdaptiveMergeReason::NONE;

  if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(ls_id, tablet_id, tablet_stat))) {
    if (OB_HASH_NOT_EXIST != ret) {
      LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id));
    } else if (OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) {
      // no stat for this tablet: only the sstable row count check can run, it needs no stat
      LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id));
    }
  } else {
    // the checks run in this fixed order; each later one is skipped once a reason has been found
    if (OB_TMP_FAIL(check_tombstone_situation(tablet_stat, tablet, reason))) {
      LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id));
    }
    if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_stat, tablet, reason))) {
      LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id));
    }
    if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) {
      LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id));
    }
    if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_stat, tablet, reason))) {
      LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id));
    }

    if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000 /*10s*/)) {
      LOG_INFO("Check tablet adaptive merge reason", K(reason), K(tablet_stat)); // TODO tmp log, remove later
    }
  }
  return ret;
}

int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage(
    const ObTablet &tablet,
    AdaptiveMergeReason &reason)
{
  int ret = OB_SUCCESS;
  const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  ObSSTable *last_major = static_cast<ObSSTable *>(tablet.get_table_store().get_major_sstables().get_boundary_table(true));
  int64_t base_row_count = nullptr != last_major ? last_major->get_meta().get_basic_meta().row_count_ : 0;
  int64_t inc_row_count = 0;
  const ObSSTableArray &minor_sstables = tablet.get_table_store().get_minor_sstables();
  ObSSTable *sstable = nullptr;
  for (int i = 0; OB_SUCC(ret) && i < minor_sstables.count(); ++i) {
    if (OB_ISNULL(sstable = static_cast<ObSSTable *>(minor_sstables.get_table(i)))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("sstable is null", K(ret), K(i));
    } else {
      inc_row_count += sstable->get_meta().get_basic_meta().row_count_;
    }
  }
  if ((inc_row_count > INC_ROW_COUNT_THRESHOLD) ||
      (base_row_count > BASE_ROW_COUNT_THRESHOLD &&
      (inc_row_count * 100 / base_row_count) > LOAD_DATA_SCENE_THRESHOLD)) {
    reason = AdaptiveMergeReason::FREQUENT_WRITE;
  }
  LOG_DEBUG("check_sstable_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason),
      K(base_row_count), K(inc_row_count));
  return ret;
}

// Report LOAD_DATA_SCENE when the stat describes a hot tablet that is mostly inserted into.
//
// @param tablet_stat  statistics expected to belong to `tablet` (ls id and tablet id must match)
// @param tablet       tablet being checked
// @param reason       [out] always reset to NONE first; set to LOAD_DATA_SCENE on a match
// @return OB_INVALID_ARGUMENT when either argument is invalid or the ids differ,
//         OB_SUCCESS otherwise
int ObAdaptiveMergePolicy::check_load_data_situation(
    const ObTabletStat &tablet_stat,
    const ObTablet &tablet,
    AdaptiveMergeReason &reason)
{
  int ret = OB_SUCCESS;
  const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  reason = AdaptiveMergeReason::NONE;

  // both objects must be valid and the stat must describe exactly this tablet
  const bool is_arg_valid = tablet.is_valid()
      && tablet_stat.is_valid()
      && ls_id.id() == tablet_stat.ls_id_
      && tablet_id.id() == tablet_stat.tablet_id_;

  if (!is_arg_valid) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
  } else if (tablet_stat.is_hot_tablet()) {
    if (tablet_stat.is_insert_mostly()) {
      reason = AdaptiveMergeReason::LOAD_DATA_SCENE;
    }
  }
  LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
  return ret;
}

// Report TOMBSTONE_SCENE when the stat describes a hot tablet that is mostly updated.
//
// @param tablet_stat  statistics expected to belong to `tablet` (ls id and tablet id must match)
// @param tablet       tablet being checked
// @param reason       [out] always reset to NONE first; set to TOMBSTONE_SCENE on a match
// @return OB_INVALID_ARGUMENT when either argument is invalid or the ids differ,
//         OB_SUCCESS otherwise
int ObAdaptiveMergePolicy::check_tombstone_situation(
    const ObTabletStat &tablet_stat,
    const ObTablet &tablet,
    AdaptiveMergeReason &reason)
{
  int ret = OB_SUCCESS;
  const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  reason = AdaptiveMergeReason::NONE;

  // the stat is only usable when both objects are valid and refer to the same tablet
  const bool is_same_tablet = tablet.is_valid()
      && tablet_stat.is_valid()
      && ls_id.id() == tablet_stat.ls_id_
      && tablet_id.id() == tablet_stat.tablet_id_;

  if (is_same_tablet) {
    if (tablet_stat.is_hot_tablet() && tablet_stat.is_update_mostly()) {
      reason = AdaptiveMergeReason::TOMBSTONE_SCENE;
    }
  } else {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
  }
  LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
  return ret;
}

// Report INEFFICIENT_QUERY when a hot tablet shows inefficient scans, inserts or pushdowns.
//
// @param tablet_stat  statistics expected to belong to `tablet` (ls id and tablet id must match)
// @param tablet       tablet being checked
// @param reason       [out] always reset to NONE first; set to INEFFICIENT_QUERY on a match
// @return OB_INVALID_ARGUMENT when either argument is invalid or the ids differ,
//         OB_SUCCESS otherwise
int ObAdaptiveMergePolicy::check_ineffecient_read(
    const ObTabletStat &tablet_stat,
    const ObTablet &tablet,
    AdaptiveMergeReason &reason)
{
  int ret = OB_SUCCESS;
  const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
  const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
  reason = AdaptiveMergeReason::NONE;

  const bool is_arg_valid = tablet.is_valid()
      && tablet_stat.is_valid()
      && ls_id.id() == tablet_stat.ls_id_
      && tablet_id.id() == tablet_stat.tablet_id_;

  if (!is_arg_valid) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
  } else if (tablet_stat.is_hot_tablet()
      && (tablet_stat.is_inefficient_scan()
          || tablet_stat.is_inefficient_insert()
          || tablet_stat.is_inefficient_pushdown())) {
    // only hot tablets are considered; any one inefficiency signal is enough
    reason = AdaptiveMergeReason::INEFFICIENT_QUERY;
  }
  LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
  return ret;
}


} /* namespace compaction */
} /* namespace oceanbase */