diff --git a/deps/easy/src/util/easy_pool.c b/deps/easy/src/util/easy_pool.c index b34a6e154527ab7e0764dda796e421e327e6dada..065868245d96451bff38d88caf1d5dbf864463cd 100644 --- a/deps/easy/src/util/easy_pool.c +++ b/deps/easy/src/util/easy_pool.c @@ -166,7 +166,7 @@ void* easy_pool_default_realloc(void* ptr, size_t size) if (size) { return realloc(ptr, size); } else if (ptr) { - // free(ptr); + free(ptr); } return 0; diff --git a/deps/oblib/src/lib/mysqlclient/ob_mysql_connection_pool.h b/deps/oblib/src/lib/mysqlclient/ob_mysql_connection_pool.h index b74194145676343b6c0377ad53c0ffbcc134cc05..7fe3e4e5fe7fbaddf5e7ab68ac907d991287703b 100644 --- a/deps/oblib/src/lib/mysqlclient/ob_mysql_connection_pool.h +++ b/deps/oblib/src/lib/mysqlclient/ob_mysql_connection_pool.h @@ -113,6 +113,8 @@ public: { return is_updated_; } + bool is_use_ssl() const { return is_use_ssl_; } + void disable_ssl() { is_use_ssl_ = false; } int64_t to_string(char* buf, const int64_t buf_len) const { int64_t pos = 0; @@ -179,6 +181,7 @@ protected: bool is_updated_; bool is_stop_; + bool is_use_ssl_; ObMySQLConnection::Mode mode_; int tg_id_; diff --git a/deps/oblib/src/lib/string/ob_string.h b/deps/oblib/src/lib/string/ob_string.h index 037353aca0622fc02442d6c04c6c3f6e9504f5f8..7121543135c21411c6e901ee90da8a2d4a736599 100644 --- a/deps/oblib/src/lib/string/ob_string.h +++ b/deps/oblib/src/lib/string/ob_string.h @@ -100,7 +100,7 @@ public: * */ - inline int clone(const ObString& rv, ObDataBuffer& buf); + int clone(const ObString& rv, ObDataBuffer& buf); // reset void reset() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8b93f4655a99e2b4438966104b47a8033dd375a5..aa6b984993d79b2ad0c092899b79033f989d7122 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ add_subdirectory(archive) add_subdirectory(election) add_subdirectory(storage) add_subdirectory(observer) +add_subdirectory(liboblog) if (OB_ENABLE_SERVER_PCH) target_precompile_headers(ob_base INTERFACE ${ob_server_pchs}) diff --git a/src/liboblog/CMakeLists.txt b/src/liboblog/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..be67ad5c84c99c54cdf41b23f959b1ba48c04bc1 --- /dev/null +++ b/src/liboblog/CMakeLists.txt @@ -0,0 +1,3 @@ +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_subdirectory(src) +add_subdirectory(tests) diff --git a/src/liboblog/src/CMakeLists.txt b/src/liboblog/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d77c1d331adfd8435686106048b223a711fa1abc --- /dev/null +++ b/src/liboblog/src/CMakeLists.txt @@ -0,0 +1,164 @@ +add_library(oblog_base INTERFACE) +target_include_directories(oblog_base INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(oblog_base + INTERFACE ob_base oblogmsg + ) + +add_library(oblog_miner INTERFACE) + +target_include_directories(oblog_miner INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(oblog_miner + INTERFACE ob_base oblogmsg_static + ) + +set(SRC_LIST + liboblog.h liboblog.cpp + ob_concurrent_seq_queue.h ob_concurrent_seq_queue.cpp + ob_easy_hazard_map.h + ob_log_adapt_string.h ob_log_adapt_string.cpp + ob_log_adapt_string.h ob_log_adapt_string.cpp + ob_log_all_svr_cache.h ob_log_all_svr_cache.cpp + ob_log_binlog_record.h ob_log_binlog_record.cpp + ob_log_binlog_record_pool.h ob_log_binlog_record_pool.cpp + ob_log_binlog_record_queue.h ob_log_binlog_record_queue.cpp + ob_log_cluster_id_filter.cpp ob_log_cluster_id_filter.h + ob_log_common.h + ob_log_config.h ob_log_config.cpp + 
ob_log_ddl_handler.cpp ob_log_ddl_handler.h + ob_log_ddl_parser.h ob_log_ddl_parser.cpp + ob_log_dlist.h + ob_log_dml_parser.h ob_log_dml_parser.cpp + ob_log_fake_common_config.h + ob_log_fetch_log_rpc.h ob_log_fetch_log_rpc.cpp + ob_log_fetch_stat_info.h ob_log_fetch_stat_info.cpp + ob_log_fetch_stream.h ob_log_fetch_stream.cpp + ob_log_fetch_stream_container.h ob_log_fetch_stream_container.cpp + ob_log_fetch_stream_pool.h ob_log_fetch_stream_pool.cpp + ob_log_fetch_stream_type.h ob_log_fetch_stream_type.cpp + ob_log_fetcher.h ob_log_fetcher.cpp + ob_log_fetcher_dead_pool.h ob_log_fetcher_dead_pool.cpp + ob_log_fetcher_dispatcher.cpp ob_log_fetcher_dispatcher.h + ob_log_fetcher_heartbeat_worker.cpp ob_log_fetcher_heartbeat_worker.h + ob_log_fetcher_heartbeat_worker.h ob_log_fetcher_heartbeat_worker.cpp + ob_log_fetcher_idle_pool.h ob_log_fetcher_idle_pool.cpp + ob_log_formatter.h ob_log_formatter.cpp + ob_log_entry_task_pool.h ob_log_entry_task_pool.cpp + ob_log_row_list.h ob_log_row_list.cpp + ob_log_row_data_index.h ob_log_row_data_index.cpp + ob_log_store_service.h + ob_log_store_service_stat.h ob_log_store_service_stat.cpp + ob_log_mock_store_service.h + ob_log_storager.h ob_log_storager.cpp + ob_log_reader_plug_in.h ob_log_reader_plug_in.cpp + ob_log_data_processor.h ob_log_data_processor.cpp + ob_log_hbase_mode.h ob_log_hbase_mode.cpp + ob_log_work_mode.h ob_log_work_mode.cpp + ob_log_instance.h ob_log_instance.cpp + ob_log_lighty_list.h + ob_log_main.c + ob_log_meta_manager.h ob_log_meta_manager.cpp + ob_log_mysql_connector.h ob_log_mysql_connector.cpp + ob_log_mysql_proxy.h ob_log_mysql_proxy.cpp + ob_log_part_fetch_ctx.h ob_log_part_fetch_ctx.cpp + ob_log_part_fetch_mgr.h ob_log_part_fetch_mgr.cpp + ob_log_part_mgr.h ob_log_part_mgr.cpp + ob_log_table_id_cache.h ob_log_table_id_cache.cpp + ob_log_part_progress_controller.h ob_log_part_progress_controller.cpp + ob_log_part_serve_info.h + ob_log_part_svr_list.h ob_log_part_svr_list.cpp + ob_log_svr_blacklist.h ob_log_svr_blacklist.cpp + ob_log_part_trans_dispatcher.cpp ob_log_part_trans_dispatcher.h + ob_log_part_trans_parser.h ob_log_part_trans_parser.cpp + ob_log_part_trans_resolver.h ob_log_part_trans_resolver.cpp + ob_log_part_trans_resolver_factory.cpp ob_log_part_trans_resolver_factory.h + ob_log_part_trans_task.cpp ob_log_part_trans_task.h + ob_log_part_trans_task.h ob_log_part_trans_task.cpp + ob_log_part_trans_task_queue.h ob_log_part_trans_task_queue.cpp + ob_log_entry_wrapper.h ob_log_entry_wrapper.cpp + ob_log_resource_collector.h ob_log_resource_collector.cpp + ob_log_resource_recycle_task.h + ob_log_rpc.h ob_log_rpc.cpp + ob_log_schema_cache_info.h ob_log_schema_cache_info.cpp + ob_log_schema_getter.h ob_log_schema_getter.cpp + ob_log_sequencer1.h ob_log_sequencer1.cpp + ob_log_server_priority.h ob_log_server_priority.cpp + ob_log_sql_server_provider.h ob_log_sql_server_provider.cpp + ob_log_start_log_id_locator.h ob_log_start_log_id_locator.cpp + ob_log_stream_worker.h ob_log_stream_worker.cpp + ob_log_svr_finder.h ob_log_svr_finder.cpp + ob_log_svr_stream.h ob_log_svr_stream.cpp + ob_log_systable_helper.h ob_log_systable_helper.cpp + ob_log_table_matcher.h ob_log_table_matcher.cpp + ob_log_start_schema_matcher.h ob_log_start_schema_matcher.cpp + ob_log_task_pool.h + ob_log_timer.h ob_log_timer.cpp + ob_log_timezone_info_getter.h ob_log_timezone_info_getter.cpp + ob_log_trace_id.h ob_log_trace_id.cpp + ob_log_trans_ctx.h ob_log_trans_ctx.cpp + ob_log_trans_ctx_mgr.h ob_log_trans_ctx_mgr.cpp + ob_log_trans_log.h 
ob_log_trans_log.cpp + ob_log_trans_stat_mgr.h ob_log_trans_stat_mgr.cpp + ob_log_utils.h ob_log_utils.cpp + ob_map_queue.h + ob_map_queue_thread.h + ob_ms_queue_thread.h + ob_obj2str_helper.h ob_obj2str_helper.cpp + ob_seq_thread.h + ob_small_arena.h ob_small_arena.cpp + ob_log_tenant_task_queue.h ob_log_tenant_task_queue.cpp + ob_log_tenant.h ob_log_tenant.cpp + ob_log_tenant_mgr.h ob_log_tenant_mgr.cpp + ob_log_part_info.h ob_log_part_info.cpp + ob_log_part_callback.h + ob_log_ref_state.h ob_log_ref_state.cpp + ob_log_committer.h ob_log_committer.cpp + ) +add_library(oblog_objects OBJECT ${SRC_LIST}) +target_link_libraries(oblog_objects PUBLIC oblog_base) +target_compile_definitions(oblog_objects PRIVATE ENABLE_DEBUG_LOG) +target_compile_options(oblog_objects PRIVATE -Werror) + +disable_pch(oblog_objects) +set(LGPL_DEPS "-L${DEP_DIR}/lib/mariadb -l:libmariadbclient.a -laio") + +if (OB_SO_CACHE) + add_library(oblog SHARED IMPORTED GLOBAL) + set_target_properties(oblog PROPERTIES + IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/liboblog.so" + IMPORTED_LINK_INTERFACE_LIBRARIES oblog_miner) +else() + add_library(oblog SHARED ${CMAKE_BINARY_DIR}/src/observer/ob_version.cpp) + target_link_libraries(oblog + PUBLIC oblog_base oblib + PRIVATE + -Wl,--whole-archive + $ + -Wl,--no-whole-archive + -Wl,--start-group + oceanbase_static + -Wl,--end-group + -static-libgcc -static-libstdc++ + -Wl,-Bsymbolic + -Wl,-e,so_main + ${LGPL_DEPS} + easy + ) + add_dependencies(oblog oblog_objects) + set_target_properties(oblog PROPERTIES SOVERSION 1 VERSION 1.0.0) +endif() + +add_library(oblog_objects_miner OBJECT ${SRC_LIST}) +disable_pch(oblog_objects_miner) +target_link_libraries(oblog_objects_miner PUBLIC oblog_miner) +add_library(oblog_static + STATIC + EXCLUDE_FROM_ALL + ${CMAKE_BINARY_DIR}/src/observer/ob_version.cpp) +target_link_libraries(oblog_static + PUBLIC oblog_objects_miner oblib + -Wl,--start-group + oceanbase_static + -Wl,--end-group + PRIVATE -static-libgcc -static-libstdc++ + ${LGPL_DEPS} + ) diff --git a/src/liboblog/src/liboblog.cpp b/src/liboblog/src/liboblog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..006ae4d47c46d4eccdd8ff0ef8807c21f6856442 --- /dev/null +++ b/src/liboblog/src/liboblog.cpp @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "liboblog.h" + +#include +#include +#include "lib/allocator/ob_malloc.h" // ob_set_memory_size_limit +#include "lib/utility/utility.h" // get_phy_mem_size + +#include "ob_log_common.h" // MAX_MEMORY_USAGE_PERCENT +#include "ob_log_instance.h" // ObLogInstance + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogFactory::ObLogFactory() +{ + // set max memory limit + lib::set_memory_limit(get_phy_mem_size() * MAX_MEMORY_USAGE_PERCENT / 100); + + CURLcode curl_code = curl_global_init(CURL_GLOBAL_ALL); + + if (OB_UNLIKELY(CURLE_OK != curl_code)) { + OBLOG_LOG(ERROR, "curl_global_init fail", K(curl_code)); + } + + setlocale(LC_ALL, ""); + setlocale(LC_TIME, "en_US.UTF-8"); +} + +ObLogFactory::~ObLogFactory() +{ + curl_global_cleanup(); +} + +IObLog *ObLogFactory::construct_oblog() +{ + return ObLogInstance::get_instance(); +} + +void ObLogFactory::deconstruct(IObLog *log) +{ + UNUSED(log); + + ObLogInstance::destroy_instance(); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/liboblog.h b/src/liboblog/src/liboblog.h new file mode 100644 index 0000000000000000000000000000000000000000..84e56a80743ea138607bfe1fc2fa24441dc69562 --- /dev/null +++ b/src/liboblog/src/liboblog.h @@ -0,0 +1,166 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OCEANBASE_LIBOBLOG_LIBOBLOG_
+#define OCEANBASE_LIBOBLOG_LIBOBLOG_
+
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+
+#include <fnmatch.h>   // FNM_CASEFOLD
+#include
+#include
+#include
+
+using namespace oceanbase::logmessage;
+namespace oceanbase
+{
+namespace liboblog
+{
+struct ObLogError
+{
+  enum ErrLevel
+  {
+    ERR_WARN = 0,
+    ERR_ABORT,
+  } level_;              ///< error level
+  int errno_;            ///< error number
+  const char *errmsg_;   ///< error message
+};
+
+typedef void (* ERROR_CALLBACK) (const ObLogError &err);
+
+class IObLog
+{
+public:
+  virtual ~IObLog() {};
+public:
+  /*
+   * init liboblog
+   * @param config_file      config file name
+   * @param start_timestamp  start timestamp (by second)
+   * @param err_cb           error callback function pointer
+   */
+  virtual int init(const char *config_file,
+      const uint64_t start_timestamp,
+      ERROR_CALLBACK err_cb = NULL) = 0;
+
+  /*
+   * init liboblog
+   * @param configs          config by map
+   * @param start_timestamp  start timestamp (by second)
+   * @param err_cb           error callback function pointer
+   */
+  virtual int init(const std::map<std::string, std::string>& configs,
+      const uint64_t start_timestamp,
+      ERROR_CALLBACK err_cb = NULL) = 0;
+
+  /*
+   * init liboblog
+   * @param configs               config by map
+   * @param start_timestamp_usec  start timestamp by microsecond
+   * @param err_cb                error callback function pointer
+   */
+  virtual int init_with_start_tstamp_usec(const std::map<std::string, std::string>& configs,
+      const uint64_t start_timestamp_usec,
+      ERROR_CALLBACK err_cb = NULL) = 0;
+
+  virtual void destroy() = 0;
+
+  /*
+   * fetch next binlog record from OB cluster
+   * @param record  binlog record, memory allocated by oblog; call release_record() a corresponding
+   *                number of times after multiple next_record calls
+   * @retval OB_SUCCESS success
+   * @retval OB_TIMEOUT timeout
+   * @retval other error code on fail
+   */
+  virtual int next_record(ILogRecord **record, const int64_t timeout_us) = 0;
+
+  /*
+   * fetch next binlog record from OB cluster
+   * @param [out] record         binlog record, memory allocated by oblog; call release_record() a
+   *                             corresponding number of times after multiple next_record calls
+   * @param [out] major_version  major version of ILogRecord
+   * @param [out] tenant_id      tenant id of ILogRecord
+   *
+   * @retval OB_SUCCESS success
+   * @retval OB_TIMEOUT timeout
+   * @retval other error code on fail
+   */
+  virtual int next_record(ILogRecord **record,
+      int32_t &major_version,
+      uint64_t &tenant_id,
+      const int64_t timeout_us) = 0;
+
+  /*
+   * release record, must be called for EACH ILogRecord
+   * @param record
+   */
+  virtual void release_record(ILogRecord *record) = 0;
+
+  /*
+   * Launch liboblog
+   * @retval OB_SUCCESS on success
+   * @retval ! OB_SUCCESS on fail
+   */
+  virtual int launch() = 0;
+
+  /*
+   * Stop liboblog
+   */
+  virtual void stop() = 0;
+
+  /// Match the TableGroup being served
+  /// Currently, TableGroup refers to a specific Database in the format "Tenant.Database".
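+  /// The default fnmatch_flags value (FNM_CASEFOLD) makes the pattern match case-insensitive.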
+ /// + /// @param [in] pattern target pattern string + /// @param [out] is_matched match result + /// @param [in] fnmatch_flags fnmatch flags + /// + /// @retval OB_SUCCESS success + /// @retval other value fail + virtual int table_group_match(const char *pattern, bool &is_matched, int fnmatch_flags = FNM_CASEFOLD) = 0; + + /// get all serving tenant TableGroup list + /// + /// @param [out] table_groups tablegroup list + /// + /// @retval OB_SUCCESS success + /// @retval other value fail + virtual int get_table_groups(std::vector &table_groups) = 0; + + /// get all serving tenant id list after oblog inited + /// + /// @param [out] tenant_ids tenant ids that oblog serving + /// + /// @retval OB_SUCCESS success + /// @retval other value fail + virtual int get_tenant_ids(std::vector &tenant_ids) = 0; +}; + +class ObLogFactory +{ +public: + ObLogFactory(); + ~ObLogFactory(); +public: + IObLog *construct_oblog(); + void deconstruct(IObLog *log); +}; +} +} + +#endif // OCEANBASE_LIBOBLOG_LIBOBLOG_ diff --git a/src/liboblog/src/ob_concurrent_seq_queue.cpp b/src/liboblog/src/ob_concurrent_seq_queue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2ba5241b2b100be836f8db3c0de4742231998487 --- /dev/null +++ b/src/liboblog/src/ob_concurrent_seq_queue.cpp @@ -0,0 +1,262 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX LIB + +#include "ob_concurrent_seq_queue.h" + +#include // futex + +#include "lib/ob_define.h" +#include "lib/time/ob_time_utility.h" // ObTimeUtility +#include "share/ob_errno.h" // KR + +namespace oceanbase +{ +namespace common +{ + +static struct timespec make_timespec(int64_t us) +{ + timespec ts; + ts.tv_sec = us / 1000000; + ts.tv_nsec = 1000 * (us % 1000000); + return ts; +} +#define futex(...) 
syscall(SYS_futex,__VA_ARGS__) +inline int futex_wake(volatile int *p, int val) +{ + return static_cast(futex((int *)p, FUTEX_WAKE_PRIVATE, val, NULL, NULL, 0)); +} +// 0: Woken up by FUTEX_WAKE +// ETIMEDOUT: Timeout +// EWOULDBLOCK: Target value changed, not equal to the incoming comparison value +// EINTR: woken up by various signals +inline int futex_wait(volatile int *p, int val, const timespec *timeout) +{ + int ret = 0; + if (0 != futex((int *)p, FUTEX_WAIT_PRIVATE, val, timeout, NULL, 0)) { + ret = errno; + } + return ret; +} + +///////////////////////////////////////////////////////////////////////////// +ObConcurrentSeqQueue::ObConcurrentSeqQueue(): items_(NULL), limit_(0), size_(0) +{} + +ObConcurrentSeqQueue::~ObConcurrentSeqQueue() +{ + destroy(); +} + +int ObConcurrentSeqQueue::init(const int64_t limit, const ObMemAttr &memattr) +{ + int ret = OB_SUCCESS; + int64_t alloc_size = sizeof(SeqItem) * limit; + if (OB_UNLIKELY(0 >= limit)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(limit)); + } else if (OB_UNLIKELY(limit_ > 0 || NULL != items_)) { + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(items_ = (SeqItem *)ob_malloc(alloc_size, memattr))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("allocate memory failed", K(alloc_size), KR(ret), K(items_)); + } else { + limit_ = limit; + size_ = 0; + memset(items_, 0, sizeof(SeqItem) * limit); + for (int64_t i = 0; i < limit; i++) { + // default to NOT READY state + items_[i].seq_ = i; + } + } + return ret; +} + +void ObConcurrentSeqQueue::destroy() +{ + if (NULL != items_) { + ob_free(items_); + items_ = NULL; + } + limit_ = 0; + size_ = 0; +} + +inline bool ObConcurrentSeqQueue::is_inited_() const +{ + return NULL != items_ && limit_ > 0; +} + +inline ObConcurrentSeqQueue::SeqItem &ObConcurrentSeqQueue::seq_item_(const int64_t seq) +{ + return items_[seq % limit_]; +} + +int ObConcurrentSeqQueue::wait_on_item_(SeqItem &item, + const int64_t cmp_val, + const int64_t end_time) +{ + int ret = OB_SUCCESS; + // Sleep time for one operation + // We set it relatively short to avoid the cost of "false sleep" caused by int32_t overflow + static const int64_t WAIT_TIME_ON_OP = 10L * 1000L; + + int64_t wait_time_us = end_time - ObTimeUtility::current_time(); + wait_time_us = std::min(wait_time_us, WAIT_TIME_ON_OP); + if (wait_time_us <= 0) { + ret = OB_TIMEOUT; + } else { + volatile int *p = reinterpret_cast(&item.seq_); + int cmp_val_int32 = static_cast(cmp_val & INT32_MASK); + timespec ts = make_timespec(wait_time_us); + + /// Note: Our data is int64_t, but futex only supports int32_t. + /// This is a direct comparison of the lower 32 bits of int64_t, since our data is incremented + /// Only after the int32_t value overflows will the value be misclassified, leading to a false sleep. + /// We consider the probability of this occurring to be extremely low, and even if it does, + /// we reduce the impact by making the sleep wait time relatively short. 
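+    /// Only ETIMEDOUT is surfaced as OB_TIMEOUT below; being woken up, finding the value already
+    /// changed (EWOULDBLOCK) or being interrupted (EINTR) all fall through as success so that the
+    /// caller's loop re-reads the sequence value and decides whether to wait again.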
+    int futex_err = futex_wait(p, cmp_val_int32, &ts);
+    if (futex_err == ETIMEDOUT) {
+      ret = OB_TIMEOUT;
+    } else {
+      // success
+    }
+  }
+  return ret;
+}
+
+int ObConcurrentSeqQueue::update_seq_(SeqItem &item,
+    const int64_t expected_cur_seq,
+    const int64_t new_seq)
+{
+  int ret = OB_SUCCESS;
+  // If the update fails, another thread is concurrently updating the same item; exit with an error
+  int64_t cur_seq = ATOMIC_CAS(&item.seq_, expected_cur_seq, new_seq);
+  if (OB_UNLIKELY(cur_seq != expected_cur_seq)) {
+    LOG_ERROR("update seq value fail, must have other threads updating the same item",
+        K(cur_seq), K(expected_cur_seq), K(new_seq), K(item.seq_), K(item.data_));
+    ret = OB_STATE_NOT_MATCH;
+  } else {
+    volatile int *p = reinterpret_cast<volatile int *>(&item.seq_);
+    // wake up all threads waiting on this element
+    // Note: we broadcast to all waiters because futex cannot wake a specific thread.
+    // For example: multiple consuming/producing threads may be waiting for different seq values on this slot;
+    // waking everyone is the only way to guarantee the target thread gets woken up.
+    int64_t wake_num = futex_wake(p, INT32_MAX);
+    LOG_DEBUG("wake_up after update seq", "old_val", expected_cur_seq, "new_val", new_seq,
+        K(wake_num));
+  }
+  return ret;
+}
+
+int ObConcurrentSeqQueue::push(void *data, const int64_t seq, const int64_t timeout_us)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(!is_inited_())) {
+    ret = OB_NOT_INIT;
+    LOG_ERROR("ObConcurrentSeqQueue not init", K(items_), K(limit_));
+  } else if (OB_UNLIKELY(seq < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_ERROR("invalid argument", K(seq));
+  } else {
+    bool ready_to_push = false;
+    SeqItem &item = seq_item_(seq);
+    int64_t end_time = ObTimeUtility::current_time() + timeout_us;
+
+    while (! ready_to_push && OB_SUCCESS == ret) {
+      // First save the current seq value
+      int64_t item_seq = ATOMIC_LOAD(&item.seq_);
+
+      // should not be greater than the target seq
+      if (OB_UNLIKELY(item_seq > seq)) {
+        ret = OB_ENTRY_EXIST;
+        LOG_ERROR("invalid sequence: ENTRY_EXIST", K(seq), K(item.seq_));
+      } else {
+        ready_to_push = (item_seq == seq);
+      }
+
+      // Wait if the data is not ready to be pushed
+      if (OB_SUCCESS == ret && ! ready_to_push) {
+        ret = wait_on_item_(item, item_seq, end_time);
+      }
+    }
+
+    if (OB_SUCCESS == ret) {
+      // Set the data once the element is ready
+      item.data_ = data;
+      __sync_synchronize();
+
+      // seq -> seq + 1
+      if (OB_FAIL(update_seq_(item, seq, seq + 1))) {
+        LOG_ERROR("update seq fail after push data", KR(ret), K(seq));
+      } else {
+        ATOMIC_INC(&size_);
+      }
+    }
+  }
+  return ret;
+}
+
+int ObConcurrentSeqQueue::pop(void *&data, const int64_t asked_seq, const int64_t timeout_us)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(!is_inited_())) {
+    ret = OB_NOT_INIT;
+  } else if (OB_UNLIKELY(asked_seq < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    OB_LOG(ERROR, "invalid sequence number", K(asked_seq));
+  } else {
+    bool ready_to_pop = false;
+    SeqItem &item = seq_item_(asked_seq);
+    int64_t end_time = ObTimeUtility::current_time() + timeout_us;
+    // The value becomes seq + 1, indicating that the data is ready
+    int64_t ready_seq = asked_seq + 1;
+
+    // Loop to wait for seq to become ready
+    while (!
ready_to_pop && OB_SUCCESS == ret) { + // First save the current seq value + int64_t item_seq = ATOMIC_LOAD(&item.seq_); + + if (OB_UNLIKELY(item_seq > ready_seq)) { + ret = OB_ENTRY_NOT_EXIST; + LOG_ERROR("invalid sequence: ENTRY_NOT_EXIST", K(asked_seq), K(ready_seq), K(item_seq)); + } else { + ready_to_pop = (item_seq == ready_seq); + } + + // waif if data is not ready + if (! ready_to_pop && OB_SUCCESS == ret) { + ret = wait_on_item_(item, item_seq, end_time); + } + } + + // Take out the data, update the seq and prepare the next round of slots + if (OB_SUCCESS == ret) { + data = item.data_; + __sync_synchronize(); + + // update value of seq FROM asked_seq + 1 TO asked_seq + limit_ + if (OB_FAIL(update_seq_(item, asked_seq + 1, asked_seq + limit_))) { + LOG_ERROR("update seq fail after pop data", K(asked_seq)); + } else { + ATOMIC_DEC(&size_); + } + } + } + + return ret; +} + +}; // end namespace common +}; // end namespace oceanbase diff --git a/src/liboblog/src/ob_concurrent_seq_queue.h b/src/liboblog/src/ob_concurrent_seq_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..753363ef2adb86951e293a6652ad7cf56c67f1d7 --- /dev/null +++ b/src/liboblog/src/ob_concurrent_seq_queue.h @@ -0,0 +1,86 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_COMMON_OB_CONCURRENT_SEQ_QUEUE_H__ +#define OCEANBASE_COMMON_OB_CONCURRENT_SEQ_QUEUE_H__ + +#include "lib/utility/ob_macro_utils.h" // DISALLOW_COPY_AND_ASSIGN +#include "lib/allocator/ob_malloc.h" // default_memattr + +namespace oceanbase +{ +namespace common +{ +// A fixed-length, concurrent Sequence Queue, where the sequence number is specified for push and pop. + +// Usage scenarios: +// 1. Different seqs are pushed in parallel, and a specific seq can only be pushed by one thread +// 2. Different seqs are popped in parallel, and a specific seq can only be popped by one thread +// 3. different seqs do not guarantee the order of push and pop, there may be 0 <= M < N, pop(N) operation earlier than push(M) +// +// Implementation idea. +// Assuming that there are a total of H slots, each of which holds an element whose seq number is predetermined. +// For slot X, it stores the element seq number: X + K * H, K >= 0 +// +// Each slot element is accompanied by a seq variable, which takes the following values, for slot X. +// 1. X + K * H: data is not ready, pending production +// 2. X + K * H + 1: data is ready, pending consumption +// +// Producer push element No. M: wait for the value of the element's seq variable to change to M, set the data, and then mark seq as M + 1, indicating that the data is ready. 
+// Consumer pop element #M: wait for the element's seq variable value to become M + 2, take the data out, then change seq to M + H and wait for the next round of push +// +class ObConcurrentSeqQueue +{ +public: + ObConcurrentSeqQueue(); + ~ObConcurrentSeqQueue(); +public: + int init(const int64_t queue_size, const ObMemAttr &memattr = default_memattr); + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval other value fail + int push(void *data, const int64_t seq, const int64_t timeout_us); + + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval other value fail + int pop(void *&data, const int64_t seq, const int64_t timeout_us); + void destroy(); + + /// valid task number; + int64_t size() const { return ATOMIC_LOAD(&size_); } +private: + static const int64_t INT32_MASK = ((1LL << 32) - 1LL); + struct SeqItem + { + volatile int64_t seq_; + void *volatile data_; + }; +private: + bool is_inited_() const; + SeqItem &seq_item_(const int64_t seq); + int wait_on_item_(SeqItem &item, const int64_t cmp_val, const int64_t end_time); + int update_seq_(SeqItem &item, + const int64_t expected_cur_seq, + const int64_t new_seq); +private: + SeqItem *items_ CACHE_ALIGNED; + int64_t limit_; + // task number + int64_t size_ CACHE_ALIGNED; +private: + DISALLOW_COPY_AND_ASSIGN(ObConcurrentSeqQueue); +}; +} // end namespace common +} // end namespace oceanbase + +#endif /* OCEANBASE_COMMON_OB_CONCURRENT_SEQ_QUEUE_H__ */ diff --git a/src/liboblog/src/ob_easy_hazard_map.h b/src/liboblog/src/ob_easy_hazard_map.h new file mode 100644 index 0000000000000000000000000000000000000000..bdf52c78a51be545deaf87fb0822a912df36f0f0 --- /dev/null +++ b/src/liboblog/src/ob_easy_hazard_map.h @@ -0,0 +1,355 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OCEANBASE_LIBOBLOG_EASY_HAZARD_MAP_
+#define OCEANBASE_LIBOBLOG_EASY_HAZARD_MAP_
+
+#include "lib/atomic/ob_atomic.h"                               // ATOMIC_*
+#include "lib/objectpool/ob_small_obj_pool.h"                   // ObSmallObjPool
+#include "lib/hash/ob_concurrent_hash_map_with_hazard_value.h"  // ObConcurrentHashMapWithHazardValue
+#include "share/ob_errno.h"                                     // KR
+
+namespace oceanbase
+{
+namespace liboblog
+{
+template <class K, class V>
+class ObEasyHazardMap
+{
+  typedef common::ObConcurrentHashMapWithHazardValue<K, V *> EMap;
+  typedef common::ObSmallObjPool<V> EPool;
+
+  class EAlloc : public EMap::IValueAlloc
+  {
+  public:
+    EAlloc() : inited_(false), pool_() {}
+    ~EAlloc() { destroy(); }
+
+  public:
+    V* alloc();
+    void free(V* value);
+    int64_t get_alloc_count() const { return pool_.get_alloc_count(); }
+    int64_t get_free_count() const { return pool_.get_free_count(); }
+
+  public:
+    int init(const int64_t max_cached_count,
+        const int64_t block_size,
+        const char *label,
+        const uint64_t tenant_id);
+    void destroy();
+
+  private:
+    bool inited_;
+    EPool pool_;
+  };
+
+public:
+  ObEasyHazardMap() : inited_(false), valid_count_(0), alloc_(), map_() {}
+  virtual ~ObEasyHazardMap() { destroy(); }
+
+public:
+  int init(const int64_t max_cached_count,
+      const int64_t block_size,
+      const char *label,
+      const uint64_t tenant_id = common::OB_SERVER_TENANT_ID);
+  void destroy();
+  int64_t get_valid_count() const { return valid_count_; }
+  int64_t get_alloc_count() const { return alloc_.get_alloc_count(); }
+  int64_t get_free_count() const { return alloc_.get_free_count(); }
+
+  /// Allocate and free elements
+  V* alloc() { return alloc_.alloc(); }
+  void free(V *value) { alloc_.free(value); }
+
+  /// contains key or not
+  ///
+  /// @param key key to find
+  ///
+  /// @retval OB_ENTRY_EXIST element exists
+  /// @retval OB_ENTRY_NOT_EXIST element does not exist
+  /// @retval other errcode fail
+  int contains_key(const K &key);
+
+  /// insert element
+  /// NOTICE:
+  /// 1. The interface has no get semantics, so there is no need to call revert(); if the element is to be used further, call get() once
+  /// 2. Elements must be allocated and freed using the alloc()/free() functions provided by this class
+  ///
+  /// @param [in] key key
+  /// @param [in] value Value
+  ///
+  /// @retval OB_SUCCESS success
+  /// @retval OB_ENTRY_EXIST element already exists
+  /// @retval other errcode fail
+  int insert(const K &key, V *value);
+
+  /// Get Key-Value record, supports creating a new Value when it does not exist
+  ///
+  /// @param [in] key key
+  /// @param [out] value value of key
+  /// @param [in] enable_create whether to allow creating a new Value object if it does not exist
+  ///
+  /// @retval OB_SUCCESS success
+  /// @retval OB_ENTRY_NOT_EXIST does not exist, returned when enable_create is false
+  /// @retval other errcode fail
+  int get(const K &key, V *&value, bool enable_create = false);
+
+  /// Give back a Value object obtained from get()
+  ///
+  /// @param value Value object
+  ///
+  /// @retval OB_SUCCESS success
+  /// @retval other errcode fail
+  int revert(V *value);
+
+  /// Delete a Key-Value record
+  ///
+  /// @param key key of the target operation
+  ///
+  /// @retval OB_SUCCESS success
+  /// @retval OB_ENTRY_NOT_EXIST key not exist
+  /// @retval other errcode fail
+  int remove(const K &key);
+
+  void print_state(const char *mod_str) const;
+
+  template <class Function> int for_each(Function &fn)
+  {
+    int ret = common::OB_SUCCESS;
+    if (!
inited_) { + ret = common::OB_NOT_INIT; + } else { + ret = map_.for_each(fn); + } + return ret; + } + +private: + bool inited_; + int64_t valid_count_; + EAlloc alloc_; + EMap map_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObEasyHazardMap); +}; + +/////////////////////////////////////////////////////////////////////////////////// + +template +int ObEasyHazardMap::EAlloc::init(const int64_t max_cached_count, + const int64_t block_size, + const char *label, + const uint64_t tenant_id) +{ + int ret = common::OB_SUCCESS; + if (inited_) { + ret = common::OB_INIT_TWICE; + } else if (max_cached_count <= 0 || block_size <= 0) { + ret = common::OB_INVALID_ARGUMENT; + } else if (OB_FAIL(pool_.init(max_cached_count, label, tenant_id, block_size))) { + LIB_LOG(ERROR, "init value pool fail", KR(ret), K(max_cached_count), K(block_size)); + } else { + inited_ = true; + } + return ret; +} + +template +void ObEasyHazardMap::EAlloc::destroy() +{ + inited_ = false; + pool_.destroy(); +} + +template +V* ObEasyHazardMap::EAlloc::alloc() +{ + int ret = common::OB_SUCCESS; + V *ret_obj = NULL; + + if (! inited_) { + ret = common::OB_NOT_INIT; + } else if (OB_FAIL(pool_.alloc(ret_obj))) { + LIB_LOG(ERROR, "alloc value from pool fail", KR(ret)); + } else { + ret_obj->reset(); + } + + return ret_obj; +} + +template +void ObEasyHazardMap::EAlloc::free(V *value) +{ + int ret = common::OB_SUCCESS; + if (inited_ && NULL != value) { + if (OB_FAIL(pool_.free(value))) { + LIB_LOG(ERROR, "free value fail", K(value), KR(ret)); + } else { + value = NULL; + } + } +} + +///////////////////////////////////////////////////////////////////////////////////// + +template +int ObEasyHazardMap::init(const int64_t max_cached_count, + const int64_t block_size, + const char *label, + const uint64_t tenant_id) +{ + int ret = common::OB_SUCCESS; + if (inited_) { + ret = common::OB_INIT_TWICE; + } else if (max_cached_count <= 0 || block_size <= 0) { + ret = common::OB_INVALID_ARGUMENT; + } else if (OB_FAIL(alloc_.init(max_cached_count, block_size, label, tenant_id))) { + LIB_LOG(ERROR, "init allocator fail", KR(ret), K(max_cached_count), K(block_size)); + } else if (OB_FAIL(map_.init(&alloc_))) { + LIB_LOG(ERROR, "init map fail", KR(ret)); + } else { + valid_count_ = 0; + inited_ = true; + } + return ret; +} + +template +void ObEasyHazardMap::destroy() +{ + inited_ = false; + valid_count_ = 0; + + // FIXME: can't call destroy of EAlloc cause EMap don't have destroy funtion + // but EMap relays on EAlloc + + // TODO: recycle each element in Map +} + +template +int ObEasyHazardMap::contains_key(const K &key) +{ + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + ret = common::OB_NOT_INIT; + } else { + ret = map_.contains_key(key); + } + return ret; +} + +template +int ObEasyHazardMap::insert(const K &key, V *value) +{ + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + ret = common::OB_NOT_INIT; + } else if (OB_FAIL(map_.put_refactored(key, value))) { + if (common::OB_ENTRY_EXIST == ret) { + // element exist + } else { + LIB_LOG(WARN, "put value into easy hazard map fail", KR(ret), K(key), K(value)); + } + } else { + // Inserted successfully, increase the number of valid + ATOMIC_INC(&valid_count_); + } + return ret; +} + +template +int ObEasyHazardMap::get(const K &key, V *&value, bool enable_create) +{ + int ret = common::OB_SUCCESS; + if (! 
inited_) { + ret = common::OB_NOT_INIT; + } else if (OB_SUCC(map_.get_refactored(key, value))) { + // succ + } else if (OB_LIKELY(common::OB_ENTRY_NOT_EXIST == ret) && OB_LIKELY(enable_create)) { + // Create a new record when the record does not exist and is allowed to create a new record + while (common::OB_ENTRY_NOT_EXIST == ret) { + if (OB_SUCC(map_.create_refactored(key, value))) { + // Created successfully and returned the object just created + ATOMIC_INC(&valid_count_); + } else if (OB_UNLIKELY(common::OB_ENTRY_EXIST == ret)) { + // Create operational conflicts and get them through the get interface + LIB_LOG(DEBUG, "create value conflict, get value instead", K(key)); + + ret = map_.get_refactored(key, value); + + if (OB_UNLIKELY(common::OB_SUCCESS != ret) + && OB_UNLIKELY(common::OB_ENTRY_NOT_EXIST != ret)) { + LIB_LOG(ERROR, "get value from map fail", KR(ret)); + } else if (OB_UNLIKELY(common::OB_ENTRY_NOT_EXIST == ret)) { + // The second get record still does not exist, which means the record has been deleted in the meantime, try again next time + LIB_LOG(WARN, "value not exist after create-get. retry immediately."); + } + } else { + LIB_LOG(ERROR, "create value from map fail", KR(ret)); + } + } + } else if (OB_UNLIKELY(common::OB_ENTRY_NOT_EXIST != ret)) { + LIB_LOG(ERROR, "get value from map fail", KR(ret), K(key)); + } + + return ret; +} + +template +int ObEasyHazardMap::revert(V *value) +{ + int ret = common::OB_SUCCESS; + if (! inited_) { + ret = common::OB_NOT_INIT; + } else if (OB_UNLIKELY(NULL == value)) { + ret = common::OB_INVALID_ARGUMENT; + } else if (OB_FAIL(map_.revert_value(value))) { + LIB_LOG(ERROR, "revert value fail", KR(ret), K(value)); + } else { + // succ + value = NULL; + } + return ret; +} + +template +int ObEasyHazardMap::remove(const K &key) +{ + int ret = common::OB_SUCCESS; + if (! inited_) { + ret = common::OB_NOT_INIT; + } else if (OB_FAIL(map_.remove_refactored(key))) { + if (common::OB_ENTRY_NOT_EXIST != ret) { + LIB_LOG(ERROR, "remove value fail", KR(ret), K(key)); + } + } else { + // succ + ATOMIC_DEC(&valid_count_); + } + return ret; +} + +template +void ObEasyHazardMap::print_state(const char *mod_str) const +{ + _LIB_LOG(INFO, "%s VALID=%ld HAZARD_CACHED=%ld ALLOC=%ld FREE=%ld", + mod_str, + get_valid_count(), + get_alloc_count() - get_free_count() - get_valid_count(), + get_alloc_count(), + get_free_count()); +} +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_EASY_HAZARD_MAP_ */ diff --git a/src/liboblog/src/ob_log_adapt_string.cpp b/src/liboblog/src/ob_log_adapt_string.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aa8bead67b4f9333b3379da8af364c85396efb07 --- /dev/null +++ b/src/liboblog/src/ob_log_adapt_string.cpp @@ -0,0 +1,164 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_adapt_string.h" + +#include "lib/allocator/ob_malloc.h" // ob_free +#include "lib/utility/ob_print_utils.h" // databuff_printf + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogAdaptString::ObLogAdaptString(const char *label) : + attr_(500, label), + buf_() +{} + +ObLogAdaptString::~ObLogAdaptString() +{ + if (NULL != buf_.get_data()) { + ob_free(buf_.get_data()); + } + buf_.reset(); +} + +int ObLogAdaptString::append(const char *data) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data)) { + LOG_ERROR("invalid argument", K(data)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t size = strlen(data); + char *data_buf = NULL; + + // Prepare memory even if the data length is 0, because the empty string case should be supported + if (OB_FAIL(alloc_buf_(size, data_buf))) { + LOG_ERROR("allocate buffer fail", KR(ret), K(size), K(data_buf)); + } + // Non-empty string case requires buffer to be valid + else if (OB_UNLIKELY(size > 0 && NULL == data_buf)) { + LOG_ERROR("data buffer is invalid", K(data_buf), K(size)); + ret = OB_ERR_UNEXPECTED; + } else if (size > 0) { + // copy data into data_buf + (void)MEMCPY(data_buf, data, size); + } + } + return ret; +} + +int ObLogAdaptString::append_int64(const int64_t int_val) +{ + int ret = OB_SUCCESS; + static const int64_t MAX_INT_CHAR_LEN = 32; + char data_buf[MAX_INT_CHAR_LEN]; + // First print the number to the buffer, then append to the end + if (OB_FAIL(databuff_printf(data_buf, sizeof(data_buf), "%ld", int_val))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(sizeof(data_buf)), K(data_buf), K(int_val)); + } else if (OB_FAIL(append(data_buf))) { + LOG_ERROR("append string fail", KR(ret), K(sizeof(data_buf)), K(int_val)); + } else { + // success + } + return ret; +} + +// Supports calling the append function again after cstr +int ObLogAdaptString::cstr(const char *&str) +{ + int ret = OB_SUCCESS; + if (buf_.get_data() == NULL) { + str = ""; + } + // Require that there must be space left to store \0 + else if (OB_UNLIKELY(buf_.get_remain() <= 0)) { + LOG_ERROR("remain buffer is not enough", K(buf_)); + ret = OB_ERR_UNEXPECTED; + } else { + // Fill \0, but do not change the pos position, the purpose is to support continued filling + buf_.get_data()[buf_.get_position()] = '\0'; + str = buf_.get_data(); + } + return ret; +} + +int ObLogAdaptString::alloc_buf_(const int64_t data_size, char *&data_buf) +{ + static const int64_t STRING_DEFAULT_SIZE = 8 * _K_; + + int ret = OB_SUCCESS; + // The prepared buffer should always be larger than the data length, as it will be filled with \0 at the end. + int64_t expected_buf_size = data_size + 1; + data_buf = NULL; + + // First prepare the buffer, if the buffer is empty, then create a new buffer + if (NULL == buf_.get_data()) { + int64_t alloc_size = std::max(expected_buf_size, STRING_DEFAULT_SIZE); + char *new_buf = static_cast(ob_malloc(alloc_size, attr_)); + + if (OB_ISNULL(new_buf)) { + LOG_ERROR("allocate memory fail", K(new_buf), K(alloc_size), K(expected_buf_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_UNLIKELY(! 
buf_.set_data(new_buf, alloc_size))) { + LOG_ERROR("set data fail", K(buf_), K(new_buf), K(alloc_size)); + ret = OB_ERR_UNEXPECTED; + } + } + // If there is not enough space left in the buffer, reallocate a larger space + else if (buf_.get_remain() < expected_buf_size) { + int64_t realloc_size = buf_.get_capacity() + std::max(expected_buf_size, STRING_DEFAULT_SIZE); + char *new_buf = static_cast(ob_realloc(buf_.get_data(), realloc_size, attr_)); + + if (OB_ISNULL(new_buf)) { + LOG_ERROR("realloc memory fail", K(new_buf), K(realloc_size), K(expected_buf_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + int64_t pos = buf_.get_position(); + + buf_.reset(); + if (OB_UNLIKELY(! buf_.set_data(new_buf, realloc_size))) { + LOG_ERROR("set data fail", K(buf_), K(new_buf), K(realloc_size)); + ret = OB_ERR_UNEXPECTED; + } + // Reallocate previously allocated memory + else if (OB_ISNULL(buf_.alloc(pos))) { + LOG_ERROR("allocate old memory from buf fail", K(pos), K(buf_)); + ret = OB_ERR_UNEXPECTED; + } + } + } + + + if (OB_SUCCESS == ret) { + // After the buffer is ready, allocate the memory, allocate the memory of the size of the data, here you can not allocate \0 memory, because it will repeatedly fill the data + // Allocate memory only if data_size is greater than 0 + if (data_size > 0) { + if (OB_ISNULL(data_buf = static_cast(buf_.alloc(data_size)))) { + LOG_ERROR("allocate buffer fail", KR(ret), K(data_size), K(buf_)); + ret = OB_ERR_UNEXPECTED; + } else { + // success + } + } + } + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_adapt_string.h b/src/liboblog/src/ob_log_adapt_string.h new file mode 100644 index 0000000000000000000000000000000000000000..59abe095171a93dfc80fd6845691f33d6d48bfd4 --- /dev/null +++ b/src/liboblog/src/ob_log_adapt_string.h @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_OB_LOG_ADAPT_STRING_H__ +#define OCEANBASE_OB_LOG_ADAPT_STRING_H__ + +#include "lib/alloc/alloc_struct.h" // ObMemAttr +#include "common/data_buffer.h" // ObDataBuffer + +#include "ob_log_utils.h" // _K_ + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogAdaptString +{ +public: + explicit ObLogAdaptString(const char *label); + virtual ~ObLogAdaptString(); + + int append(const char *data); + int append_int64(const int64_t int_val); + + // Supports calling append function again after cstr to fill + // If the user has not called the append function, the empty string is returned, for compatibility with std::string + int cstr(const char *&str); + +public: + TO_STRING_KV(K_(buf)); + +private: + int alloc_buf_(const int64_t data_size, char *&data_buf); + +private: + lib::ObMemAttr attr_; + common::ObDataBuffer buf_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogAdaptString); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_all_svr_cache.cpp b/src/liboblog/src/ob_log_all_svr_cache.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d407d7c97387ac78ba00a5a111e2aa481f57a08d --- /dev/null +++ b/src/liboblog/src/ob_log_all_svr_cache.cpp @@ -0,0 +1,572 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_all_svr_cache.h" + +#include "lib/allocator/ob_mod_define.h" // ObModIds +#include "lib/utility/ob_macro_utils.h" // OB_ISNULL, ... 
+#include "lib/oblog/ob_log_module.h" // LOG_* + +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_systable_helper.h" // IObLogSysTableHelper +#include "ob_log_config.h" // ObLogConfig + +using namespace oceanbase::common; +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace liboblog +{ + +int64_t ObLogAllSvrCache::g_all_server_cache_update_interval= + ObLogConfig::default_all_server_cache_update_interval_sec * _SEC_; +int64_t ObLogAllSvrCache::g_all_zone_cache_update_interval= + ObLogConfig::default_all_zone_cache_update_interval_sec * _SEC_; + +ObRegion ObLogAllSvrCache::g_assign_region=ObRegion(""); + +ObLogAllSvrCache::ObLogAllSvrCache() : + tid_(0), + err_handler_(NULL), + systable_helper_(NULL), + stop_flag_(true), + cur_version_(0), + cur_zone_version_(0), + zone_need_update_(false), + zone_last_update_tstamp_(OB_INVALID_TIMESTAMP), + is_region_info_valid_(true), + svr_map_(), + zone_map_() +{} + +ObLogAllSvrCache::~ObLogAllSvrCache() +{ + destroy(); +} + +const char *ObLogAllSvrCache::print_svr_status(StatusType status) +{ + const char *str = "UNKNOWN"; + int ret = OB_SUCCESS; + + if (OB_FAIL(ObServerStatus::display_status_str(status, str))) { + str = "UNKNOWN"; + } + + return str; +} + +bool ObLogAllSvrCache::is_svr_avail(const common::ObAddr &svr) +{ + bool bool_ret = false; + RegionPriority region_priority = REGION_PRIORITY_UNKNOWN; + + bool_ret = is_svr_avail(svr, region_priority); + + return bool_ret; +} + +bool ObLogAllSvrCache::is_svr_avail( + const common::ObAddr &svr, + RegionPriority ®ion_priority) +{ + int ret = OB_SUCCESS; + bool bool_ret = false; + region_priority = REGION_PRIORITY_UNKNOWN; + + SvrItem svr_item; + ZoneItem zone_item; + + if (OB_FAIL(get_svr_item_(svr, svr_item))) { + bool_ret = false; + } else if (OB_FAIL(get_zone_item_(svr_item.zone_, zone_item))) { + LOG_ERROR("failed to get zone item", KR(ret), K(svr_item), K(zone_item)); + } else if (is_svr_serve_(svr_item)) { + bool_ret = true; + // get region priority of server if server is available + region_priority = svr_item.region_priority_; + LOG_DEBUG("is svr avail", K(svr), K(region_priority), K(zone_item)); + } else { + region_priority = REGION_PRIORITY_UNKNOWN; + LOG_DEBUG("svr not avail", K(svr), K(zone_item), K(svr_item)); + } + + return bool_ret; +} + +int ObLogAllSvrCache::get_svr_item_(const common::ObAddr &svr, SvrItem &item) +{ + int ret = OB_SUCCESS; + int64_t cur_ver = ATOMIC_LOAD(&cur_version_); + + if (OB_FAIL(svr_map_.get(svr, item))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get server item from map fail", KR(ret), K(svr)); + } + } else if (item.version_ < cur_ver) { + // treat as invalid record if version of data little than current version + ret = OB_ENTRY_NOT_EXIST; + } else { + // succ + } + + LOG_DEBUG("[STAT] [ALL_SVR_CACHE] [GET_SVR_ITEM]", KR(ret), K(svr), + "status", OB_SUCCESS == ret ? 
print_svr_status(item.status_) : "NOT_EXIST", + "svr_ver", item.version_, K(cur_ver), + "zone", item.zone_, + "region_priority", item.region_priority_); + + return ret; +} + +int ObLogAllSvrCache::get_region_priority_(const common::ObRegion ®ion, + RegionPriority &priority) +{ + int ret = OB_SUCCESS; + + if (is_assign_region_(region)) { + // specified region + priority = REGION_PRIORITY_HIGH; + } else { + // other region or empty region + priority = REGION_PRIORITY_LOW; + } + LOG_DEBUG("get region priority", K(region), K(g_assign_region)); + + return ret; +}; + +bool ObLogAllSvrCache::is_svr_serve_(const SvrItem &svr_item) const +{ + bool bool_ret = false; + StatusType status = svr_item.status_; + bool_ret = ObServerStatus::OB_SERVER_ACTIVE == status + || ObServerStatus::OB_SERVER_DELETING == status; + return bool_ret; +} + +bool ObLogAllSvrCache::is_assign_region_(const common::ObRegion ®ion) const +{ + bool bool_ret = false; + + // ignore case + bool_ret = (0 == strncasecmp(g_assign_region.ptr(), + region.ptr(), + g_assign_region.size())); + + return bool_ret; +} + +int ObLogAllSvrCache::init(IObLogSysTableHelper &systable_helper, IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + int pthread_ret = 0; + + if (OB_FAIL(svr_map_.init(ObModIds::OB_LOG_ALL_SERVER_CACHE))) { + LOG_ERROR("init svr map fail", KR(ret)); + } else if (OB_FAIL(zone_map_.init(ObModIds::OB_LOG_ALL_SERVER_CACHE))) { + LOG_ERROR("init zone map fail", KR(ret)); + } else { + tid_ = 0; + stop_flag_ = false; + cur_version_ = 0; + cur_zone_version_ = 0; + zone_need_update_ = false; + zone_last_update_tstamp_ = OB_INVALID_TIMESTAMP; + is_region_info_valid_ = true; + err_handler_ = &err_handler; + systable_helper_ = &systable_helper; + + LOG_INFO("init all svr cache succ"); + + if (OB_UNLIKELY(0 != (pthread_ret = pthread_create(&tid_, NULL, thread_func_, this)))) { + LOG_ERROR("create thread for all server cache fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } + } + return ret; +} + +void ObLogAllSvrCache::destroy() +{ + stop_flag_ = true; + + if (0 != tid_) { + int pthread_ret = pthread_join(tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("pthread_join fail", K(tid_), K(pthread_ret), KERRNOMSG(pthread_ret)); + } + + tid_ = 0; + } + + err_handler_ = NULL; + systable_helper_ = NULL; + + cur_version_ = 0; + cur_zone_version_ = 0; + zone_need_update_ = false; + zone_last_update_tstamp_ = OB_INVALID_TIMESTAMP; + is_region_info_valid_ = true; + (void)svr_map_.destroy(); + (void)zone_map_.destroy(); + + LOG_INFO("destroy all svr cache succ"); +} + +void *ObLogAllSvrCache::thread_func_(void *arg) +{ + ObLogAllSvrCache *host = static_cast(arg); + + if (NULL != host) { + host->run(); + } + + return NULL; +} + +void ObLogAllSvrCache::run() +{ + int ret = OB_SUCCESS; + + LOG_INFO("all svr cache thread start"); + + while (! 
stop_flag_ && OB_SUCCESS == ret) { + if (need_update_zone_()) { + if (OB_FAIL(update_zone_cache_())) { + LOG_ERROR("update zone cache error", KR(ret)); + } else if (OB_FAIL(purge_stale_zone_records_())) { + LOG_ERROR("purge stale records fail", KR(ret)); + } else { + // do nothing + } + } + + if (OB_SUCC(ret)) { + int64_t all_svr_cache_update_interval = ATOMIC_LOAD(&g_all_server_cache_update_interval); + if (REACH_TIME_INTERVAL(all_svr_cache_update_interval)) { + if (OB_FAIL(update_server_cache_())) { + LOG_ERROR("update server cache error", KR(ret)); + } else if (OB_FAIL(purge_stale_records_())) { + LOG_ERROR("purge stale records fail", KR(ret)); + } else { + // succ + } + } + } + + // sleep + usec_sleep(USLEEP_INTERVAL); + } + + if (OB_SUCCESS != ret) { + if (NULL != err_handler_) { + err_handler_->handle_error(ret, "all server cache update thread exits, err=%d", ret); + } + } + + LOG_INFO("all svr cache thread stop", KR(ret)); +} + +void ObLogAllSvrCache::configure(const ObLogConfig & config) +{ + int ret = OB_SUCCESS; + + int64_t all_server_cache_update_interval_sec = config.all_server_cache_update_interval_sec; + ATOMIC_STORE(&g_all_server_cache_update_interval, all_server_cache_update_interval_sec * _SEC_); + int64_t all_zone_cache_update_interval_sec = config.all_zone_cache_update_interval_sec; + ATOMIC_STORE(&g_all_zone_cache_update_interval, all_zone_cache_update_interval_sec * _SEC_); + + if (OB_FAIL(g_assign_region.assign(config.region.str()))) { + LOG_ERROR("g_assign_region assign fail", KR(ret), K(g_assign_region)); + } + + LOG_INFO("[CONFIG]", K(all_server_cache_update_interval_sec)); + LOG_INFO("[CONFIG]", K(all_zone_cache_update_interval_sec)); + LOG_INFO("[CONFIG]", K(g_assign_region)); +} + +bool ObLogAllSvrCache::need_update_zone_() +{ + bool bool_ret = false; + + int64_t all_zone_cache_update_interval = ATOMIC_LOAD(&g_all_zone_cache_update_interval); + bool is_region_info_valid = ATOMIC_LOAD(&is_region_info_valid_); + int64_t update_delta_time = get_timestamp() - zone_last_update_tstamp_; + + if (!is_region_info_valid) { + bool_ret = false; + } + // need update if never update + else if (OB_INVALID_TIMESTAMP == zone_last_update_tstamp_) { + bool_ret = true; + } + // update if set zone_need_update_ + else if (zone_need_update_) { + bool_ret = true; + } + // update by interval + else if (update_delta_time >= all_zone_cache_update_interval) { + bool_ret = true; + } + + return bool_ret; +} + +int ObLogAllSvrCache::update_zone_cache_() +{ + int ret = OB_SUCCESS; + IObLogSysTableHelper::AllZoneRecordArray record_array; + IObLogSysTableHelper::AllZoneTypeRecordArray zone_type_record_array; + record_array.reset(); + + if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("invalid systable helper", K(systable_helper_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(systable_helper_->query_all_zone_info(record_array))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("query all zone info need retry", KR(ret)); + ret = OB_SUCCESS; + } else if (OB_ITEM_NOT_SETTED == ret) { + ATOMIC_STORE(&is_region_info_valid_, false); + LOG_INFO("'region' is not availalbe in __all_zone table. 
would not update zone cache", + K_(is_region_info_valid)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("query all zone info fail", KR(ret)); + } + } else if (OB_FAIL(systable_helper_->query_all_zone_type(zone_type_record_array))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("query all zone type need retry", KR(ret)); + ret = OB_SUCCESS; + } else if (OB_ITEM_NOT_SETTED == ret) { + LOG_INFO("'zone_type' is not availalbe in __all_zone table. would not update zone cache", K(zone_type_record_array)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("query all zone type fail", KR(ret)); + } + } else { + int64_t next_version = cur_zone_version_ + 1; + + for (int64_t index = 0; OB_SUCCESS == ret && index < record_array.count(); index++) { + IObLogSysTableHelper::AllZoneRecord &record = record_array.at(index); + common::ObZone &zone = record.zone_; + common::ObRegion ®ion = record.region_; + + _LOG_INFO("[STAT] [ALL_ZONE] INDEX=%ld/%ld ZONE=%s REGION=%s VERSION=%lu", + index, record_array.count(), to_cstring(zone), to_cstring(region), next_version); + + ZoneItem item; + item.reset(next_version, region); + LOG_DEBUG("update zone cache item", K(zone), K(item)); + + if (OB_FAIL(zone_map_.insert_or_update(zone, item))) { + LOG_ERROR("zone_map_ insert_or_update fail", KR(ret), K(zone), K(item)); + } + } + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < zone_type_record_array.count(); idx ++) { + ZoneItem item; + IObLogSysTableHelper::AllZoneTypeRecord &record = zone_type_record_array.at(idx); + common::ObZone &zone = record.zone_; + common::ObZoneType &zone_type = record.zone_type_; + if (OB_FAIL(get_zone_item_(zone, item))) { + LOG_ERROR("fail to get zone item from cache by zone", KR(ret), K(zone)); + } else { + item.set_zone_type(zone_type); + if (OB_FAIL(zone_map_.insert_or_update(zone, item))) { + LOG_ERROR("zone_map_ insert_or_update set zone_type fail", KR(ret), K(zone), K(item), K(zone_type)); + } + } + _LOG_INFO("[STAT] [ALL_ZONE] INDEX=%ld/%ld ZONE=%s ZONE_TYPE=%s VERSION=%lu", + idx, zone_type_record_array.count(), to_cstring(zone), zone_type_to_str(item.get_zone_type()), next_version); + } + + ATOMIC_INC(&cur_zone_version_); + _LOG_INFO("[STAT] [ALL_ZONE] COUNT=%ld VERSION=%lu", record_array.count(), cur_zone_version_); + } + + if (OB_SUCC(ret)) { + zone_need_update_ = false; + zone_last_update_tstamp_ = get_timestamp(); + } + + return ret; +} + +int ObLogAllSvrCache::update_server_cache_() +{ + int ret = OB_SUCCESS; + IObLogSysTableHelper::AllServerRecordArray record_array(common::ObModIds::OB_LOG_ALL_SERVER_ARRAY, common::OB_MALLOC_NORMAL_BLOCK_SIZE); + record_array.reset(); + + if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("invalid systable helper", K(systable_helper_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(systable_helper_->query_all_server_info(record_array))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("query all server info need retry", KR(ret)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("query all server info fail", KR(ret)); + } + } else { + int64_t next_version = cur_version_ + 1; + + for (int64_t index = 0; OB_SUCCESS == ret && index < record_array.count(); index++) { + IObLogSysTableHelper::AllServerRecord &record = record_array.at(index); + ObAddr svr; + svr.set_ip_addr(record.svr_ip_, record.svr_port_); + const char *status_str = NULL; + + int tmp_ret = ObServerStatus::display_status_str(record.status_, status_str); + if (OB_SUCCESS != tmp_ret) { + LOG_ERROR("invalid server status, can not cast to string", K(tmp_ret), + K(record.status_), K(svr)); + } + + ZoneItem 
zone_item; + RegionPriority region_priority = REGION_PRIORITY_UNKNOWN; + + if (OB_FAIL(get_zone_item_(record.zone_, zone_item))) { + LOG_ERROR("get_zone_item_ fail", KR(ret), "zone", record.zone_, K(zone_item)); + } else if (OB_FAIL(get_region_priority_(zone_item.region_, region_priority))) { + LOG_ERROR("get priority based region fail", KR(ret), K(svr), + "region", zone_item.region_, + "region_priority", print_region_priority(region_priority)); + } else { + SvrItem item; + item.reset(record.status_, next_version, record.zone_, region_priority); + LOG_DEBUG("update cache item", K(item)); + + if (OB_FAIL(svr_map_.insert_or_update(svr, item))) { + LOG_ERROR("svr_map_ insert_or_update fail", KR(ret), K(svr), K(item)); + } + } + + _LOG_INFO("[STAT] [ALL_SERVER_LIST] INDEX=%ld/%ld SERVER=%s STATUS=%d(%s) " + "ZONE=%s REGION=%s(%s) VERSION=%lu", + index, record_array.count(), to_cstring(svr), record.status_, status_str, + to_cstring(record.zone_), to_cstring(zone_item.region_), + print_region_priority(region_priority), next_version); + } + + ATOMIC_INC(&cur_version_); + _LOG_INFO("[STAT] [ALL_SERVER_LIST] COUNT=%ld VERSION=%lu", record_array.count(), cur_version_); + } + + return ret; +} + +int ObLogAllSvrCache::get_zone_item_(const common::ObZone &zone, + ZoneItem &zone_item) +{ + int ret = OB_SUCCESS; + bool is_region_info_valid = ATOMIC_LOAD(&is_region_info_valid_); + zone_item.reset(); + + if (!is_region_info_valid) { + LOG_DEBUG("region is invalid, do not use"); + } else { + if (OB_FAIL(zone_map_.get(zone, zone_item))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("zone_map_ get zone_item fail", KR(ret), K(zone), K(zone_item)); + } else { + // update all zone cache if can't get region info in __zone_map + zone_need_update_ = true; + LOG_DEBUG("zone_map_ get zone_item not exist, need update", KR(ret), K(zone)); + } + } + + int64_t cur_zone_ver = ATOMIC_LOAD(&cur_zone_version_); + if (OB_SUCCESS == ret) { + if (zone_item.version_ < cur_zone_ver) { + // treate as invalid record if version little than current version + ret = OB_ENTRY_NOT_EXIST; + } else { + // do nothing + } + } + } + + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + zone_item.reset(); + } + + return ret; +} + +int ObLogAllSvrCache::purge_stale_records_() +{ + int ret = OB_SUCCESS; + StaleRecPurger purger(cur_version_); + + if (OB_FAIL(svr_map_.remove_if(purger))) { + LOG_ERROR("remove if fail", KR(ret), K(cur_version_)); + } else { + _LOG_INFO("[STAT] [ALL_SERVER_LIST] [PURGE] PURGE_COUNT=%ld CUR_COUNT=%ld VERSION=%lu", + purger.purge_count_, svr_map_.count(), cur_version_); + } + return ret; +} + +int ObLogAllSvrCache::purge_stale_zone_records_() +{ + int ret = OB_SUCCESS; + StaleZoneRecPurger purger(cur_zone_version_); + + if (OB_FAIL(zone_map_.remove_if(purger))) { + LOG_ERROR("zone_map_ remove if fail", KR(ret), K(cur_zone_version_)); + } else { + _LOG_INFO("[STAT] [ALL_ZONE] [PURGE] PURGE_COUNT=%ld CUR_COUNT=%ld VERSION=%lu", + purger.purge_count_, zone_map_.count(), cur_zone_version_); + } + return ret; +} + +bool ObLogAllSvrCache::StaleRecPurger::operator()(const common::ObAddr &svr, + const SvrItem &svr_item) +{ + bool need_purge = (svr_item.version_ < cur_ver_); + + if (need_purge) { + purge_count_++; + _LOG_INFO("[STAT] [ALL_SERVER_LIST] [PURGE] SERVER=%s VERSION=%lu/%lu", + to_cstring(svr), svr_item.version_, cur_ver_); + } + return need_purge; +} + +bool ObLogAllSvrCache::StaleZoneRecPurger::operator()(const common::ObZone &zone, + const ZoneItem &zone_item) +{ + bool need_purge = (zone_item.version_ < 
cur_ver_); + + if (need_purge) { + purge_count_++; + _LOG_INFO("[STAT] [ALL_ZONE] [PURGE] ZONE=%s VERSION=%lu/%lu", + zone.ptr(), zone_item.version_, cur_ver_); + } + return need_purge; +} + +void ObLogAllSvrCache::set_update_interval_(const int64_t time) +{ + ATOMIC_STORE(&g_all_server_cache_update_interval, time); +} + +} +} diff --git a/src/liboblog/src/ob_log_all_svr_cache.h b/src/liboblog/src/ob_log_all_svr_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..5a5eb5fcd12d1bcdf8bdf2975f25cb1042a19601 --- /dev/null +++ b/src/liboblog/src/ob_log_all_svr_cache.h @@ -0,0 +1,228 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_ALL_SVR_CACHE_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_ALL_SVR_CACHE_H__ + +#include // pthread_* + +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "common/ob_zone.h" // ObZone +#include "common/ob_zone_type.h" // ObZoneType +#include "common/ob_region.h" // ObRegin +#include "share/ob_server_status.h" // ObServerStatus + +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_server_priority.h" // RegionPriority + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogAllSvrCache +{ +public: + typedef share::ObServerStatus::DisplayStatus StatusType; + + static const int64_t USLEEP_INTERVAL = 1 * 1000 * 1000; + +public: + virtual ~IObLogAllSvrCache() {} + + // check server is available or not + // server avail means server can serve for RPC(locate start log id, fetch log, ...) + virtual bool is_svr_avail(const common::ObAddr &svr) = 0; + + // 1. check server is available or not + // 2. if svr is available, return region priority of region, otherwise return REGION_PRIORITY_UNKNOWN + // server is available means: + // 1. server status is ACTIVE or DELETING + // 2. 
server not in ENCRYPTION zone + // + // @param [in] svr server addr + // @param [out] region_priority region priority + // + // @retval true server is available + // @retval false server is not available + virtual bool is_svr_avail( + const common::ObAddr &svr, + RegionPriority &region_priority) = 0; + +}; + +///////////////////// ObLogAllSvrCache ////////////////////// + +class IObLogErrHandler; +class IObLogSysTableHelper; +class ObLogConfig; +class ObLogAllSvrCache : public IObLogAllSvrCache +{ + // class static variables +public: + static int64_t g_all_server_cache_update_interval; + static int64_t g_all_zone_cache_update_interval; + // specified region (used for svr priority) + static common::ObRegion g_assign_region; + +public: + ObLogAllSvrCache(); + virtual ~ObLogAllSvrCache(); + +public: + virtual bool is_svr_avail(const common::ObAddr &svr); + virtual bool is_svr_avail( + const common::ObAddr &svr, + RegionPriority &region_priority); + + static const char *print_svr_status(StatusType status); + +public: + int init(IObLogSysTableHelper &systable_helper, IObLogErrHandler &err_handler); + void destroy(); + void run(); + +public: + static void configure(const ObLogConfig & config); + +private: + struct SvrItem; + int get_svr_item_(const common::ObAddr &svr, SvrItem &item); + // 1. get region of the specified zone from zone_map_ + // 2. refresh the __all_zone cache and retry the query if it returns ret == OB_ENTRY_NOT_EXIST + struct ZoneItem; + int get_zone_item_(const common::ObZone &zone, ZoneItem &zone_item); + // two region priorities, ordered from high to low: + // 1. region_priority = REGION_PRIORITY_HIGH if the current region equals the specified region (g_assign_region) + // 2. other region or empty region (no region info for lower-version observers): + // region_priority = REGION_PRIORITY_LOW + int get_region_priority_(const common::ObRegion &region, RegionPriority &priority); + bool is_assign_region_(const common::ObRegion &region) const; + static void *thread_func_(void *arg); + bool need_update_zone_(); + int update_zone_cache_(); + int update_server_cache_(); + int purge_stale_records_(); + int purge_stale_zone_records_();
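+  // For reference, an illustrative (hypothetical) call site of the public
+  // is_svr_avail() interface above; the variable names below are examples only:
+  //   RegionPriority region_priority = REGION_PRIORITY_UNKNOWN;
+  //   if (all_svr_cache.is_svr_avail(svr, region_priority)) {
+  //     // svr can serve RPC requests; rank it by region_priority when ordering candidates
+  //   }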
+  // NOTE: a server is considered to serve in the following case: + // 1. server status is ACTIVE or DELETING + bool is_svr_serve_(const SvrItem &svr_item) const; + +private: + struct SvrItem + { + StatusType status_; + uint64_t version_; + common::ObZone zone_; + RegionPriority region_priority_; + + void reset(const StatusType status, + const uint64_t version, + const common::ObZone &zone, + const RegionPriority region_priority) + { + status_ = status; + version_ = version; + zone_ = zone; + region_priority_ = region_priority; + } + + TO_STRING_KV(K_(status), K_(version), K_(zone), + "region_priority", print_region_priority(region_priority_)); + }; + typedef common::ObLinearHashMap<common::ObAddr, SvrItem> SvrMap; + + struct ZoneItem + { + // Compatibility: lower-version observers do not have the region field, + // so the region is empty by default + common::ObRegion region_; + common::ObZoneType zone_type_; + uint64_t version_; + + void reset() + { + version_ = -1; + region_.reset(); + zone_type_ = common::ZONE_TYPE_INVALID; + } + + void reset(const uint64_t version, + const common::ObRegion &region) + { + version_ = version; + region_ = region; + zone_type_ = common::ZONE_TYPE_INVALID; + } + + void set_zone_type(const common::ObZoneType &zone_type) + { + zone_type_ = zone_type; + } + + const common::ObZoneType& get_zone_type() const { return zone_type_; } + + TO_STRING_KV(K_(region), "zone_type", zone_type_to_str(zone_type_), K_(version)); + }; + typedef common::ObLinearHashMap<common::ObZone, ZoneItem> ZoneMap; + + struct StaleRecPurger + { + uint64_t cur_ver_; + int64_t purge_count_; + + explicit StaleRecPurger(const int64_t ver) : cur_ver_(ver), purge_count_(0) + {} + + bool operator()(const common::ObAddr &svr, const SvrItem &svr_item); + }; + + struct StaleZoneRecPurger + { + uint64_t cur_ver_; + int64_t purge_count_; + + explicit StaleZoneRecPurger(const int64_t ver) : cur_ver_(ver), purge_count_(0) + {} + + bool operator()(const common::ObZone &zone, const ZoneItem &zone_item); + }; + + // set g_all_server_cache_update_interval for unit tests + static void set_update_interval_(const int64_t time); +private: + pthread_t tid_; + IObLogErrHandler *err_handler_; + IObLogSysTableHelper *systable_helper_; + + bool stop_flag_ CACHE_ALIGNED; + uint64_t cur_version_ CACHE_ALIGNED; + uint64_t cur_zone_version_ CACHE_ALIGNED; + + bool zone_need_update_; + int64_t zone_last_update_tstamp_; + // For compatibility with low-version observers, region information is assumed to exist by default; + // if update_zone_cache_ does not find the record, no region information exists + bool is_region_info_valid_ CACHE_ALIGNED; + + SvrMap svr_map_; + ZoneMap zone_map_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogAllSvrCache); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_binlog_record.cpp b/src/liboblog/src/ob_log_binlog_record.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f380e64f966e1756136e24064a0f5445935a6aa7 --- /dev/null +++ b/src/liboblog/src/ob_log_binlog_record.cpp @@ -0,0 +1,450 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include // ITableMeta + +#include "ob_log_binlog_record.h" +#include "ob_log_utils.h" +#include "ob_log_instance.h" // TCTX + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +const char *ObLogBR::COLUMN_CHANGED_LABEL_PTR = ""; +const char *ObLogBR::COLUMN_UNCHANGED_LABEL_PTR = NULL; + +ObLogBR::ObLogBR() : ObLogResourceRecycleTask(ObLogResourceRecycleTask::BINLOG_RECORD_TASK), + data_(NULL), + is_serilized_(false), + host_(NULL), + log_entry_task_(NULL), + next_(NULL), + valid_(true), + precise_timestamp_(0), + freeze_version_(), + tenant_id_(OB_INVALID_TENANT_ID), + ddl_schema_version_(OB_INVALID_VERSION), + part_trans_task_count_(0) +{ +} + +ObLogBR::~ObLogBR() +{ + reset(); + + destruct_data_(); +} + +void ObLogBR::construct_data_(const bool creating_binlog_record) +{ + data_ = LogMsgFactory::createLogRecord(TCTX.drc_message_factory_binlog_record_type_, creating_binlog_record); + + if (OB_ISNULL(data_)) { + OBLOG_LOG(ERROR, "LogMsgFactory::createLogRecord fails"); + } else { + // set user data pointer to the pointer hold the binlog record + data_->setUserData(this); + } +} + +void ObLogBR::destruct_data_() +{ + if (NULL != data_) { + LogMsgFactory::destroy(data_); + data_ = NULL; + } +} + +void ObLogBR::reset() +{ + if (NULL != data_) { + data_->clear(); + + // note reset all filed used by liboblog, cause clear() may won't reset fields + + // clear new/old column array + data_->setNewColumn(NULL, 0); + data_->setOldColumn(NULL, 0); + + // clear TableMeta and IDBMeta + data_->setTableMeta(NULL); + data_->setTbname(NULL); + data_->setDBMeta(NULL); + data_->setDbname(NULL); + + // set user data pointer to the pointer hold the binlog record + data_->setUserData(this); + } + + host_ = NULL; + log_entry_task_ = NULL; + next_ = NULL; + valid_ = true; + precise_timestamp_ = 0; + freeze_version_.reset(); + tenant_id_ = OB_INVALID_TENANT_ID; + part_trans_task_count_ = 0; +} + +int ObLogBR::set_table_meta(ITableMeta *table_meta) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(table_meta)) { + LOG_ERROR("invalid argument", K(table_meta)); + ret = OB_INVALID_ARGUMENT; + } else { + data_->setTableMeta(table_meta); + data_->setTbname(table_meta->getName()); + } + + return ret; +} + +int ObLogBR::set_db_meta(IDBMeta *db_meta) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(db_meta)) { + LOG_ERROR("invalid argument", K(db_meta)); + ret = OB_INVALID_ARGUMENT; + } else { + data_->setDBMeta(db_meta); + data_->setDbname(db_meta->getName()); + } + + return ret; +} + +int ObLogBR::init_dml_data_first(const RecordType type, + const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(EUNKNOWN == type) + || OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + LOG_ERROR("invalid argument", K(type), K(tenant_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_binlog_record_first_(type))) { + LOG_ERROR("init_binlog_record_first_ fail", KR(ret), K(type), K(tenant_id)); + } else { + tenant_id_ = tenant_id; + set_next(NULL); + valid_ = true; + } + + return ret; +} + +int ObLogBR::init_dml_data_second(const RecordType type, + const uint64_t cluster_id, + const int64_t tenant_id, + const common::ObString &trace_id, + const 
common::ObString &trace_info, + const common::ObString &unique_id, + const common::ObVersion &freeze_version, + const int64_t commit_version) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(init_binlog_record_second_(type, cluster_id, trace_id, trace_info, unique_id, commit_version))) { + LOG_ERROR("init_binlog_record_second_ fail", KR(ret), K(type), K(cluster_id), K(trace_id), K(trace_info), + K(unique_id), K(commit_version)); + } else { + LOG_DEBUG("init_dml_data_second succ", "type", print_record_type(type), K(cluster_id), K(tenant_id), + K(trace_id), K(trace_info), K(unique_id), K(commit_version)); + + set_precise_timestamp(commit_version); + freeze_version_ = freeze_version; + } + + return ret; +} + +int ObLogBR::init_data(const RecordType type, + const uint64_t cluster_id, + const uint64_t tenant_id, + const int64_t ddl_schema_version, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const common::ObVersion &freeze_version, + const int64_t commit_version, + const int64_t part_trans_task_count, + const common::ObString *major_version_str) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(EUNKNOWN == type) + || OB_UNLIKELY(! is_valid_cluster_id(cluster_id)) + || OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) + || OB_UNLIKELY(commit_version <= 0) + || OB_UNLIKELY(OB_INVALID_TIMESTAMP == ddl_schema_version)) { + LOG_ERROR("invalid argument", K(type), K(cluster_id), K(commit_version), K(tenant_id), K(ddl_schema_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(verify_part_trans_task_count_(type, part_trans_task_count))) { + LOG_ERROR("verify_part_trans_task_count_ fail", KR(ret), K(type), + "type", print_record_type(type), K(part_trans_task_count)); + } else if (OB_FAIL(init_binlog_record_(type, cluster_id, trace_id, trace_info, unique_id, + commit_version, major_version_str))) { + LOG_ERROR("init_binlog_record_ fail", KR(ret), K(type), K(cluster_id), K(tenant_id), K(trace_id), K(trace_info), + K(unique_id), K(commit_version), K(major_version_str)); + } else { + set_precise_timestamp(commit_version); + tenant_id_ = tenant_id; + freeze_version_ = freeze_version; + ddl_schema_version_ = ddl_schema_version; + set_next(NULL); + part_trans_task_count_ = part_trans_task_count; + valid_ = true; + } + + return ret; +} + +int ObLogBR::init_binlog_record_(const RecordType type, + const uint64_t cluster_id, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const int64_t commit_version, + const common::ObString *major_version_str) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(init_binlog_record_first_(type))) { + LOG_ERROR("init_binlog_record_first_ fail", KR(ret), K(type)); + } else if (OB_FAIL(init_binlog_record_second_(type, cluster_id, trace_id, trace_info, unique_id, + commit_version, major_version_str))) { + LOG_ERROR("init_binlog_record_second_ fail", KR(ret), K(type), K(cluster_id), K(trace_id), K(trace_info), + K(unique_id), K(commit_version), K(major_version_str)); + } else { + // succ + } + + return ret; +} + +int ObLogBR::init_binlog_record_first_(const RecordType type) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been 
created"); + ret = OB_NOT_INIT; + } else { + // set to invalid_data + int src_category = SRC_NO; + // NOTE: init checkpint to 0 (sec/microsecond) + uint64_t checkpoint_sec = 0; + uint64_t checkpoint_usec = 0; + + data_->setRecordType(type); + data_->setSrcCategory(src_category); + data_->setCheckpoint(checkpoint_sec, checkpoint_usec); + data_->setId(0); // always set id to 0 + data_->setSrcType(SRC_OCEANBASE_1_0); // for OB 1.0 + + // means that two consecutive statements operate on different fields + // set this field to true for performance + data_->setFirstInLogevent(true); + } + + return ret; +} + +int ObLogBR::init_binlog_record_second_(const RecordType type, + const uint64_t cluster_id, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const int64_t commit_version, + const common::ObString *major_version_str) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else { + // treate cluster_id as thread_id + // convert from 64 bit to 32 bit + data_->setThreadId(static_cast(cluster_id)); + // set trans commit timestamp (second) + data_->setTimestamp(commit_version / 1000000); + // set trans commit timestamp (microsecond) + // note: combine getTimestamp() and getRecordUsec() as complete trans commit timestamp + data_->setRecordUsec(static_cast(commit_version % 1000000)); + + // won't use this field + data_->putFilterRuleVal("0", 1); + // set unique id to binlog record + data_->putFilterRuleVal(unique_id.ptr(), unique_id.length()); + // set OBTraceID + data_->putFilterRuleVal(trace_id.ptr(), trace_id.length()); + + // TODO setObTraceInfo has bug, relay on drc message support in new release + UNUSED(trace_info); + // data_->setObTraceInfo(trace_info.ptr()); + + // put major version(from int32_t to char*) to the forth field + if (EBEGIN == type) { + if (OB_ISNULL(major_version_str)) { + LOG_ERROR("major version str for EBEGIN statement should not be null!", KR(ret), K(cluster_id), + K(type), K(trace_id)); + } else { + data_->putFilterRuleVal(major_version_str->ptr(), major_version_str->length()); + } + } + } + + return ret; +} + +int ObLogBR::put_old(ILogRecord *br, const bool is_changed) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br)) { + LOG_ERROR("invalid argument", K(br)); + ret = OB_INVALID_ARGUMENT; + } else { + // DRC proto + // mark value of OldCol to empty string, use global unique empty string value + // value of unchanged OldCol as NULL + const char *val = is_changed ? ObLogBR::COLUMN_CHANGED_LABEL_PTR : + ObLogBR::COLUMN_UNCHANGED_LABEL_PTR; + + int64_t pos = (NULL == val ? 
0 : strlen(val)); + + (void)br->putOld(val, static_cast(pos)); + } + + return ret; +} + +int ObLogBR::get_record_type(int &record_type) +{ + int ret = OB_SUCCESS; + record_type = 0; + + if (OB_ISNULL(data_)) { + LOG_ERROR("data_ is null", K(data_)); + ret = OB_ERR_UNEXPECTED; + } else { + record_type = data_->recordType(); + } + + return ret; +} + +int ObLogBR::setInsertRecordTypeForHBasePut(const RecordType type) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(data_)) { + LOG_ERROR("ILogRecord has not been created"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(EINSERT != type)) { + LOG_ERROR("invalid argument", "type", print_record_type(type)); + } else { + data_->setRecordType(type); + } + + return ret; +} + +int ObLogBR::verify_part_trans_task_count_(const RecordType type, + const int64_t part_trans_task_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(EUNKNOWN == type)) { + LOG_ERROR("invalid argument", K(type), "type", print_record_type(type)); + ret = OB_INVALID_ARGUMENT; + } else { + if ((EDDL == type) || (EBEGIN == type) || (ECOMMIT == type)) { + // verify part_trans_task_count, should greater than 0 if DDL/BEGIN/COMMIT + if (OB_UNLIKELY(part_trans_task_count <= 0)) { + LOG_ERROR("part_trans_task_count is not greater than 0", K(type), + "type", print_record_type(type), K(part_trans_task_count)); + ret = OB_ERR_UNEXPECTED; + } else { + // do nothing + } + } + } + + return ret; +} + +// unserilized Binlog record +ObLogUnserilizedBR::ObLogUnserilizedBR() : ObLogBR() +{ + construct_unserilized_data_(); + + ObLogBR::reset(); +} + +ObLogUnserilizedBR::~ObLogUnserilizedBR() +{ +} + +void ObLogUnserilizedBR::construct_unserilized_data_() +{ + const bool creating_binlog_record = true; + construct_data_(creating_binlog_record); +} + +// serilized Binlog Record +ObLogSerilizedBR::ObLogSerilizedBR() : ObLogBR() +{ + construct_serilized_data_(); + + ObLogBR::reset(); +} + +ObLogSerilizedBR::~ObLogSerilizedBR() +{ +} + +void ObLogSerilizedBR::construct_serilized_data_() +{ + const bool creating_binlog_record = false; + construct_data_(creating_binlog_record); +} + +} // end namespace liboblog +} // end namespace oceanbase diff --git a/src/liboblog/src/ob_log_binlog_record.h b/src/liboblog/src/ob_log_binlog_record.h new file mode 100644 index 0000000000000000000000000000000000000000..3b40d2836c16a5413cd9809bb4e9105cbae31aa9 --- /dev/null +++ b/src/liboblog/src/ob_log_binlog_record.h @@ -0,0 +1,211 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_BINLOG_RECORD_ +#define OCEANBASE_LIBOBLOG_BINLOG_RECORD_ + +#include // ILogRecord +#include // createLogRecord + +#include "share/ob_define.h" +#include "lib/oblog/ob_log_module.h" // OBLOG_LOG +#include "lib/string/ob_string.h" // ObString +#include "common/ob_range.h" // ObVersion + +#include "ob_log_resource_recycle_task.h" // ObLogResourceRecycleTask + +using namespace oceanbase::logmessage; + +namespace oceanbase +{ +namespace liboblog +{ + +class ObLogBR : public ObLogResourceRecycleTask +{ +public: + static const char *COLUMN_CHANGED_LABEL_PTR; + static const char *COLUMN_UNCHANGED_LABEL_PTR; + +public: + ObLogBR(); + virtual ~ObLogBR(); + +public: + static int put_old(ILogRecord *br, const bool is_changed); + +public: + void reset(); + + // init INSERT/UPDATE/DELETE Binlog Record + // set record_type/srcCategory/checkpoint/scrType/firstInLogevent + int init_dml_data_first(const RecordType type, + const uint64_t tenant_id); + + // read persist data, fill data after deserialize + // + // INSERT/UPDATE/DELETE + // threadId/timestamp(checkpoint)/filterRuleVal + int init_dml_data_second(const RecordType type, + const uint64_t cluster_id, + const int64_t tenant_id, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const common::ObVersion &freeze_version, + const int64_t commit_version); + + // init Binlog Record of DDL/BEGIN/COMMIT + int init_data(const RecordType type, + const uint64_t cluster_id, + const uint64_t tenant_id, + const int64_t ddl_schema_version, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const common::ObVersion &freeze_version, + const int64_t commit_version, + const int64_t part_trans_task_count = 0, + const common::ObString *major_version_str = NULL); + + ILogRecord *get_data() { return data_; } + int get_record_type(int &record_type); + + void set_next(ObLogBR *next) {next_ = next;}; + ObLogBR *get_next() {return next_;}; + + void set_is_valid(const bool is_valid) { valid_ = is_valid; } + bool is_valid() const { return valid_; } + + int set_table_meta(ITableMeta *table_meta); + int set_db_meta(IDBMeta *db_meta); + + inline void set_precise_timestamp(int64_t precise_timestamp) { precise_timestamp_ = precise_timestamp; } + inline int64_t get_precise_timestamp() const { return precise_timestamp_; } + + inline void *get_host() { return host_; } + void set_host(void *host) { host_ = host; } + + inline void *get_log_entry_task() { return log_entry_task_; } + void set_log_entry_task(void *log_entry_task) { log_entry_task_ = log_entry_task; } + + inline bool is_serilized() const { return is_serilized_; } + void set_serilized(const bool is_serilized) { is_serilized_ = is_serilized; } + + int32_t get_major_version() const { return freeze_version_.major_; } + uint64_t get_tenant_id() const { return tenant_id_; } + int64_t get_ddl_schema_version() const { return ddl_schema_version_; } + int64_t get_part_trans_task_count() const { return part_trans_task_count_; } + + // for put operation of HBASE: store data type as update, new value use full-column mode, old value is empty + // special treatment for liboblog: + // TODO:observer add new dml operation type to represend PUT operation + int setInsertRecordTypeForHBasePut(const RecordType type); + + public: + TO_STRING_KV("is_ser", is_serilized_, + K_(valid)); + +private: + int verify_part_trans_task_count_(const RecordType type, + const int64_t part_trans_task_count); 
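+  // Illustrative usage sketch (hypothetical call site, not part of this class): a
+  // DDL/BEGIN/COMMIT record is typically prepared through init_data() and then bound
+  // to its schema metadata, e.g.
+  //   ObLogBR *br = ...;  // obtained from IObLogBRPool::alloc()
+  //   if (OB_SUCC(br->init_data(EDDL, cluster_id, tenant_id, ddl_schema_version,
+  //       trace_id, trace_info, unique_id, freeze_version, commit_version,
+  //       1 /* part_trans_task_count, must be > 0 for DDL/BEGIN/COMMIT */))) {
+  //     (void)br->set_db_meta(db_meta);        // IDBMeta
+  //     (void)br->set_table_meta(table_meta);  // ITableMeta
+  //   }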
+ + // unique id of BinlogRecord: Pkey + LogId + row_index + // 1. DML statement + // set unique id by puFilterRuleVal + // 2. DDL statement + // set tenant id and schema version + int init_binlog_record_(const RecordType type, + const uint64_t cluster_id, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const int64_t commit_version, + const common::ObString *major_version_str = NULL); + int init_binlog_record_first_(const RecordType type); + int init_binlog_record_second_(const RecordType type, + const uint64_t cluster_id, + const common::ObString &trace_id, + const common::ObString &trace_info, + const common::ObString &unique_id, + const int64_t commit_version, + const common::ObString *major_version_str = NULL); + +private: + ILogRecord *data_; ///< real BinlogRecord + // is binglog record serilized, reference construct_data_ function implementation + // 1. If binglog record has not been serilized, is_serilized_ = false + // 2. If binglog record parse from string, is_serilized_ = true + // is_serilized_ can only be modified by set_serilized function + bool is_serilized_; + void *host_; ///< record corresponsding RowIndex + void *log_entry_task_; + ObLogBR *next_; + bool valid_; ///< statement is valid or not + int64_t precise_timestamp_; ///< precise timestamp in micro seconds + + common::ObVersion freeze_version_; + uint64_t tenant_id_; + // corresponding schema version for DDL + // use schema version allocated by Sequencer for DML + // 0 for HEARTBEAT + int64_t ddl_schema_version_; + + // Number of tasks in the transaction partition, i.e. number of participants in the transaction + // 1. DDL + // 2. DML begin/commit binglog record will carry this info + int64_t part_trans_task_count_; + +protected: + /* + * LogMsgFactory + * static ILogRecord* createLogRecord( + * const std::string& type = DFT_BR, bool creating = true); + * + * @param creating is to differentiate two kinds of usage, if creating is + * true, it means the created binlog record has not been + * serilized, all in-memory functions can be called. Otherwise + * if creating is false, only after-serialized function could + * be called + */ + void construct_data_(const bool creating_binlog_record); + +private: + void destruct_data_(); +}; + +class ObLogUnserilizedBR : public ObLogBR +{ +public: + ObLogUnserilizedBR(); + virtual ~ObLogUnserilizedBR(); + +private: + // Build unserialized ILogRecord, in-memory operations + void construct_unserilized_data_(); +}; + +class ObLogSerilizedBR : public ObLogBR +{ +public: + ObLogSerilizedBR(); + virtual ~ObLogSerilizedBR(); + +private: + // Build serialized ILogRecord, parse based on persistent data + void construct_serilized_data_(); +}; + +} // end namespace liboblog +} // end namespace oceanbase + +#endif // end OCEANBASE_LIBOBLOG_BINLOG_RECORD_ diff --git a/src/liboblog/src/ob_log_binlog_record_pool.cpp b/src/liboblog/src/ob_log_binlog_record_pool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4d1eb7cce62a74c505f42426b1a4c5dc926699b9 --- /dev/null +++ b/src/liboblog/src/ob_log_binlog_record_pool.cpp @@ -0,0 +1,144 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_binlog_record_pool.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogBRPool::ObLogBRPool() : inited_(false), unserilized_pool_(), serilized_pool_() +{ +} + +ObLogBRPool::~ObLogBRPool() +{ + destroy(); +} + +int ObLogBRPool::init(const int64_t fixed_br_count) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("BRPool has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(fixed_br_count <= 0)) { + LOG_ERROR("invalid argument", K(fixed_br_count)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(unserilized_pool_.init(fixed_br_count, ObModIds::OB_LOG_BINLOG_RECORD_POOL))) { + LOG_ERROR("initialize binlog record pool fail", KR(ret), K(fixed_br_count)); + } else if (OB_FAIL(serilized_pool_.init(fixed_br_count, ObModIds::OB_LOG_BINLOG_RECORD_POOL))) { + LOG_ERROR("initialize binlog record pool fail", KR(ret), K(fixed_br_count)); + } else { + inited_ = true; + } + return ret; +} + +void ObLogBRPool::destroy() +{ + inited_ = false; + unserilized_pool_.destroy(); + serilized_pool_.destroy(); +} + +int ObLogBRPool::alloc(const bool is_serilized, ObLogBR *&br, void *host/* = NULL */, void *log_entry_task/*=NULL*/) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("BRPool has not been initialized"); + ret = OB_NOT_INIT; + } else { + if (! is_serilized) { + ObLogUnserilizedBR *unserilized_br = NULL; + + if (OB_FAIL(unserilized_pool_.alloc(unserilized_br))) { + LOG_ERROR("alloc binlog record fail", KR(ret)); + } else { + br = unserilized_br; + } + } else { + ObLogSerilizedBR *serilized_br = NULL; + + if (OB_FAIL(serilized_pool_.alloc(serilized_br))) { + LOG_ERROR("alloc binlog record fail", KR(ret)); + } else { + br = serilized_br; + } + } + + if (OB_SUCC(ret)) { + if (OB_ISNULL(br)) { + LOG_ERROR("alloc binlog record fail", K(br)); + ret = OB_ERR_UNEXPECTED; + } else { + br->set_host(host); + br->set_log_entry_task(log_entry_task); + br->set_serilized(is_serilized); + } + } + } + + return ret; +} + +void ObLogBRPool::free(ObLogBR *br) +{ + int ret = OB_SUCCESS; + + if (OB_LIKELY(inited_) && OB_LIKELY(NULL != br)) { + const bool is_serilized = br->is_serilized(); + // recycle memory + br->reset(); + + if (! 
is_serilized) { + ObLogUnserilizedBR *unserilized_br = NULL; + + if (OB_ISNULL(unserilized_br = static_cast(br))) { + LOG_ERROR("unserilized_br is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(unserilized_pool_.free(unserilized_br))) { + LOG_ERROR("free binlog record fail", KR(ret), K(br)); + } + } else { + ObLogSerilizedBR *serilized_br = NULL; + + if (OB_ISNULL(serilized_br = static_cast(br))) { + LOG_ERROR("serilized_br is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(serilized_pool_.free(serilized_br))) { + LOG_ERROR("free binlog record fail", KR(ret), K(br)); + } + } + + if (OB_SUCC(ret)) { + br = NULL; + } + } +} + +void ObLogBRPool::print_stat_info() const +{ + _LOG_INFO("[STAT] [BR_POOL] [UNSER](TOTAL=%ld FREE=%ld FIXED=%ld) " + "[SER](TOTAL=%ld FREE=%ld FIXED=%ld)", + unserilized_pool_.get_alloc_count(), unserilized_pool_.get_free_count(), unserilized_pool_.get_fixed_count(), + serilized_pool_.get_alloc_count(), serilized_pool_.get_free_count(), serilized_pool_.get_fixed_count()); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_binlog_record_pool.h b/src/liboblog/src/ob_log_binlog_record_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..541f726d23df18e3c39fa945f83061348405934f --- /dev/null +++ b/src/liboblog/src/ob_log_binlog_record_pool.h @@ -0,0 +1,68 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SRC_LIBOBLOG_OB_LOG_BINLOG_RECORD_POOL_ +#define OCEANBASE_SRC_LIBOBLOG_OB_LOG_BINLOG_RECORD_POOL_ + +#include "ob_log_binlog_record.h" // ObLogBR +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool + +namespace oceanbase +{ +namespace liboblog +{ +class IObLogBRPool +{ +public: + virtual ~IObLogBRPool() {} + +public: + // If host is valid, then set host to binlog record: ObLogBR::set_host() + // is_serilized = false, to allocate in-memory ILogRecord, i.e. 
for serialization + // is_serilized = true, for allocating deserialized ILogRecord + virtual int alloc(const bool is_serilized, ObLogBR *&br, void *host = NULL, void *log_entry_task = NULL) = 0; + virtual void free(ObLogBR *br) = 0; + virtual void print_stat_info() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////// + +class ObLogBRPool : public IObLogBRPool +{ + typedef common::ObSmallObjPool UnserilizedBRObjPool; + typedef common::ObSmallObjPool SerilizedBRObjPool; + +public: + ObLogBRPool(); + virtual ~ObLogBRPool(); + +public: + int alloc(const bool is_serilized, ObLogBR *&br, void *host = NULL, void *log_entry_task = NULL); + void free(ObLogBR *br); + void print_stat_info() const; + +public: + int init(const int64_t fixed_br_count); + void destroy(); + +private: + bool inited_; + UnserilizedBRObjPool unserilized_pool_; + SerilizedBRObjPool serilized_pool_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogBRPool); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_SRC_LIBOBLOG_OB_LOG_BINLOG_RECORD_POOL_ */ diff --git a/src/liboblog/src/ob_log_binlog_record_queue.cpp b/src/liboblog/src/ob_log_binlog_record_queue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e403e84626bcf0827c04d8d194972921a8d81b71 --- /dev/null +++ b/src/liboblog/src/ob_log_binlog_record_queue.cpp @@ -0,0 +1,263 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "share/ob_define.h" +#include "ob_log_binlog_record_queue.h" +#include "ob_log_utils.h" // get_timestamp + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +int BRQueue::init(const int64_t queue_size) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("BRQueue has been initialized"); + ret = OB_INIT_TWICE; + } else if (0 >= queue_size) { + ret = OB_INVALID_ARGUMENT; + } else if (OB_SUCCESS != (ret = queue_.init(queue_size))) { + LOG_ERROR("init fixed queue fail", KR(ret), K(queue_size)); + } else { + dml_br_count_ = 0; + ddl_br_count_ = 0; + part_trans_task_count_ = 0; + inited_ = true; + } + + return ret; +} + +void BRQueue::destroy() +{ + inited_ = false; + dml_br_count_ = 0; + ddl_br_count_ = 0; + part_trans_task_count_ = 0; + queue_.destroy(); +} + +int BRQueue::push(ObLogBR *data, const int64_t timeout) +{ + int ret = OB_SUCCESS; + ILogRecord *br_data = NULL; + bool need_accumulate_stat = true; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("BRQueue has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(data)) { + LOG_ERROR("invalid argument", K(data)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br_data = data->get_data())) { + LOG_ERROR("binlog record data is invalid", K(data)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(do_stat_for_part_trans_task_count_(*data, need_accumulate_stat))) { + LOG_ERROR("do_stat_for_part_trans_task_count_ fail", KR(ret), K(need_accumulate_stat)); + } else { + int64_t end_time = timeout + get_timestamp(); + int record_type = br_data->recordType(); + + if (EDDL == record_type) { + ATOMIC_INC(&ddl_br_count_); + } else if (HEARTBEAT != record_type && EBEGIN != record_type && ECOMMIT != record_type) { + ATOMIC_INC(&dml_br_count_); + } else { + // do nothing + } + + while (true) { + ret = queue_.push(data); + + if (OB_UNLIKELY(OB_SIZE_OVERFLOW != ret)) { + break; + } + + int64_t left_time = end_time - get_timestamp(); + + if (OB_UNLIKELY(left_time <= 0)) { + ret = OB_TIMEOUT; + break; + } + + cond_.timedwait(left_time); + } + + if (OB_FAIL(ret)) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("push data into fixed queue fail", KR(ret), K(data)); + } + } else { + cond_.signal(); + } + } + + return ret; +} + +int BRQueue::pop(ILogRecord *&record, const int64_t timeout) +{ + int ret = OB_SUCCESS; + int32_t major_version = 0; + uint64_t tenant_id = OB_INVALID_ID; + + if (OB_FAIL(pop(record, major_version, tenant_id, timeout))) { + LOG_ERROR("pop BinlogRecord faili", KR(ret), K(record)); + } + + return ret; +} + +int BRQueue::pop(ILogRecord *&record, + int32_t &major_version, + uint64_t &tenant_id, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + ObLogBR *next_br = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("BRQueue has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(pop_next_br_(next_br, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("pop binlog record from br_queue fail", KR(ret)); + } + } else if (OB_ISNULL(next_br)) { + LOG_ERROR("pop binlog record from br_queue fail", KR(ret), K(next_br)); + ret = OB_ERR_UNEXPECTED; + } else { + record = next_br->get_data(); + major_version = next_br->get_major_version(); + tenant_id = next_br->get_tenant_id(); + } + + if (OB_SUCC(ret)) { + if (OB_ISNULL(record)) { + LOG_ERROR("binlog record data is invalid", K(record), K(next_br)); + ret = OB_ERR_UNEXPECTED; + } else { + int record_type = record->recordType(); + + if (EDDL == record_type) { + ATOMIC_DEC(&ddl_br_count_); + } else if (HEARTBEAT != record_type && EBEGIN != record_type && ECOMMIT != record_type) { + ATOMIC_DEC(&dml_br_count_); + } else { + // do nothing + } + } + } + + return ret; +} + +int BRQueue::pop_next_br_(ObLogBR *&data, const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("BRQueue has not been initialized"); + ret = OB_NOT_INIT; + } else { + int64_t end_time = timeout + get_timestamp(); + + while (true) { + ret = queue_.pop(data); + + if (OB_UNLIKELY(OB_ENTRY_NOT_EXIST != ret)) { + break; + } + + int64_t left_time = end_time - get_timestamp(); + + if (OB_UNLIKELY(left_time <= 0)) { + ret = OB_TIMEOUT; + break; + } + + cond_.timedwait(left_time); + } + + if (OB_FAIL(ret)) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("pop data from fixed queue fail", KR(ret)); + } + } else { + bool need_accumulate_stat = false; + + if (OB_FAIL(do_stat_for_part_trans_task_count_(*data, need_accumulate_stat))) { + LOG_ERROR("do_stat_for_part_trans_task_count_ fail", KR(ret), K(need_accumulate_stat)); + } + cond_.signal(); + } + } + + return ret; +} + +int64_t BRQueue::get_dml_br_count() const +{ + return ATOMIC_LOAD(&dml_br_count_); +} + +int64_t BRQueue::get_ddl_br_count() const +{ + return ATOMIC_LOAD(&ddl_br_count_); +} + +int64_t BRQueue::get_part_trans_task_count() const +{ + return ATOMIC_LOAD(&part_trans_task_count_); +} + +int BRQueue::do_stat_for_part_trans_task_count_(ObLogBR &data, + bool need_accumulate_stat) +{ + int ret = OB_SUCCESS; + int record_type = 0; + int64_t part_trans_task_count = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("BRQueue has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(data.get_record_type(record_type))) { + LOG_ERROR("data get_record_type fail", KR(ret), + "record_type", print_record_type(record_type)); + } else { + part_trans_task_count = data.get_part_trans_task_count(); + + if ((EDDL == record_type) || (EBEGIN == record_type)) { + if (need_accumulate_stat) { + // enter BRQueue + (void)ATOMIC_AAF(&part_trans_task_count_, part_trans_task_count); + } else { + // leave BRQueue + (void)ATOMIC_AAF(&part_trans_task_count_, -part_trans_task_count); + } + } else { + // do nothing + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_binlog_record_queue.h b/src/liboblog/src/ob_log_binlog_record_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..111b3a6af47e4496263f384933c7dd599adaaaab --- /dev/null +++ b/src/liboblog/src/ob_log_binlog_record_queue.h @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_BINLOG_RECORD_QUEUE_ +#define OCEANBASE_LIBOBLOG_OB_LOG_BINLOG_RECORD_QUEUE_ + +#include "lib/queue/ob_fixed_queue.h" // ObFixedQueue +#include "common/ob_queue_thread.h" // ObCond + +#include "ob_log_binlog_record.h" // ObLogBR + +namespace oceanbase +{ +namespace liboblog +{ +class BRQueue +{ +public: + BRQueue() : + inited_(false), + queue_(), + cond_(), + dml_br_count_(0), + ddl_br_count_(0), + part_trans_task_count_(0) + {} + + virtual ~BRQueue() { destroy(); } + +public: + int init(const int64_t queue_size); + void destroy(); + + // To support large transactions - implement a streaming commit model where each push and pop is a separate ObLogBR + int push(ObLogBR *data, const int64_t timeout); + int pop(ILogRecord *&record, const int64_t timeout); + int pop(ILogRecord *&record, int32_t &major_version, uint64_t &tenant_id, const int64_t timeout); + + int64_t get_dml_br_count() const; + int64_t get_ddl_br_count() const; + int64_t get_part_trans_task_count() const; + +private: + int pop_next_br_(ObLogBR *&data, const int64_t timeout); + int do_stat_for_part_trans_task_count_(ObLogBR &data, + bool need_accumulate_stat); + +private: + bool inited_; + common::ObFixedQueue queue_; + common::ObCond cond_; + + int64_t dml_br_count_ CACHE_ALIGNED; + int64_t ddl_br_count_ CACHE_ALIGNED; + + // Statistics on the number of partitioned transaction tasks + int64_t part_trans_task_count_ CACHE_ALIGNED; +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_OB_LOG_BINLOG_RECORD_QUEUE_ */ + diff --git a/src/liboblog/src/ob_log_cluster_id_filter.cpp b/src/liboblog/src/ob_log_cluster_id_filter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cee12dad58f90b25a5dd35f7c45b29a621cdae9e --- /dev/null +++ b/src/liboblog/src/ob_log_cluster_id_filter.cpp @@ -0,0 +1,161 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_PARSER + +#include "ob_log_cluster_id_filter.h" // ObLogClusterIDFilter + +#include "lib/string/ob_string.h" // ObString + +#include "ob_log_utils.h" // get_timestamp, get_record_type, split_int64 + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +const char IObLogClusterIDFilter::DEFAULT_CLUSTER_ID_BLACK_LIST_DELIMITER = '|'; + +ObLogClusterIDFilter::ObLogClusterIDFilter() : + inited_(false), + cluster_id_ignored_part_trans_count_(0), + last_cluster_id_ignored_part_trans_count_(0), + last_stat_time_(0), + cluster_id_black_list_() +{} + +ObLogClusterIDFilter::~ObLogClusterIDFilter() +{ + destroy(); +} + + +int ObLogClusterIDFilter::init(const char *cluster_id_black_list, + const int64_t cluster_id_black_value_min, + const int64_t cluster_id_black_value_max) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(OB_ISNULL(cluster_id_black_list) + || OB_UNLIKELY(cluster_id_black_value_min > cluster_id_black_value_max))) { + LOG_ERROR("invalid argument", K(cluster_id_black_list), K(cluster_id_black_value_min), + K(cluster_id_black_value_max)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_cluster_id_black_list_(cluster_id_black_list, + cluster_id_black_value_min, cluster_id_black_value_max))) { + LOG_ERROR("init cluster id black list fail", KR(ret), K(cluster_id_black_list), + K(cluster_id_black_value_min), K(cluster_id_black_value_max)); + } else { + inited_ = true; + } + + return ret; +} + +int ObLogClusterIDFilter::init_cluster_id_black_list_(const char *cluster_id_black_list, + const int64_t cluster_id_black_value_min, + const int64_t cluster_id_black_value_max) +{ + int ret = OB_SUCCESS; + ObString str(cluster_id_black_list); + const char delimiter = DEFAULT_CLUSTER_ID_BLACK_LIST_DELIMITER; + + if (OB_ISNULL(cluster_id_black_list) + || OB_UNLIKELY(cluster_id_black_value_min > cluster_id_black_value_max)) { + LOG_ERROR("invalid argument", K(cluster_id_black_list), K(cluster_id_black_value_min), + K(cluster_id_black_value_max)); + ret = OB_INVALID_ARGUMENT; + } + // split into int64 data + else if (OB_FAIL(split_int64(str, delimiter, cluster_id_black_list_))) { + LOG_ERROR("fail to parse cluster_id_black_list", + KR(ret), K(str), K(delimiter), K(cluster_id_black_list_)); + } else { + _LOG_INFO("[STAT] [CLUSTER_ID_BLACK_LIST] count=%ld, black_list='%s', min=%ld, max=%ld", + cluster_id_black_list_.count(), cluster_id_black_list, + cluster_id_black_value_min, cluster_id_black_value_max); + + // Check the validity of each element and whether it is within a reasonable range + for (int64_t idx = 0; OB_SUCCESS == ret && idx < cluster_id_black_list_.count(); idx++) { + int64_t cluster_id = cluster_id_black_list_.at(idx); + + _LOG_INFO("[STAT] [CLUSTER_ID_BLACK_LIST] idx=%ld, cluster_id=%ld", idx, cluster_id); + + if (OB_UNLIKELY(cluster_id < cluster_id_black_value_min) + || OB_UNLIKELY(cluster_id > cluster_id_black_value_max)) { + LOG_ERROR("invalid cluster id in black list, which is out of range", + K(cluster_id), K(cluster_id_black_value_min), + K(cluster_id_black_value_max), + K(cluster_id_black_list)); + ret = OB_INVALID_CONFIG; + } + } + } + return ret; +} + +void ObLogClusterIDFilter::destroy() +{ + inited_ = false; + cluster_id_ignored_part_trans_count_ = 0; + last_cluster_id_ignored_part_trans_count_ = 0; + last_stat_time_ = 0; + cluster_id_black_list_.destroy(); +} + +void ObLogClusterIDFilter::stat_ignored_tps() +{ + 
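+  // Rate of partition transactions ignored by cluster-id filtering since the previous
+  // call: delta(ignored count) / delta(seconds). The first call only records a baseline
+  // (last_stat_time_ is 0), so no rate is printed until the next call.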
int64_t cur_time = get_timestamp(); + int64_t cur_count = ATOMIC_LOAD(&cluster_id_ignored_part_trans_count_); + int64_t last_count = ATOMIC_LOAD(&last_cluster_id_ignored_part_trans_count_); + int64_t delta_time = (cur_time - last_stat_time_) / 1000000; + + if (last_stat_time_ > 0 && delta_time > 0) { + double tps = static_cast(cur_count - last_count) / static_cast(delta_time); + + _LOG_INFO("[TPS_STAT] CLUSTER_ID_IGNORED_PART_TPS=%.3lf", tps); + } + + last_cluster_id_ignored_part_trans_count_ = cur_count; + last_stat_time_ = cur_time; +} + +int ObLogClusterIDFilter::check_is_served(const uint64_t cluster_id, bool &is_served, + const bool stat_tps) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited", K(inited_)); + ret = OB_NOT_INIT; + } else { + is_served = true; + for (int64_t idx = 0; OB_SUCCESS == ret && is_served && idx < cluster_id_black_list_.count(); idx++) { + // not serve if in blacklist + if (cluster_id == cluster_id_black_list_.at(idx)) { + is_served = false; + } + } + + if (! is_served && stat_tps) { + (void)ATOMIC_FAA(&cluster_id_ignored_part_trans_count_, 1); + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_cluster_id_filter.h b/src/liboblog/src/ob_log_cluster_id_filter.h new file mode 100644 index 0000000000000000000000000000000000000000..92df4b16aec26a4814d0c20968f0f97caf2cde90 --- /dev/null +++ b/src/liboblog/src/ob_log_cluster_id_filter.h @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_CLUSTER_ID_FILTER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_CLUSTER_ID_FILTER_H_ + +#include "lib/utility/ob_macro_utils.h" // CACHE_ALIGNED +#include "lib/container/ob_se_array.h" // ObSEArray + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogClusterIDFilter +{ +public: + static const char DEFAULT_CLUSTER_ID_BLACK_LIST_DELIMITER; + +public: + virtual ~IObLogClusterIDFilter() {} + +public: + virtual int check_is_served(const uint64_t cluster_id, bool &is_served, + const bool stat_tps = true) = 0; + virtual void stat_ignored_tps() = 0; +}; + +class ObLogClusterIDFilter : public IObLogClusterIDFilter +{ + static const int64_t DEFAULT_CLUSTER_ID_BLACK_LIST_SIZE = 8; +public: + ObLogClusterIDFilter(); + virtual ~ObLogClusterIDFilter(); + +public: + virtual int check_is_served(const uint64_t cluster_id, bool &is_served, + const bool stat_tps = true); + virtual void stat_ignored_tps(); + +public: + int init(const char *cluster_id_black_list, + const int64_t cluster_id_black_value_min, + const int64_t cluster_id_black_value_max); + void destroy(); + +private: + int init_cluster_id_black_list_(const char *cluster_id_black_list, + const int64_t cluster_id_black_value_min, + const int64_t cluster_id_black_value_max); + +private: + bool inited_; + // TPS statistics based on cluster_id filtering + // The TPS statistics here refers to the number of partition transactions + int64_t cluster_id_ignored_part_trans_count_ CACHE_ALIGNED; + int64_t last_cluster_id_ignored_part_trans_count_ CACHE_ALIGNED; + int64_t last_stat_time_ CACHE_ALIGNED; + + // blacklist of cluster id + common::ObSEArray cluster_id_black_list_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogClusterIDFilter); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_committer.cpp b/src/liboblog/src/ob_log_committer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9fbd9d3f9583a997500a75a42077af6bdf3b5c28 --- /dev/null +++ b/src/liboblog/src/ob_log_committer.cpp @@ -0,0 +1,1391 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_COMMITTER + +#include "ob_log_committer.h" + +#include "lib/string/ob_string.h" // ObString +#include "common/ob_range.h" // ObVersion +#include "storage/transaction/ob_trans_define.h" // ObTransID + +#include "ob_log_binlog_record_queue.h" // BRQueue +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_binlog_record.h" // ObLogBR +#include "ob_log_part_mgr.h" // IObLogPartMgr +#include "ob_log_trans_ctx_mgr.h" // IObLogTransCtxMgr +#include "ob_log_trans_stat_mgr.h" // IObLogTransStatMgr +#include "ob_log_resource_collector.h" // IObLogResourceCollector +#include "ob_log_binlog_record_pool.h" // IObLogBRPool +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_tenant_mgr.h" // IObLogTenantMgr +#include "ob_log_row_data_index.h" // ObLogRowDataIndex + +#define _STAT(level, fmt, args...) _OBLOG_COMMITTER_LOG(level, "[STAT] [COMMITTER] " fmt, ##args) +#define STAT(level, fmt, args...) 
OBLOG_COMMITTER_LOG(level, "[STAT] [COMMITTER] " fmt, ##args) +#define _ISTAT(fmt, args...) _STAT(INFO, fmt, ##args) +#define ISTAT(fmt, args...) STAT(INFO, fmt, ##args) +#define _DSTAT(fmt, args...) _STAT(DEBUG, fmt, ##args) +#define DSTAT(fmt, args...) STAT(DEBUG, fmt, ##args) + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace liboblog +{ + +/////////////////////////////////////// ObLogCommitter::CheckpointTask /////////////////////////////////////// + +ObLogCommitter::CheckpointTask::CheckpointTask(PartTransTask &task) +{ + task_type_ = task.get_type(); + timestamp_ = task.get_timestamp(); + + if (task.is_offline_partition_task()) { + new (value_) ObPartitionKey(task.get_partition()); + } +} + +ObLogCommitter::CheckpointTask::~CheckpointTask() +{ + if (PartTransTask::TASK_TYPE_OFFLINE_PARTITION == task_type_) { + reinterpret_cast(value_)->~ObPartitionKey(); + } + task_type_ = PartTransTask::TASK_TYPE_UNKNOWN; + timestamp_ = 0; +} + +/////////////////////////////////////// ObLogCommitter /////////////////////////////////////// + +int64_t ObLogCommitter::g_output_heartbeat_interval = + ObLogConfig::default_output_heartbeat_interval_sec * _SEC_; + +ObLogCommitter::ObLogCommitter() : + inited_(false), + br_queue_(NULL), + tag_br_alloc_(NULL), + err_handler_(NULL), + trans_ctx_mgr_(NULL), + trans_stat_mgr_(NULL), + resource_collector_(NULL), + commit_pid_(0), + heartbeat_pid_(0), + stop_flag_(true), + trans_committer_queue_(), + trans_committer_queue_cond_(), + br_committer_queue_(), + checkpoint_queue_(), + checkpoint_queue_cond_(), + checkpoint_queue_allocator_(), + global_heartbeat_seq_(0), + global_heartbeat_info_queue_(), + dml_part_trans_task_count_(0), + ddl_part_trans_task_count_(0), + dml_trans_count_(0) +{ +} + +ObLogCommitter::~ObLogCommitter() +{ + destroy(); +} + +int ObLogCommitter::init(const int64_t start_seq, + BRQueue *br_queue, + IObLogResourceCollector *resource_collector, + IObLogBRPool *tag_br_alloc, + IObLogTransCtxMgr *trans_ctx_mgr, + IObLogTransStatMgr *trans_stat_mgr, + IObLogErrHandler *err_handler) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("committer has been initialized", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(start_seq < 0) + || OB_ISNULL(br_queue_ = br_queue) + || OB_ISNULL(resource_collector_ = resource_collector) + || OB_ISNULL(tag_br_alloc_ = tag_br_alloc) + || OB_ISNULL(trans_ctx_mgr_ = trans_ctx_mgr) + || OB_ISNULL(trans_stat_mgr_ = trans_stat_mgr) + || OB_ISNULL(err_handler_ = err_handler)) { + LOG_ERROR("invalid arguments", K(start_seq), K(br_queue), + K(resource_collector), K(tag_br_alloc), K(trans_ctx_mgr), K(trans_stat_mgr), K(err_handler)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(trans_committer_queue_.init(start_seq, OB_MALLOC_MIDDLE_BLOCK_SIZE))) { + LOG_ERROR("init trans_committer_queue_ fail", KR(ret), K(start_seq)); + } else if (OB_FAIL(br_committer_queue_.init(start_seq, OB_MALLOC_BIG_BLOCK_SIZE))) { + LOG_ERROR("init br_committer_queue_ fail", KR(ret), K(start_seq)); + } else if (OB_FAIL(checkpoint_queue_.init(start_seq, OB_MALLOC_NORMAL_BLOCK_SIZE))) { + LOG_ERROR("init checkpoint_queue fail", KR(ret), K(start_seq)); + } else if (OB_FAIL(global_heartbeat_info_queue_.init(start_seq, OB_MALLOC_NORMAL_BLOCK_SIZE))) { + LOG_ERROR("init global_heartbeat_info_queue fail", KR(ret), K(start_seq)); + } else if (OB_FAIL(checkpoint_queue_allocator_.init(CHECKPOINT_QUEUE_ALLOCATOR_TOTAL_LIMIT, + 
CHECKPOINT_QUEUE_ALLOCATOR_HOLD_LIMIT, + CHECKPOINT_QUEUE_ALLOCATOR_PAGE_SIZE))) { + LOG_ERROR("init checkpoint_queue_allocator_ fail", KR(ret)); + } else { + checkpoint_queue_allocator_.set_label(ObModIds::OB_LOG_COMMITTER_CHECKPOINT_QUEUE); + global_heartbeat_seq_ = start_seq; + commit_pid_ = 0; + heartbeat_pid_ = 0; + dml_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_trans_count_ = 0; + stop_flag_ = true; + inited_ = true; + + LOG_INFO("init committer succ", K(start_seq)); + } + + return ret; +} + +void ObLogCommitter::destroy() +{ + stop(); + + inited_ = false; + commit_pid_ = 0; + heartbeat_pid_ = 0; + stop_flag_ = true; + + br_queue_ = NULL; + tag_br_alloc_ = NULL; + err_handler_ = NULL; + trans_ctx_mgr_ = NULL; + trans_stat_mgr_ = NULL; + resource_collector_ = NULL; + + (void)trans_committer_queue_.destroy(); + (void)br_committer_queue_.destroy(); + (void)checkpoint_queue_.destroy(); + checkpoint_queue_allocator_.destroy(); + + global_heartbeat_seq_ = 0; + (void)global_heartbeat_info_queue_.destroy(); + + dml_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_trans_count_ = 0; +} + +int ObLogCommitter::start() +{ + int ret = OB_SUCCESS; + int pthread_ret = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + stop_flag_ = false; + + + if (0 != (pthread_ret = pthread_create(&commit_pid_, NULL, + commit_thread_func_, this))){ + LOG_ERROR("create commit thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else if (0 != (pthread_ret = pthread_create(&heartbeat_pid_, NULL, + heartbeat_thread_func_, this))){ + LOG_ERROR("create HEARTBEAT thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("start Committer commit and HEARTBEAT thread succ"); + } + + if (OB_FAIL(ret)) { + stop_flag_ = true; + } + } + + return ret; +} + +void ObLogCommitter::stop() +{ + if (inited_) { + stop_flag_ = true; + + if (0 != commit_pid_) { + int pthread_ret = pthread_join(commit_pid_, NULL); + + if (0 != pthread_ret) { + LOG_ERROR("join Committer commit thread fail", K(commit_pid_), KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop Committer commit thread succ"); + } + + commit_pid_ = 0; + } + + if (0 != heartbeat_pid_) { + int pthread_ret = pthread_join(heartbeat_pid_, NULL); + + if (0 != pthread_ret) { + LOG_ERROR("join Committer HEARTBEAT thread fail", K(heartbeat_pid_), KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop Committer HEARTBEAT thread succ"); + } + + heartbeat_pid_ = 0; + } + } +} + +void ObLogCommitter::mark_stop_flag() +{ + stop_flag_ = true; +} + +int ObLogCommitter::push(PartTransTask *task, + const int64_t task_count, + const int64_t timeout, + ObLogTenant *tenant /* = NULL*/) +{ + UNUSED(timeout); + + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task) + || OB_UNLIKELY(! 
task->is_task_info_valid()) + || OB_UNLIKELY(task_count <= 0)) { + LOG_ERROR("invalid task", KPC(task), K(task_count)); + ret = OB_INVALID_ARGUMENT; + } + // DDL tasks + // Note: The is_ddl_offline_task() task is an offline task and is not specially handled here + else if (task->is_ddl_trans()) { + const int64_t seq = task->get_global_trans_seq(); + + if (OB_FAIL(trans_committer_queue_.set(seq, task))) { + LOG_ERROR("trans_committer_queue_ set fail", KR(ret), K(seq), KPC(task), + "begin_sn", trans_committer_queue_.begin_sn(), + "end_sn", trans_committer_queue_.end_sn(), + KPC(tenant)); + } else { + trans_committer_queue_cond_.signal(); + } + // Increase the number of DDL transactions + (void)ATOMIC_AAF(&ddl_part_trans_task_count_, 1); + } + // DML task + else if (task->is_dml_trans()) { + (void)ATOMIC_AAF(&dml_part_trans_task_count_, task_count); + (void)ATOMIC_AAF(&dml_trans_count_, 1); + // DML does not allow tenant to be invalid + const int64_t seq = task->get_global_trans_seq(); + + if (OB_FAIL(trans_committer_queue_.set(seq, task))) { + LOG_ERROR("trans_committer_queue_ set fail", KR(ret), K(seq), KPC(task), + "begin_sn", trans_committer_queue_.begin_sn(), + "end_sn", trans_committer_queue_.end_sn(), + KPC(tenant)); + } else { + trans_committer_queue_cond_.signal(); + } + } + // push heartbeat task + else if (task->is_global_heartbeat() || task->is_part_heartbeat()) { + if (OB_FAIL(push_heartbeat_(*task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_heartbeat_ fail", KR(ret), K(*task)); + } + } else {} + } + // push partitin offline task + else if (task->is_offline_partition_task()) { + if (OB_FAIL(push_offline_partition_task_(*task))) { + LOG_ERROR("push_offline_partition_task_ fail", KR(ret), KPC(task)); + } + } + // Processing of unserviced service tasks + else if (task->is_not_served_trans()) { + if (OB_FAIL(handle_not_served_trans_(*task))) { + LOG_ERROR("handle_not_served_trans_ fail", KR(ret), KPC(task)); + } + } else { + LOG_ERROR("unknown part trans task", K(*task)); + ret = OB_NOT_SUPPORTED; + } + + return ret; +} + +int ObLogCommitter::push_br_task(ObLogBR &task) +{ + int ret = OB_SUCCESS; + ObLogRowDataIndex *row_data_index = static_cast(task.get_host()); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(row_data_index)) { + LOG_ERROR("row_data_index is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + const int64_t trans_seq = row_data_index->get_br_commit_seq(); + TransCtx *trans_ctx = static_cast(row_data_index->get_trans_ctx_host()); + + if (OB_ISNULL(trans_ctx)) { + LOG_ERROR("trans_ctx is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(br_committer_queue_.set(trans_seq, &task))) { + LOG_ERROR("br_committer_queue_ set fail", KR(ret), K(trans_seq), K(task), + "begin_sn", br_committer_queue_.begin_sn(), + "end_sn", br_committer_queue_.end_sn()); + } else { + LOG_DEBUG("br_committer_queue_ set succ", K(trans_seq), K(task), + "begin_sn", br_committer_queue_.begin_sn(), + "end_sn", br_committer_queue_.end_sn()); + + trans_ctx->br_committer_queue_signal(); + } + } + + return ret; +} + +int ObLogCommitter::alloc_checkpoint_task_(PartTransTask &task, CheckpointTask *&checkpoint_task) +{ + int ret = OB_SUCCESS; + void *ptr = NULL; + int64_t size = 0; + checkpoint_task = NULL; + + // Additional PKey information to be added for offline partitioning tasks + if (task.is_offline_partition_task()) { + size = sizeof(CheckpointTask) + sizeof(ObPartitionKey); + } else { + size = sizeof(CheckpointTask); + } + + if (OB_ISNULL(ptr = checkpoint_queue_allocator_.alloc(size))) { + LOG_ERROR("alloc memory for CheckpointTask fail", K(size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + checkpoint_task = new (ptr) CheckpointTask(task); + } + return ret; +} + +void ObLogCommitter::free_checkpoint_task_(CheckpointTask *checkpoint_task) +{ + if (NULL != checkpoint_task) { + checkpoint_task->~CheckpointTask(); + checkpoint_queue_allocator_.free(checkpoint_task); + checkpoint_task = NULL; + } +} + +int ObLogCommitter::update_checkpoint_info_(PartTransTask &task) +{ + int ret = OB_SUCCESS; + CheckpointTask *checkpoint_task = NULL; + int64_t checkpoint_seq = task.get_checkpoint_seq(); + + if (OB_UNLIKELY(checkpoint_seq < 0)) { + LOG_ERROR("task checkpoint sequence is invalid", K(checkpoint_seq), K(task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(alloc_checkpoint_task_(task, checkpoint_task))) { + LOG_ERROR("alloc_checkpoint_task_ fail", KR(ret), K(task)); + } else if (OB_ISNULL(checkpoint_task)) { + LOG_ERROR("invalid checkpoint_task", K(checkpoint_task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(checkpoint_queue_.set(checkpoint_seq, checkpoint_task))) { + LOG_ERROR("set checkpoint_queue_ fail", KR(ret), K(checkpoint_seq), K(checkpoint_task)); + // 释放内存 + free_checkpoint_task_(checkpoint_task); + checkpoint_task = NULL; + } else { + checkpoint_queue_cond_.signal(); + } + + return ret; +} + +// Handles both GLOBAL and PART types of heartbeats +int ObLogCommitter::push_heartbeat_(PartTransTask &task) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
task.is_task_info_valid())) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_DATA; + } else { + ret = recycle_task_directly_(task); + } + return ret; +} + +int ObLogCommitter::handle_not_served_trans_(PartTransTask &task) +{ + const bool can_async_recycle = task.is_contain_empty_redo_log(); + + return recycle_task_directly_(task, can_async_recycle); +} + +// recycle task directly +int ObLogCommitter::recycle_task_directly_(PartTransTask &task, const bool can_async_recycle) +{ + int ret = OB_SUCCESS; + int revert_ret = OB_SUCCESS; + + // Only single-threaded calls can be made here, and the GLOBAL HEARTBEAT distribution must be single-threaded + if (OB_FAIL(record_global_heartbeat_info_(task))) { + LOG_ERROR("record_global_heartbeat_info_ fail", KR(ret), K(task)); + } + // upadte checkpoint info + else if (OB_FAIL(update_checkpoint_info_(task))) { + LOG_ERROR("update_checkpoint_info_ fail", KR(ret), K(task)); + } + + if (can_async_recycle) { + if (OB_NOT_NULL(resource_collector_) + && OB_SUCCESS != (revert_ret = resource_collector_->revert(&task))) { + if (OB_IN_STOP_STATE != revert_ret) { + LOG_ERROR("revert HEARTBEAT task fail", K(revert_ret), K(task)); + } + ret = OB_SUCCESS == ret ? revert_ret : ret; + } + } + + return ret; +} + +int ObLogCommitter::record_global_heartbeat_info_(PartTransTask &task) +{ + int ret = OB_SUCCESS; + + // 1. checkpoint_seq of the global heartbeat logging task + // 2. checkpoint_seq is uniformly +1, then shifted 1 bit left + // (1) +1: to avoid the global heartbeat sequence number being exactly 0, which makes it impossible to pop + // (2) Shift one bit left: avoid checkpoint_seq is odd, set successfully aligned with default address, get will be minus 1 + if (task.is_global_heartbeat()) { + int64_t checkpoint_seq = (task.get_checkpoint_seq() + 1) << 1; + + if (OB_FAIL(global_heartbeat_info_queue_.set(global_heartbeat_seq_, reinterpret_cast(checkpoint_seq)))) { + LOG_ERROR("set global_heartbeat_info_queue_ fail", KR(ret), K(global_heartbeat_seq_), K(checkpoint_seq)); + } else { + ++global_heartbeat_seq_; + } + } + + return ret; +} + +int ObLogCommitter::push_offline_partition_task_(PartTransTask &task) +{ + int ret = OB_SUCCESS; + // partition should be valid + if (OB_UNLIKELY(! task.get_partition().is_valid())) { + LOG_ERROR("invalid offline partition task", K(task)); + ret = OB_INVALID_ERROR; + } else { + ret = recycle_task_directly_(task); + } + return ret; +} + + +void *ObLogCommitter::commit_thread_func_(void *arg) +{ + if (NULL != arg) { + ObLogCommitter *committer = static_cast(arg); + committer->commit_routine(); + } + + return NULL; +} + +void *ObLogCommitter::heartbeat_thread_func_(void *arg) +{ + if (NULL != arg) { + ObLogCommitter *committer = static_cast(arg); + committer->heartbeat_routine(); + } + + return NULL; +} + +int ObLogCommitter::next_checkpoint_task_(CheckpointTask *&task) +{ + int ret = OB_SUCCESS; + bool popped = false; + CheckpointQueuePopFunc pop_func; + + task = NULL; + if (OB_FAIL(checkpoint_queue_.pop(pop_func, task, popped))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("pop from CheckpointQueue fail", KR(ret), K(popped)); + } else { + // not element, normal + ret = OB_SUCCESS; + task = NULL; + } + } else if (! 
popped) { + // No pop out element + task = NULL; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + // success + } + + return ret; +} + +int ObLogCommitter::handle_checkpoint_task_(CheckpointTask &task, int64_t &checkpoint_timestamp) +{ + int ret = OB_SUCCESS; + int64_t cur_checkpoint_seq = checkpoint_queue_.begin_sn() - 1; + + DSTAT("[HEARTBEAT] [POP_TASK]", K(task), "seq", cur_checkpoint_seq); + + // If it is a heartbeat task, update the checkpoint timestamp, and pop the corresponding checkpoint information + if (task.is_global_heartbeat()) { + if (OB_INVALID_TIMESTAMP == checkpoint_timestamp) { + checkpoint_timestamp = task.timestamp_; + } + // Checks if the heartbeat checkpoint timestamp will fall back + else if (OB_UNLIKELY(checkpoint_timestamp > task.timestamp_)) { + LOG_ERROR("heartbeat timestamp is rollback", K(checkpoint_timestamp), K(task), + K(cur_checkpoint_seq)); + ret = OB_ERR_UNEXPECTED; + } else { + checkpoint_timestamp = task.timestamp_; + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(next_global_heartbeat_info_(cur_checkpoint_seq))) { + LOG_ERROR("next_global_heartbeat_info_ fail", KR(ret), K(cur_checkpoint_seq), K(task)); + } + } + } + // If it is a delete partition task, then notify PartMgr to reclaim the partition + else if (task.is_offline_partition_task()) { + if (OB_FAIL(handle_offline_checkpoint_task_(task))) { + LOG_ERROR("handle_offline_checkpoint_task_ fail", KR(ret), K(task)); + } + } else { + // Other tasks are not processed + } + + return ret; +} + +int ObLogCommitter::next_global_heartbeat_info_(const int64_t cur_checkpoint_seq) +{ + int ret = OB_SUCCESS; + bool popped = false; + GHeartbeatInfoQueuePopFunc pop_func; + const int64_t *checkpoint_seq = NULL; + + // For the global heartbeat, the checkpoint_seq is first put into the GlobalHeartbeatInfoQueue + // So the global heartbeat that is processed to the CheckpointQueue, must exist here + if (OB_FAIL(global_heartbeat_info_queue_.pop(pop_func, checkpoint_seq, popped))) { + LOG_ERROR("pop from GlobalHeartbeatInfoQueue fail", KR(ret), K(popped)); + } else if (! 
popped) { + // No pop out element + LOG_ERROR("pop from GlobalHeartbeatInfoQueue fail", KR(ret), K(popped)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(checkpoint_seq)) { + LOG_ERROR("checkpoint_seq is NULL", K(checkpoint_seq)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + const int64_t next_checkpoint_seq = (reinterpret_cast(checkpoint_seq)) / 2 - 1; + + if (OB_UNLIKELY(cur_checkpoint_seq != next_checkpoint_seq)) { + LOG_ERROR("global heartbeat cur_checkpoint_seq is not equal to next_checkpoint_seq", K(cur_checkpoint_seq), + K(next_checkpoint_seq), "hb_begin_sn", global_heartbeat_info_queue_.begin_sn(), + "hb_end_sn", global_heartbeat_info_queue_.end_sn()); + ret = OB_ERR_UNEXPECTED; + } + } + + return ret; +} + +void ObLogCommitter::print_global_heartbeat_info_() +{ + int ret = OB_SUCCESS; + const int64_t *checkpoint_seq = NULL; + int64_t next_checkpoint_seq = 0; + int64_t next_seq = global_heartbeat_info_queue_.begin_sn(); + int64_t end_seq = global_heartbeat_info_queue_.end_sn(); + int64_t checkpoint_queue_begin_sn = checkpoint_queue_.begin_sn(); + int64_t checkpoint_queue_end_sn = checkpoint_queue_.end_sn(); + + ret = global_heartbeat_info_queue_.get(next_seq, checkpoint_seq); + + // The next one is not ready, invalid value + if (OB_ERR_OUT_OF_UPPER_BOUND == ret || (OB_SUCC(ret) && NULL == checkpoint_seq)) { + next_checkpoint_seq = -1; + } else { + // Refer to generation_rules of record_global_heartbeat_info_ + next_checkpoint_seq = (reinterpret_cast(checkpoint_seq)) / 2 - 1; + } + int64_t delta = -1; + if (-1 == next_checkpoint_seq) { + delta = -1; + } else { + delta = next_checkpoint_seq - checkpoint_queue_begin_sn; + } + + _ISTAT("[CHECKPOINT_QUEUE] NEXT_SEQ=%ld NEXT_HEARTBEAT=%ld DELAT=%ld " + "QUEUE(HB=%ld,TOTAL=%ld)", + checkpoint_queue_begin_sn, next_checkpoint_seq, delta, + end_seq - next_seq, + checkpoint_queue_end_sn - checkpoint_queue_begin_sn); +} + +int ObLogCommitter::handle_offline_checkpoint_task_(CheckpointTask &task) +{ + int ret = OB_SUCCESS; + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + if (OB_UNLIKELY(! 
task.is_offline_partition_task())) { + LOG_ERROR("invalid argument which is not offline partition task", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("tenant mgr is NULL", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else { + // See alloc_checkpoint_task_allocate_memory for details + ObPartitionKey &pkey = *(reinterpret_cast(task.value_)); + uint64_t tenant_id = pkey.get_tenant_id(); + + // Go offline and reclaim the partition resources, requiring a successful recovery + // Since the previous data has been exported, there are no transaction dependencies, so the recovery must be successful + if (OB_FAIL(tenant_mgr->recycle_partition(pkey))) { + LOG_ERROR("recycle_partition fail", KR(ret), K(pkey), K(task)); + } else { + // success + } + + LOG_INFO("handle partition offline task", KR(ret), K(tenant_id), K(task)); + } + + return ret; +} + +int ObLogCommitter::dispatch_heartbeat_binlog_record_(const int64_t heartbeat_timestamp) +{ + int ret = OB_SUCCESS; + ObLogBR *br = NULL; + // heartbeat ObLogBR does not require cluster_id, freeze_version, tenant_id + const uint64_t cluster_id = 1; + const ObVersion freeze_version = ObVersion(1); + const uint64_t tenant_id = 1; + const int64_t ddl_schema_version = 0; + ObString trace_id; + ObString trace_info; + ObString unique_id; + const uint64_t row_index = 0; + const bool is_serilized = false; + + ISTAT("[HEARTBEAT]", "DELAY", TS_TO_DELAY(heartbeat_timestamp), "heartbeat", TS_TO_STR(heartbeat_timestamp)); + + if (OB_ISNULL(tag_br_alloc_)) { + LOG_ERROR("invalid tag_br_alloc_ fail", KR(ret), K(tag_br_alloc_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(tag_br_alloc_->alloc(is_serilized, br, NULL))) { + LOG_ERROR("alloc binlog record for HEARTBEAT fail", KR(ret)); + } else if (OB_ISNULL(br)) { + LOG_ERROR("alloc binlog record for HEARTBEAT fail", KR(ret), K(br)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(br->init_data(HEARTBEAT, cluster_id, tenant_id, ddl_schema_version, trace_id, trace_info, unique_id, + freeze_version, heartbeat_timestamp))) { + LOG_ERROR("init HEARTBEAT binlog record fail", KR(ret), K(heartbeat_timestamp), + K(cluster_id), K(freeze_version), K(tenant_id), K(ddl_schema_version), K(trace_id), K(trace_info), + K(unique_id), K(row_index)); + } else if (OB_FAIL(push_br_queue_(br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_br_queue_ fail", KR(ret)); + } + } else { + br = NULL; + } + + if (OB_FAIL(ret)) { + if (NULL != br) { + tag_br_alloc_->free(br); + br = NULL; + } + } + return ret; +} + +void ObLogCommitter::heartbeat_routine() +{ + int ret = OB_SUCCESS; + int64_t checkpoint_tstamp = OB_INVALID_TIMESTAMP; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else { + // Heartbeat thread that periodically generates heartbeat messages + while (! 
stop_flag_ && OB_SUCCESS == ret) { + CheckpointTask *task = NULL; + bool need_continue = false; + + // fetch next task + if (OB_FAIL(next_checkpoint_task_(task))) { + LOG_ERROR("next_checkpoint_task_ fail", KR(ret)); + } else if (NULL == task) { + // next task is not ready + need_continue = false; + } else { + need_continue = true; + + // Process checkpoint tasks and update checkpoint timestamps + if (OB_FAIL(handle_checkpoint_task_(*task, checkpoint_tstamp))) { + LOG_ERROR("handle_checkpoint_task_ fail", KR(ret), KPC(task), K(checkpoint_tstamp)); + } else { + // Free task memory when processing task is complete + free_checkpoint_task_(task); + task = NULL; + } + } + + // periodically send a heartbeat binlog record + // checkpoint timestamp is invalid for the first time, here ensure that the heartbeat is sent as soon as the checkpoint timestamp is valid + if (OB_SUCCESS == ret && OB_INVALID_TIMESTAMP != checkpoint_tstamp) { + if (REACH_TIME_INTERVAL(g_output_heartbeat_interval)) { + if (OB_FAIL(dispatch_heartbeat_binlog_record_(checkpoint_tstamp))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch_heartbeat_binlog_record_ fail", KR(ret), K(checkpoint_tstamp)); + } + } + } + } + + if (REACH_TIME_INTERVAL(PRINT_GLOBAL_HEARTBEAT_CHECKPOINT_INTERVAL)) { + print_global_heartbeat_info_(); + } + + // If there is no need to continue processing the task, wait for a while + if (OB_SUCCESS == ret && ! need_continue) { + checkpoint_queue_cond_.timedwait(g_output_heartbeat_interval); + } + } // while + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "committer HEARTBEAT thread exits, err=%d", ret); + stop_flag_ = true; + } + } + + LOG_INFO("committer HEARTBEAT thread exits", KR(ret), K_(stop_flag), K(checkpoint_tstamp)); +} + +void ObLogCommitter::commit_routine() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else { + int64_t commit_trans_count = 0; + + while (OB_SUCC(ret) && ! stop_flag_) { + PartTransTask *part_trans_task = NULL; + int64_t next_seq = trans_committer_queue_.begin_sn(); + ret = trans_committer_queue_.get(next_seq, part_trans_task); + + if (OB_ERR_OUT_OF_UPPER_BOUND == ret || (OB_SUCCESS == ret && NULL == part_trans_task)) { + // data not ready + ret = OB_SUCCESS; + trans_committer_queue_cond_.timedwait(DATA_OP_TIMEOUT); + } else if (OB_FAIL(ret)) { + LOG_ERROR("get task from commit queue fail", KR(ret), KPC(part_trans_task), + "begin_sn", trans_committer_queue_.begin_sn(), "end_sn", trans_committer_queue_.end_sn()); + } else { + // get a valid & ready trans + if (OB_FAIL(handle_when_trans_ready_(part_trans_task, commit_trans_count))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_when_trans_ready_ fail", KR(ret), KPC(part_trans_task), + K(commit_trans_count)); + } + } else { + bool popped = false; + bool use_lock = true; + PartTransTask *pop_task = NULL; + CommitQueuePopFunc pop_func; + + // trans can definitely pop out + if (OB_FAIL(trans_committer_queue_.pop(pop_func, pop_task, popped, use_lock))) { + LOG_ERROR("pop task from commit queue fail", KR(ret), KPC(pop_task), K(popped), K(use_lock), + "begin_sn", trans_committer_queue_.begin_sn(), "end_sn", trans_committer_queue_.end_sn()); + } else if (OB_UNLIKELY(! 
popped)) { + LOG_ERROR("pop task from commit queue fail", "tenant_id", part_trans_task->get_tenant_id(), + "begin_sn", trans_committer_queue_.begin_sn(), "end_sn", trans_committer_queue_.end_sn()); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + } + } + } // while + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "Committer commit thread exits, err=%d", ret); + stop_flag_ = true; + } + + if (OB_FAIL(ret)) { + LOG_INFO("Committer commit thread exits", KR(ret), K_(stop_flag)); + } +} + +int ObLogCommitter::handle_when_trans_ready_(PartTransTask *task, + int64_t &commit_trans_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("task is null", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + const uint64_t tenant_id = task->get_tenant_id(); + + if (OB_FAIL(get_tenant_compat_mode(tenant_id, compat_mode, stop_flag_))) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), "tenant_id", tenant_id, + "compat_mode", print_compat_mode(compat_mode), KPC(task)); + } else { + share::CompatModeGuard g(compat_mode); + + // handle ready task + if (OB_FAIL(handle_task_(task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_task_ fail", KR(ret), "compat_mode", print_compat_mode(compat_mode)); + } + } else { + ++commit_trans_count; + } + } + } + + return ret; +} + +int ObLogCommitter::handle_ddl_task_(PartTransTask *ddl_task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + ret = OB_NOT_INIT; + } else if (OB_ISNULL(ddl_task) + || (OB_UNLIKELY(! 
ddl_task->is_ddl_trans()))) { + LOG_ERROR("invalid ddl task", KPC(ddl_task)); + ret = OB_INVALID_ARGUMENT; + } else { + // Subtract the number of DDL transactions + ATOMIC_DEC(&ddl_part_trans_task_count_); + TransCtx *trans_ctx = NULL; + const ObTransID &trans_id = ddl_task->get_trans_id(); + int64_t local_schema_version = OB_INVALID_TIMESTAMP; + + // Advance the transaction context state to COMMITTED + if (OB_FAIL(trans_ctx_mgr_->get_trans_ctx(trans_id, trans_ctx, false))) { + LOG_ERROR("get_trans_ctx fail", KR(ret), K(trans_id), KPC(trans_ctx), KPC(ddl_task)); + } else if (OB_FAIL(trans_ctx->commit())) { + LOG_ERROR("TransCtx::commit fail", KR(ret), K(trans_id), KPC(trans_ctx), KPC(ddl_task)); + } else {} + + if (OB_SUCC(ret) && ddl_task->is_ddl_trans()) { + // Set the reference count to: number of statements + 1 + ddl_task->set_ref_cnt(ddl_task->get_stmt_num() + 1); + local_schema_version = ddl_task->get_local_schema_version(); + + // Iterate through each statement of the DDL + DdlStmtTask *stmt_task = static_cast(ddl_task->get_stmt_list().head_); + while (NULL != stmt_task && OB_SUCCESS == ret) { + if (OB_FAIL(handle_ddl_stmt_(*stmt_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_ddl_stmt_ fail", KR(ret), KPC(stmt_task)); + } + } else { + stmt_task = static_cast(stmt_task->get_next()); + } + } + + if (OB_SUCCESS == ret) { + // update checkpoint info + if (OB_FAIL(update_checkpoint_info_(*ddl_task))) { + LOG_ERROR("update_checkpoint_info_ fail", KR(ret), K(ddl_task)); + } + } + } // is_ddl_trans + + if (OB_SUCCESS == ret) { + // update local cur_schema_version + // host.update_committer_cur_schema_version(local_schema_version); + LOG_DEBUG("update_committer_cur_schema_version", K(local_schema_version), KPC(ddl_task)); + + // Decrement the reference count + // If the reference count is 0, the DDL task needs to be recycled + if (0 == ddl_task->dec_ref_cnt()) { + if (OB_FAIL(resource_collector_->revert(ddl_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert DDl PartTransTask fail", KR(ret), K(ddl_task)); + } + } else { + ddl_task = NULL; + } + } + } + + // revert TransCtx + if (NULL != trans_ctx) { + int revert_ret = OB_SUCCESS; + if (OB_SUCCESS != (revert_ret = trans_ctx_mgr_->revert_trans_ctx(trans_ctx))) { + LOG_ERROR("revert_trans_ctx fail", K(revert_ret), K(trans_ctx)); + ret = OB_SUCCESS == ret ? revert_ret : ret; + } else { + trans_ctx = NULL; + } + } + } + + return ret; +} + +int ObLogCommitter::handle_ddl_stmt_(DdlStmtTask &stmt_task) +{ + int ret = OB_SUCCESS; + ObLogBR *br = stmt_task.get_binlog_record(); + + if (OB_ISNULL(br)) { + LOG_ERROR("invalid DDL binlog record", K(stmt_task)); + ret = OB_ERR_UNEXPECTED; + } + // If the binlog record is invalid, the binlog record resource is recycled + else if (! 
br->is_valid()) { + if (OB_FAIL(revert_binlog_record_(br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert_binlog_record_ fail", KR(ret), K(br), K(stmt_task)); + } + } else { + br = NULL; + } + } else { + // If the binlog record is valid, output + // DDL push to the next element in the BRQueue, the next element in the chain is empty + br->set_next(NULL); + + if (OB_FAIL(push_br_queue_(br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_br_queue_ fail", KR(ret), K(br)); + } + } else { + br = NULL; + } + } + + return ret; +} + +int ObLogCommitter::revert_binlog_record_(ObLogBR *br) +{ + int ret = OB_SUCCESS; + ILogRecord *br_data = NULL; + + if (OB_ISNULL(resource_collector_)) { + LOG_ERROR("invalid resource collector", K(resource_collector_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("binlog record is invalid", K(br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br_data = br->get_data())) { + LOG_ERROR("binlog record data is invalid", K(br)); + ret = OB_INVALID_ARGUMENT; + } else { + int record_type = br_data->recordType(); + + if (OB_FAIL(resource_collector_->revert(record_type, br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert binlog record fail", KR(ret), K(br), + "record_type", print_record_type(record_type)); + } + } else { + br = NULL; + } + } + + return ret; +} + +int ObLogCommitter::handle_task_(PartTransTask *participants) +{ + int ret = OB_SUCCESS; + LOG_DEBUG("ObLogCommitter handle_task", KPC(participants)); + + if (OB_UNLIKELY(! inited_)) { + ret = OB_NOT_INIT; + } else if (OB_ISNULL(participants)) { + ret = OB_INVALID_ARGUMENT; + } else if (participants->is_ddl_trans()) { + if (OB_FAIL(handle_ddl_task_(participants))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_ddl_task_ fail", KR(ret), KPC(participants)); + } + } + } else if (participants->is_dml_trans()) { + if (OB_FAIL(handle_dml_task_(participants))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_dml_task_ fail", KR(ret), KPC(participants)); + } + } + } else { + LOG_ERROR("not supported task", KPC(participants)); + ret = OB_NOT_SUPPORTED; + } + + return ret; +} + +int ObLogCommitter::handle_dml_task_(PartTransTask *participants) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + ret = OB_NOT_INIT; + } else if (OB_ISNULL(participants)) { + ret = OB_INVALID_ARGUMENT; + } else { + const uint64_t cluster_id = participants->get_cluster_id(); + int64_t global_trans_version = participants->get_global_trans_version(); + const common::ObVersion &freeze_version = participants->get_freeze_version(); + const uint64_t tenant_id = extract_tenant_id(participants->get_partition().get_table_id()); + TransCtx *trans_ctx = NULL; + const ObTransID &trans_id = participants->get_trans_id(); + int64_t valid_br_num = 0; + PartTransTask *part = participants; + int64_t part_trans_task_count = 0; + int64_t valid_part_trans_task_count = 0; + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + // After processing all participants, update the transaction context information before pushing to the user queue + if (OB_SUCC(ret)) { + // Advance the transaction context state to COMMITTED + if (OB_FAIL(trans_ctx_mgr_->get_trans_ctx(trans_id, trans_ctx, false))) { + LOG_ERROR("get_trans_ctx fail", K(ret), K(trans_id), KPC(trans_ctx), KPC(participants)); + } else if (OB_FAIL(trans_ctx->commit())) { + LOG_ERROR("TransCtx::commit fail", K(ret), K(trans_id), KPC(trans_ctx), KPC(participants)); + } else {} + } + + if (OB_SUCC(ret)) { + valid_br_num = trans_ctx->get_total_br_count(); + part_trans_task_count = trans_ctx->get_ready_participant_count(); + valid_part_trans_task_count = trans_ctx->get_valid_part_trans_task_count(); + } + + // Statistical Information + if (OB_SUCC(ret)) { + if (OB_FAIL(do_trans_stat_(participants->get_partition(), valid_br_num))) { + LOG_ERROR("do trans stat fail", KR(ret), K(valid_br_num)); + } + } + + // Place the Binlog Record chain in the user queue + // Binlog Record may be recycled at any time + if (OB_SUCCESS == ret && valid_br_num > 0) { + if (OB_FAIL(commit_binlog_record_list_(*trans_ctx, cluster_id, valid_part_trans_task_count, + freeze_version, tenant_id, global_trans_version))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_binlog_record_list_ fail", KR(ret), KPC(trans_ctx), + K(valid_br_num), K(valid_part_trans_task_count), + K(freeze_version), K(tenant_id), K(global_trans_version)); + } + } else { + // succ + } + } + + // Update Commit information + // NOTE: Since the above guarantees that the reference count is greater than the number of Binlog Records, the list of participants here must be valid + part = participants; + if (OB_SUCC(ret)) { + while (! 
stop_flag_ && OB_SUCCESS == ret && NULL != part) { + PartTransTask *next = part->next_task(); + + // update checkpint info + if (OB_FAIL(update_checkpoint_info_(*part))) { + LOG_ERROR("update_checkpoint_info_ fail", KR(ret), KPC(part)); + } + // Decrement the reference count after the Commit message is updated + // If the reference count is 0, the partition transaction is recycled + else if (0 == part->dec_ref_cnt()) { + if (OB_FAIL(resource_collector_->revert(part))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert PartTransTask fail", KR(ret), K(part)); + } + } else { + part = NULL; + } + } + + part = next; + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + + // Counting the number of partitioned tasks, reducing the number of participants + (void)ATOMIC_AAF(&dml_part_trans_task_count_, -part_trans_task_count); + (void)ATOMIC_AAF(&dml_trans_count_, -1); + + // revert TransCtx + if (NULL != trans_ctx) { + int revert_ret = OB_SUCCESS; + if (OB_SUCCESS != (revert_ret = trans_ctx_mgr_->revert_trans_ctx(trans_ctx))) { + LOG_ERROR("revert_trans_ctx fail", K(revert_ret), K(trans_ctx)); + ret = OB_SUCCESS == ret ? revert_ret : ret; + } else { + trans_ctx = NULL; + } + } + } + + return ret; +} + +int ObLogCommitter::do_trans_stat_(const common::ObPartitionKey &pkey, + const int64_t total_stmt_cnt) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = OB_INVALID_ID; + + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat_mgr_ is null", K(trans_stat_mgr_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(! pkey.is_valid()) || OB_UNLIKELY(total_stmt_cnt < 0)) { + LOG_ERROR("invalid argument", K(pkey), K(total_stmt_cnt)); + ret = OB_INVALID_ARGUMENT; + } else { + // A transaction must belong to only one tenant, distributed transactions can cross databases, but not cross tenants + tenant_id = extract_tenant_id(pkey.table_id_); + trans_stat_mgr_->do_rps_stat_after_filter(total_stmt_cnt); + if (OB_FAIL(trans_stat_mgr_->do_tenant_rps_stat_after_filter(tenant_id, total_stmt_cnt))) { + LOG_ERROR("do tenant rps stat after filter fail", KR(ret), K(tenant_id), K(total_stmt_cnt)); + } + } + + return ret; +} + +int ObLogCommitter::commit_binlog_record_list_(TransCtx &trans_ctx, + const uint64_t cluster_id, + const int64_t part_trans_task_count, + const common::ObVersion &freeze_version, + const uint64_t tenant_id, + const int64_t global_trans_version) +{ + int ret = OB_SUCCESS; + // COMMIT does not require trace id trace_info unique_id + // BEGIN does not require trace_id, trace_info where unique_id records the transaction ID, as a transaction-level unique ID + // Purpose: Support Oracle smooth migration, use transaction table in OB to Oracle link to achieve idempotent control + ObString trace_id; + ObString trace_info; + ObString unique_id ; + const ObTransID trans_id = trans_ctx.get_trans_id(); + const ObString &trans_id_str = trans_ctx.get_trans_id_str(); + const int64_t total_br_count = trans_ctx.get_total_br_count(); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("committer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(global_trans_version <= 0)) { + LOG_ERROR("invalid argument", K(global_trans_version)); + ret = OB_INVALID_ARGUMENT; + } else { + ObLogBR *begin_br = NULL; + ObLogBR *commit_br = NULL; + const int64_t ddl_schema_version = 0; + const uint64_t row_index = 0; + const bool is_serilized = false; + + // Assign BEGIN and COMMIT, place them at the beginning and end + // BEGIN/COMMIT does not need to set host information + if (OB_FAIL(tag_br_alloc_->alloc(is_serilized, begin_br, NULL))) { + LOG_ERROR("alloc begin binlog record fail", KR(ret)); + } else if (OB_ISNULL(begin_br)) { + LOG_ERROR("alloc begin binlog record fail", KR(ret), K(begin_br)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tag_br_alloc_->alloc(is_serilized, commit_br, NULL))) { + LOG_ERROR("alloc commit binlog record fail", KR(ret)); + } else if (OB_ISNULL(commit_br)) { + LOG_ERROR("alloc commit binlog record fail", KR(ret), K(commit_br)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(begin_br->init_data(EBEGIN, cluster_id, tenant_id, ddl_schema_version, trace_id, trace_info, trans_id_str, + freeze_version, global_trans_version, part_trans_task_count, &trans_ctx.get_major_version_str()))) { + LOG_ERROR("init begin binlog record fail", KR(ret), K(global_trans_version), K(cluster_id), + K(freeze_version), K(tenant_id), K(ddl_schema_version), K(trace_id), K(trace_info), K(trans_id_str), + K(row_index), K(part_trans_task_count), "major_version:", trans_ctx.get_major_version_str()); + } else if (OB_FAIL(commit_br->init_data(ECOMMIT, cluster_id, tenant_id, ddl_schema_version, trace_id, trace_info, unique_id, + freeze_version, global_trans_version, part_trans_task_count))) { + LOG_ERROR("init commit binlog record fail", KR(ret), K(global_trans_version), K(cluster_id), + K(freeze_version), K(tenant_id), K(ddl_schema_version), K(trace_id), K(trace_info), K(unique_id), + K(row_index), K(part_trans_task_count)); + } else { + LOG_DEBUG("commit trans begin", K(trans_ctx.get_total_br_count())); + // push begin br to queue + if (OB_FAIL(push_br_queue_(begin_br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_br_queue_ fail", KR(ret), K(begin_br)); + } + } + + // push data + while (! stop_flag_ && OB_SUCC(ret) && ! 
trans_ctx.is_all_br_committed()) { + ObLogBR *br_task = NULL; + + if (OB_FAIL(next_ready_br_task_(br_task))) { + LOG_ERROR("next_ready_br_task_ fail", KR(ret), KPC(br_task)); + } else if (NULL == br_task) { + trans_ctx.br_committer_queue_timedwait(DATA_OP_TIMEOUT); + } else { + // Single br down, next reset to NULL + br_task->set_next(NULL); + trans_ctx.inc_committed_br_count(); + + if (OB_FAIL(push_br_queue_(br_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_br_queue_ fail", KR(ret), K(br_task)); + } + } + } + } // while + + // push commit br to commit + if (OB_SUCC(ret)) { + if (OB_FAIL(push_br_queue_(commit_br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_br_queue_ fail", KR(ret), K(commit_br)); + } + } + } + } + + if (OB_FAIL(ret)) { + if (NULL != begin_br) { + tag_br_alloc_->free(begin_br); + begin_br = NULL; + } + + if (NULL != commit_br) { + tag_br_alloc_->free(commit_br); + commit_br = NULL; + } + } + + LOG_DEBUG("commit_binlog_record_list", K(trans_id), K(trans_id_str), K(global_trans_version), K(cluster_id), + K(freeze_version), K(tenant_id), K(ddl_schema_version), K(trace_id), K(unique_id), + K(row_index), K(part_trans_task_count), + K(total_br_count), "br_committer_queue_cnt", get_br_committer_queue_count()); + } + + return ret; +} + +int ObLogCommitter::next_ready_br_task_(ObLogBR *&br_task) +{ + int ret = OB_SUCCESS; + bool br_popped = false; + bool use_lock = true; + BRCommitQueuePopFunc br_pop_func; + br_task = NULL; + + if (OB_FAIL(br_committer_queue_.pop(br_pop_func, br_task, br_popped, use_lock))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("pop from CheckpointQueue fail", KR(ret), KPC(br_task), K(br_popped), K(use_lock), + "begin_sn", br_committer_queue_.begin_sn(), "end_sn", br_committer_queue_.end_sn()); + } else { + // no element, normal + ret = OB_SUCCESS; + br_task = NULL; + } + } else if (! br_popped) { + // No pop element + br_task = NULL; + } else if (OB_ISNULL(br_task)) { + LOG_ERROR("invalid task", K(br_task)); + ret = OB_ERR_UNEXPECTED; + } else { + // success + } + + return ret; +} + +int ObLogCommitter::push_br_queue_(ObLogBR *br) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + ret = OB_INVALID_ARGUMENT; + } else { + RETRY_FUNC(stop_flag_, (*br_queue_), push, br, DATA_OP_TIMEOUT); + } + + return ret; +} + +void ObLogCommitter::get_part_trans_task_count(int64_t &ddl_part_trans_task_count, + int64_t &dml_part_trans_task_count, + int64_t &br_count) const +{ + dml_part_trans_task_count = ATOMIC_LOAD(&dml_part_trans_task_count_); + ddl_part_trans_task_count = ATOMIC_LOAD(&ddl_part_trans_task_count_); + br_count = br_committer_queue_.end_sn() - br_committer_queue_.begin_sn(); +} + +void ObLogCommitter::configure(const ObLogConfig &cfg) +{ + int64_t output_heartbeat_interval_sec = cfg.output_heartbeat_interval_sec; + + ATOMIC_STORE(&g_output_heartbeat_interval, output_heartbeat_interval_sec * _SEC_); + LOG_INFO("[CONFIG]", K(output_heartbeat_interval_sec)); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_committer.h b/src/liboblog/src/ob_log_committer.h new file mode 100644 index 0000000000000000000000000000000000000000..765955d4e870aa29493798799a8b5021b9b7e673 --- /dev/null +++ b/src/liboblog/src/ob_log_committer.h @@ -0,0 +1,272 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_COMMITTER_H__ +#define OCEANBASE_LIBOBLOG_COMMITTER_H__ + +#include "common/ob_queue_thread.h" // ObCond +#include "lib/container/ob_ext_ring_buffer.h" // ObExtendibleRingBuffer +#include "lib/atomic/ob_atomic.h" // ATOMIC_LOAD + +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_part_trans_task.h" // PartTransTask, DdlStmtTask +#include "ob_log_trans_ctx.h" // TransCtx + +namespace oceanbase +{ +namespace transaction +{ +class ObTransID; +} + +namespace liboblog +{ +/////////////////////////////////////////////////////////////////////////////////////// +// IObLogCommitter + +class PartTransTask; +class ObLogConfig; +class BRQueue; +class ObLogTenant; + +class IObLogCommitter +{ +public: + virtual ~IObLogCommitter() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(PartTransTask *task, + const int64_t task_count, + const int64_t timeout, + ObLogTenant *tenant = NULL) = 0; + + virtual int push_br_task(ObLogBR &task) = 0; + + virtual int64_t get_dml_trans_count() const = 0; + + virtual void get_part_trans_task_count(int64_t &ddl_part_trans_task_count, + int64_t &dml_part_trans_task_count, + int64_t &br_count) const = 0; + + // update config of committer + virtual void configure(const ObLogConfig &cfg) = 0; + +}; + +/////////////////////////////////////////////////////////////////////////////////////// +// ObLogCommitter + +class IObLogResourceCollector; +class IObLogErrHandler; +class IObLogTransCtxMgr; +class IObLogTransStatMgr; +class DdlStmtTask; +class IObLogBRPool; + +class ObLogCommitter : public IObLogCommitter +{ + typedef common::ObExtendibleRingBuffer TransCommitterQueue; + typedef common::ObExtendibleRingBuffer BRCommitterQueue; + struct CheckpointTask; + typedef common::ObExtendibleRingBuffer CheckpointQueue; + // Record global heartbeat corresponding checkpoint_seq, easy to troubleshoot global checkpoint not advancing problem + typedef common::ObExtendibleRingBuffer GlobalHeartbeatInfoQueue; + typedef common::ObConcurrentFIFOAllocator CheckpointQueueAllocator; + + static const int64_t DATA_OP_TIMEOUT = 1L * _SEC_; + // No memory limit for checkpoint queue + static const int64_t CHECKPOINT_QUEUE_ALLOCATOR_TOTAL_LIMIT = INT64_MAX; + static const int64_t CHECKPOINT_QUEUE_ALLOCATOR_HOLD_LIMIT = 32 * _M_; + static const int64_t CHECKPOINT_QUEUE_ALLOCATOR_PAGE_SIZE = 2 * _M_; + static const int64_t COMMITTER_TRANS_COUNT_UPPER_LIMIT = 1000; + static int64_t g_output_heartbeat_interval; + static const int64_t PRINT_GLOBAL_HEARTBEAT_CHECKPOINT_INTERVAL = 10L * _SEC_; + +public: + ObLogCommitter(); + virtual ~ObLogCommitter(); + +public: + int start(); + void stop(); + void mark_stop_flag(); + int push(PartTransTask *task, + const int64_t task_count, + const int64_t timeout, + ObLogTenant *tenant = NULL); + int push_br_task(ObLogBR &task); + int64_t get_dml_trans_count() const { return ATOMIC_LOAD(&dml_trans_count_); } + void get_part_trans_task_count(int64_t &ddl_part_trans_task_count, + int64_t &dml_part_trans_task_count, + int64_t &br_count) const; + void configure(const ObLogConfig &cfg); + int64_t get_br_committer_queue_count() 
const { return br_committer_queue_.end_sn() - br_committer_queue_.begin_sn(); } + BRCommitterQueue &get_br_committer_queue() { return br_committer_queue_; } + +public: + int init(const int64_t start_seq, + BRQueue *br_queue, + IObLogResourceCollector *resource_collector, + IObLogBRPool *tag_br_alloc, + IObLogTransCtxMgr *trans_ctx_mgr, + IObLogTransStatMgr *trans_stat_mgr, + IObLogErrHandler *err_handler); + void destroy(); + void commit_routine(); + void heartbeat_routine(); + +private: + static void *commit_thread_func_(void *args); + static void *heartbeat_thread_func_(void *args); + +private: + int alloc_checkpoint_task_(PartTransTask &task, CheckpointTask *&checkpoint_task); + void free_checkpoint_task_(CheckpointTask *checkpoint_task); + int update_checkpoint_info_(PartTransTask &task); + int push_heartbeat_(PartTransTask &task); + // For not served PartTransTask: + // 1. If don't contain any data, can recycle directly + // 2. Otherwise, need to wait LogEntryTask callback + int handle_not_served_trans_(PartTransTask &task); + int push_offline_partition_task_(PartTransTask &task); + int next_checkpoint_task_(CheckpointTask *&task); + int handle_checkpoint_task_(CheckpointTask &task, int64_t &checkpoint_timestamp); + int next_global_heartbeat_info_(const int64_t cur_checkpoint_seq); + void print_global_heartbeat_info_(); + int dispatch_heartbeat_binlog_record_(const int64_t heartbeat_timestamp); + int handle_task_(PartTransTask *task); + int handle_dml_task_(PartTransTask *task); + int handle_ddl_task_(PartTransTask *ddl_task); + int handle_ddl_stmt_(DdlStmtTask &stmt_task); + int revert_binlog_record_(ObLogBR *br); + int do_trans_stat_(const common::ObPartitionKey &pkey, const int64_t total_stmt_cnt); + int commit_binlog_record_list_(TransCtx &trans_ctx, + const uint64_t cluster_id, + const int64_t part_trans_task_count, + const common::ObVersion &freeze_version, + const uint64_t tenant_id, + const int64_t global_trans_version); + int push_br_queue_(ObLogBR *br); + int handle_offline_checkpoint_task_(CheckpointTask &task); + int recycle_task_directly_(PartTransTask &task, const bool can_async_recycle = true); + int record_global_heartbeat_info_(PartTransTask &task); + int handle_when_trans_ready_(PartTransTask *task, + int64_t &commit_trans_count); + int next_ready_br_task_(ObLogBR *&br_task); + +private: + struct CheckpointTask + { + PartTransTask::TaskType task_type_; // PartTransTask::TaskType + int64_t timestamp_; // timestamp of task + char value_[0]; // following data + + bool is_global_heartbeat() const + { + return PartTransTask::TASK_TYPE_GLOBAL_HEARTBEAT == task_type_; + } + bool is_offline_partition_task() const { return PartTransTask::TASK_TYPE_OFFLINE_PARTITION == task_type_; } + + explicit CheckpointTask(PartTransTask &task); + ~CheckpointTask(); + + TO_STRING_KV( + "task_type", PartTransTask::print_task_type(task_type_), + K_(timestamp), + "value", is_offline_partition_task() ? 
to_cstring(*((common::ObPartitionKey *)value_)) : "NULL"); + }; + + struct CheckpointQueuePopFunc + { + // Operators to determine if Ready + bool operator()(const int64_t sn, CheckpointTask *task) + { + UNUSED(sn); + return NULL != task; + } + }; + + struct CommitQueuePopFunc + { + // Operators to determine if Ready + bool operator()(const int64_t sn, PartTransTask *task) + { + UNUSED(sn); + return NULL != task; + } + }; + + struct GHeartbeatInfoQueuePopFunc + { + // Operators to determine if Ready + bool operator()(const int64_t sn, const int64_t *checkpoint_seq) + { + UNUSED(sn); + return NULL != checkpoint_seq; + } + }; + + struct BRCommitQueuePopFunc + { + // Operators to determine if Ready + bool operator()(const int64_t sn, ObLogBR *task) + { + UNUSED(sn); + return NULL != task; + } + }; + +private: + bool inited_; + BRQueue *br_queue_; + IObLogBRPool *tag_br_alloc_; + IObLogErrHandler *err_handler_; + IObLogTransCtxMgr *trans_ctx_mgr_; + IObLogTransStatMgr *trans_stat_mgr_; + IObLogResourceCollector *resource_collector_; + + // threads + pthread_t commit_pid_; // commit thread + pthread_t heartbeat_pid_; // heartbeat thread + + volatile bool stop_flag_ CACHE_ALIGNED; + + TransCommitterQueue trans_committer_queue_; // Queue of distribute trans + common::ObCond trans_committer_queue_cond_; + + BRCommitterQueue br_committer_queue_; + + // Globally unique sequence queue for generating checkpoint + // + // Fetcher assigns checkpoint seq to all tasks that are sent down and periodically calculates checkpoint information to be sent down via heartbeat tasks + // Committer sorts the tasks that arrive in disorder based on the checkpoint seq, and maintains the overall checkpoint by processing the tasks sequentially + CheckpointQueue checkpoint_queue_; + common::ObCond checkpoint_queue_cond_; + CheckpointQueueAllocator checkpoint_queue_allocator_; + + int64_t global_heartbeat_seq_; + GlobalHeartbeatInfoQueue global_heartbeat_info_queue_; + + // Count the number of DML partition transaction tasks + int64_t dml_part_trans_task_count_ CACHE_ALIGNED; + int64_t ddl_part_trans_task_count_; + int64_t dml_trans_count_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogCommitter); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_COMMITTER_H__ */ diff --git a/src/liboblog/src/ob_log_common.h b/src/liboblog/src/ob_log_common.h new file mode 100644 index 0000000000000000000000000000000000000000..7473ec530c4a503e28dfc7d8d903ec58cb590e4e --- /dev/null +++ b/src/liboblog/src/ob_log_common.h @@ -0,0 +1,60 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_COMMON_H__ +#define OCEANBASE_LIBOBLOG_COMMON_H__ + +#include + +namespace oceanbase +{ +namespace liboblog +{ + +#define LOG_STAT(level, format_str, ...) 
OBLOG_LOG(level, "STAT: " format_str, ##__VA_ARGS__) + +#define DEFAULT_LOG_DIR "./log/" +#define DEFAULT_LOG_FILE DEFAULT_LOG_DIR "liboblog.log" +#define DEFAULT_STDERR_LOG_FILE DEFAULT_LOG_DIR "liboblog.log.stderr" +#define DEFAULT_LOG_FILE_NAME "liboblog.log" +#define DEFAULT_STDERR_LOG_FILE_NAME "liboblog.log.stderr" +#define DEFAULT_TIMEZONE_INFO "+8:00" +#define DEFAULT_PID_FILE_DIR "./run/" +#define DEFAULT_PID_FILE DEFAULT_PID_FILE_DIR "liboblog.pid" +#define DEFAULT_CONFIG_FPATN "etc/liboblog.conf" + +#define DEFAULT_PENDING_TRANS_INFO_FILE "./log/pending_trans_info.log" + +static const int64_t MAX_LOG_FILE_SIZE = 1 << 28; +static const int64_t MAX_MEMORY_USAGE_PERCENT = 80; +static const int64_t DEFAULT_QUEUE_SIZE = 100000; +static const int64_t DEFAULT_START_SEQUENCE_NUM = 0; +static const int64_t MAX_CACHED_TRANS_CTX_COUNT = 10 * 10000; +static const int64_t RELOAD_CONFIG_INTERVAL = 10 * 1000 * 1000; +static const int64_t PRINT_GLOBAL_FLOW_CONTROL_INTERVAL = 5 * 1000 * 1000; +static const int64_t SINGLE_INSTANCE_NUMBER = 1; // single instance +static const int64_t GET_SCHEMA_TIMEOUT_ON_START_UP = 7200LL * 1000 * 1000; // Start moment, get schema timeout + +// column id of table __all_ddl_operation(used by liboblog) +static const uint64_t ALL_DDL_OPERATION_TABLE_SCHEMA_VERSION_COLUMN_ID = 18; +static const uint64_t ALL_DDL_OPERATION_TABLE_TENANT_ID_COLUMN_ID = 19; +static const uint64_t ALL_DDL_OPERATION_TABLE_DATABASE_ID_COLUMN_ID = 21; +static const uint64_t ALL_DDL_OPERATION_TABLE_TABLEGROUP_ID_COLUMN_ID = 23; +static const uint64_t ALL_DDL_OPERATION_TABLE_TABLE_ID_COLUMN_ID = 24; +static const uint64_t ALL_DDL_OPERATION_TABLE_OPERATION_TYPE_COLUMN_ID = 26; +static const uint64_t ALL_DDL_OPERATION_TABLE_DDL_STMT_STR_COLUMN_ID = 27; +static const uint64_t ALL_DDL_OPERATION_TABLE_EXEC_TENANT_ID_COLUMN_ID = 28; + +} // namespace liboblog +} // namespace oceanbase + +#endif diff --git a/src/liboblog/src/ob_log_config.cpp b/src/liboblog/src/ob_log_config.cpp new file mode 100644 index 0000000000000000000000000000000000000000..80bce12040aadbdeb2c146b772f665acae7d8c58 --- /dev/null +++ b/src/liboblog/src/ob_log_config.cpp @@ -0,0 +1,415 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_config.h" + +#include "lib/container/ob_array.h" // ObArray +#include "lib/container/ob_array_iterator.h" // ObArray::begin +#include "lib/allocator/ob_malloc.h" // ob_malloc/ob_free + +#include "ob_log_utils.h" // TS_TO_STR, get_timestamp + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ +ObLogConfig& ObLogConfig::get_instance() +{ + static ObLogConfig config; + return config; +} + +int ObLogConfig::init() +{ + int ret = OB_SUCCESS; + const int64_t buf_len= OBLOG_MAX_CONFIG_LENGTH; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(config_file_buf1_= static_cast(ob_malloc(buf_len, ObModIds::OB_LOG_CONFIG)))) { + LOG_ERROR("allocate memory for buffer fail", K(config_file_buf1_), K(buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(config_file_buf2_= static_cast(ob_malloc(buf_len, ObModIds::OB_LOG_CONFIG)))) { + LOG_ERROR("allocate memory for buffer fail", K(config_file_buf2_), K(buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + inited_ = true; + } + + return ret; +} + +void ObLogConfig::destroy() +{ + if (NULL != config_file_buf1_) { + ob_free(config_file_buf1_); + config_file_buf1_ = NULL; + } + + if (NULL != config_file_buf2_) { + ob_free(config_file_buf2_); + config_file_buf2_ = NULL; + } + + inited_ = false; +} + +// Remove the quotes from the URL +int ObLogConfig::format_cluster_url() +{ + int ret = OB_SUCCESS; + static const int64_t MAX_CLUSTER_URL_LEN = 1024; + char cluster_url_buffer[MAX_CLUSTER_URL_LEN] = {0}; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogConfig has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(cluster_url.copy(cluster_url_buffer, MAX_CLUSTER_URL_LEN))) { + LOG_ERROR("copy cluster_url fail", KR(ret), K(cluster_url.str())); + } else if (strlen(cluster_url_buffer) <= 0) { + LOG_ERROR("invalid config, cluster_url is empty", K(cluster_url.str())); + ret = OB_INVALID_CONFIG; + } else { + int64_t orig_len = strlen(cluster_url_buffer); + char *start_ptr = cluster_url_buffer; + char *end_ptr = cluster_url_buffer + strlen(cluster_url_buffer) - 1; + + // remove quotes + if ('\"' == *start_ptr) { + start_ptr++; + } + + if (end_ptr >= start_ptr && '\"' == *end_ptr) { + *end_ptr = '\0'; + end_ptr--; + } + + if (end_ptr < start_ptr) { + LOG_ERROR("cluster_url is empty after formatting", "cluster_url", cluster_url.str()); + ret = OB_INVALID_CONFIG; + } else if ((end_ptr - start_ptr + 1) < orig_len) { + _LOG_INFO("format cluster_url from [%s] to [%s]", cluster_url.str(), start_ptr); + + if (! cluster_url.set_value(start_ptr)) { + LOG_ERROR("cluster_url set_value fail", "cluster_url", start_ptr, + "length", end_ptr - start_ptr + 1); + } + } else {} + } + + return ret; +} + +int ObLogConfig::check_all() +{ + int ret = OB_SUCCESS; + ObConfigContainer::const_iterator it = container_.begin(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogConfig has not been initialized"); + ret = OB_NOT_INIT; + } else { + for (; OB_SUCCESS == ret && it != container_.end(); it++) { + if (OB_ISNULL(it->second)) { + LOG_ERROR("config item const_iterator second element is NULL", + "first_item", it->first.str()); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(it->second->str())) { + LOG_ERROR("config item string value is NULL", + "first_item", it->first.str()); + ret = OB_ERR_UNEXPECTED; + } else if (! 
it->second->check()) { + _LOG_ERROR("invalid config, name: [%s], value: [%s]", it->first.str(), it->second->str()); + ret = OB_INVALID_CONFIG; + } else if (0 == strlen(it->second->str())) { + // All configuration items are not allowed to be empty + _LOG_ERROR("invalid empty config, name: [%s], value: [%s]", + it->first.str(), it->second->str()); + ret = OB_INVALID_CONFIG; + } else { + // normal + } + } + } + + return ret; +} + +struct ConfigItem +{ + std::string key_; + std::string val_; + + bool operator == (const ConfigItem &item) + { + return key_ == item.key_; + } + + bool operator < (const ConfigItem &item) + { + return key_ < item.key_; + } + + ConfigItem() : key_(), val_() + {} + + ConfigItem(const char *key, const char *val) : key_(key), val_(val) {} + + TO_STRING_KV("key", key_.c_str(), "val", val_.c_str()); +}; + +typedef ObArray ConfigItemArray; + +void get_sorted_config_items(const ObConfigContainer &container, ConfigItemArray &configs) +{ + // Transfer the configuration items to an array and sort the output + ObConfigContainer::const_iterator it = container.begin(); + for (; it != container.end(); it++) { + ConfigItem item(it->first.str(), NULL == it->second ? "" : it->second->str()); + (void)configs.push_back(item); + } + std::sort(configs.begin(), configs.end()); +} + +void ObLogConfig::print() const +{ + static const int64_t BUF_SIZE = 1L << 22; + char *buf = static_cast(ob_malloc(BUF_SIZE, ObModIds::OB_LOG_CONFIG)); + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocate memory fail", K(BUF_SIZE)); + } else { + int64_t pos = 0; + int64_t size = BUF_SIZE; + ConfigItemArray configs; + + get_sorted_config_items(container_, configs); + + (void)databuff_printf(buf, size, pos, + "\n%s ================================ *liboblog config begin* ================================\n", + TS_TO_STR(get_timestamp())); + + for (int64_t index = 0; index < configs.count(); index++) { + (void)databuff_printf(buf, size, pos, "%s [CONFIG] %-45s = %s\n", + TS_TO_STR(get_timestamp()), configs.at(index).key_.c_str(), + configs.at(index).val_.c_str()); + } + + (void)databuff_printf(buf, size, pos, + "%s ================================ *liboblog config end* ================================\n", + TS_TO_STR(get_timestamp())); + + _LOG_INFO("%s", buf); + } + + if (NULL != buf) { + ob_free(buf); + buf = NULL; + } +} + +int ObLogConfig::load_from_map(const ConfigMap& configs, + const int64_t version /* = 0 */, + const bool check_name /* = false */) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogConfig has not been initialized"); + ret = OB_NOT_INIT; + } else { + std::map::const_iterator iter = configs.begin(); + for (; OB_SUCCESS == ret && iter != configs.end(); iter++) { + ObConfigItem *const *pp_item = NULL; + + if (NULL == (pp_item = container_.get(ObConfigStringKey(iter->first.c_str())))) { + if (check_name) { + _LOG_WARN("invalid config string, unknown config item! name: [%s] value: [%s]", + iter->first.c_str(), iter->second.c_str()); + ret = OB_INVALID_ARGUMENT; + } + } else { + (*pp_item)->set_value(iter->second.c_str()); + (*pp_item)->set_version(version); + _LOG_INFO("load config succ, %s=%s", iter->first.c_str(), iter->second.c_str()); + } + } + } + + return ret; +} + +int ObLogConfig::load_from_buffer(const char *config_str, + const int64_t config_str_len, + const int64_t version /* = 0 */, + const bool check_name /* = false */) +{ + int ret = OB_SUCCESS; + char *saveptr = NULL; + char *token = NULL; + int64_t pos =0; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogConfig has not been initialized"); + ret = OB_NOT_INIT; + } else if (NULL == config_str || config_str_len <= 0) { + LOG_ERROR("invalid argument", K(config_str), K(config_str_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(config_file_buf2_)) { + LOG_ERROR("config_file_buf2_ is NULL", K(config_file_buf2_)); + ret = OB_ERR_UNEXPECTED; + } else { + config_file_buf2_[0] = '\0'; + const int64_t buf_size = OBLOG_MAX_CONFIG_LENGTH; + + if (config_str_len > (buf_size - 1)) { + LOG_ERROR("extra config is too long!", K(config_str_len), K(buf_size)); + ret = OB_BUF_NOT_ENOUGH; + } else if (OB_FAIL(databuff_printf(config_file_buf2_, buf_size, pos, "%.*s", + static_cast(config_str_len), config_str))) { + LOG_ERROR("copy config string fail", KR(ret), K(config_file_buf2_), K(buf_size), K(pos), K(config_str_len), + K(config_str)); + } else { + token = strtok_r(config_file_buf2_, ",\n", &saveptr); + while (NULL != token && OB_SUCCESS == ret) { + char *saveptr_one = NULL; + const char *name = NULL; + const char *value = NULL; + ObConfigItem *const *pp_item = NULL; + if (NULL == (name = strtok_r(token, "=", &saveptr_one))) { + LOG_ERROR("fail to parse config string, can not find '=' from token", + K(token), K(config_str)); + ret = OB_INVALID_CONFIG; + } else if ('\0' == *(value = saveptr_one)) { + _LOG_WARN("empty config string: [%s]", token); + name = ""; + } else if (NULL == (pp_item = container_.get(ObConfigStringKey(name)))) { + if (check_name) { + _LOG_WARN("invalid config string, unknown config item! name: [%s] value: [%s]", + name, value); + ret = OB_INVALID_ARGUMENT; + } + } else { + (*pp_item)->set_value(value); + (*pp_item)->set_version(version); + _LOG_INFO("load config succ, %s=%s", name, value); + } + + if (OB_SUCCESS == ret) { + token = strtok_r(NULL, ",\n", &saveptr); + } + } + } + } + + return ret; +} + +int ObLogConfig::load_from_file(const char *config_file, + const int64_t version /* = 0 */, + const bool check_name /* = false */) +{ + int ret = OB_SUCCESS; + FILE *fp = NULL; + + if (OB_UNLIKELY(! 
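+  /* Typical call sequence for this class (illustrative sketch only; the file
+   * name is hypothetical and error handling is abbreviated):
+   *
+   *   ObLogConfig &cfg = ObLogConfig::get_instance();
+   *   int ret = cfg.init();
+   *   if (OB_SUCCESS == ret) { ret = cfg.load_from_file("liboblog.conf"); }
+   *   if (OB_SUCCESS == ret) { ret = cfg.check_all(); }
+   *   if (OB_SUCCESS == ret) { cfg.print(); }
+   */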
inited_)) { + LOG_ERROR("ObLogConfig has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(config_file)) { + LOG_ERROR("invalid argument", K(config_file)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(config_file_buf1_)) { + LOG_ERROR("config_file_buf1_ is NULL", K(config_file_buf1_)); + ret = OB_ERR_UNEXPECTED; + } else if (NULL == (fp = fopen(config_file, "rb"))) { + ret = OB_IO_ERROR; + LOG_ERROR("can't open file", K(config_file), KR(ret), KERRNOMSG(errno)); + } else { + config_file_buf1_[0] = '\0'; + int64_t buffer_size = OBLOG_MAX_CONFIG_LENGTH; + int64_t read_len = fread(config_file_buf1_, 1, buffer_size - 1, fp); + + if (0 != ferror(fp)) { + ret = OB_IO_ERROR; + LOG_ERROR("read config file error!", K(config_file), KERRNOMSG(errno)); + } else if (0 == feof(fp)) { + ret = OB_BUF_NOT_ENOUGH; + LOG_ERROR("config file is too long!", K(config_file), K(buffer_size)); + } else if (read_len <= 0) { + LOG_WARN("config file is empty", K(config_file)); + } else if (read_len >= buffer_size) { + LOG_ERROR("fread buffer overflow", K(read_len), K(buffer_size)); + ret = OB_SIZE_OVERFLOW; + } else { + // end with '\0' + config_file_buf1_[read_len] = '\0'; + + if (OB_FAIL(load_from_buffer(config_file_buf1_, read_len, version, check_name))) { + LOG_ERROR("load config fail", KR(ret), K(config_file), K(version), K(check_name), + K(read_len)); + } else { + LOG_INFO("load config from file succ", K(config_file)); + } + } + } + + if (NULL != fp) { + fclose(fp); + fp = NULL; + } + + return ret; +} + +int ObLogConfig::dump2file(const char *file) const +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(file)) { + LOG_ERROR("invalid argument", K(file)); + ret = OB_INVALID_ARGUMENT; + } else { + FILE *fp = NULL; + ConfigItemArray configs; + + get_sorted_config_items(container_, configs); + + if (NULL == (fp = fopen(file, "w+"))) { + ret = OB_IO_ERROR; + LOG_ERROR("open file fail", K(file), KERRMSG); + } else { + for (int64_t index = 0; index < configs.count(); index++) { + const ConfigItem &item = configs.at(index); + + int write_len = fprintf(fp, "%s=%s\n", item.key_.c_str(), item.val_.c_str()); + if (write_len <= 0) { + LOG_WARN("write config file fail", + K(write_len), "config: name", item.key_.c_str(), "value", item.val_.c_str()); + } + } + } + + if (NULL != fp) { + fclose(fp); + fp = NULL; + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_config.h b/src/liboblog/src/ob_log_config.h new file mode 100644 index 0000000000000000000000000000000000000000..64ba0e9d857a3509e184c2e55f34d94b26aa7743 --- /dev/null +++ b/src/liboblog/src/ob_log_config.h @@ -0,0 +1,548 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OCEANBASE_LIBOBLOG_CONFIG_H__
+#define OCEANBASE_LIBOBLOG_CONFIG_H__
+
+#include <map>
+#include "share/ob_define.h"
+#include "share/parameter/ob_parameter_macro.h"
+#include "share/config/ob_common_config.h" // ObInitConfigContainer
+
+#include "ob_log_common.h"
+#include "ob_log_fake_common_config.h" // ObLogFakeCommonConfig
+
+////////////// Define member variables of type INT, no limit on maximum value //////////////
+// DEF: default value
+// MIN: minimum value
+//
+// Note: DEF, MIN must be literal values, not variable names
+#define T_DEF_INT_INFT(name, SCOPE, DEF, MIN, NOTE) \
+    public: \
+      static const int64_t default_##name = (DEF); \
+      DEF_INT(name, SCOPE, #DEF, "[" #MIN ",]", NOTE);
+
+////////////// Define INT type member variable //////////////
+// DEF: default value
+// MIN: minimum value
+// MAX: maximum value
+//
+// Note: DEF, MIN, MAX must be literal values, not variable names
+#define T_DEF_INT(name, SCOPE, DEF, MIN, MAX, NOTE) \
+    public: \
+      static const int64_t default_##name = (DEF); \
+      static const int64_t max_##name = (MAX); \
+      DEF_INT(name, SCOPE, #DEF, "[" #MIN "," #MAX "]", NOTE);
+
+////////////// Define INT type member variable //////////////
+// DEF: default value, 0 or 1
+//
+// Note: DEF must be a literal value, not a variable name
+#define T_DEF_BOOL(name, SCOPE, DEF, NOTE) \
+    public: \
+      static const int64_t default_##name = DEF; \
+      DEF_INT(name, SCOPE, #DEF, "[0,1]", NOTE);
+
+namespace oceanbase
+{
+namespace liboblog
+{
+class ObLogConfig : public common::ObInitConfigContainer
+{
+  typedef std::map<std::string, std::string> ConfigMap;
+
+public:
+  ObLogConfig() : inited_(false), common_config_(), config_file_buf1_(NULL), config_file_buf2_(NULL)
+  {
+    rpc_tenant_id = common::OB_EXT_LOG_TENANT_ID;
+  }
+
+  virtual ~ObLogConfig() { destroy(); }
+
+  int init();
+  void destroy();
+  static ObLogConfig &get_instance();
+
+public:
+  int check_all();
+  void print() const;
+  int load_from_buffer(const char *config_str,
+      const int64_t config_str_len,
+      const int64_t version = 0,
+      const bool check_name = false);
+  int load_from_file(const char *config_file,
+      const int64_t version = 0,
+      const bool check_name = false);
+  int load_from_map(const ConfigMap& configs,
+      const int64_t version = 0,
+      const bool check_name = false);
+  int dump2file(const char *file) const;
+
+  common::ObCommonConfig &get_common_config() { return common_config_; }
+
+  // remove quotes of cluster_url
+  int format_cluster_url();
+
+public:
+
+#ifdef OB_CLUSTER_PARAMETER
+#undef OB_CLUSTER_PARAMETER
+#endif
+#define OB_CLUSTER_PARAMETER(args...) args
+  // Liboblog config.
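+  // Illustrative expansion of the wrappers above (the item name "foo_num" is hypothetical):
+  //   T_DEF_INT(foo_num, OB_CLUSTER_PARAMETER, 4, 1, 32, "foo thread num")
+  // expands to
+  //   public:
+  //     static const int64_t default_foo_num = (4);
+  //     static const int64_t max_foo_num = (32);
+  //     DEF_INT(foo_num, OB_CLUSTER_PARAMETER, "4", "[1,32]", "foo thread num");
+  // so every item carries a literal default (and max) alongside the usual DEF_* registration.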
+ // max memory occupied by liboblog: 20G + DEF_CAP(memory_limit, OB_CLUSTER_PARAMETER, "20G", "[2G,]", "memory limit"); + // Preserve the lower bound of system memory in %, in the range of 10% ~ 80% + // i.e.: ensure that the system memory remaining cannot be lower than this percentage based on the memory occupied by liboblog + DEF_INT(system_memory_avail_percentage_lower_bound, OB_CLUSTER_PARAMETER, "10", "[10, 80]", "system memory avail upper bound"); + DEF_CAP(tenant_manager_memory_upper_limit, OB_CLUSTER_PARAMETER, "5G", "[1G,]", "tenant manager memory upper limit"); + DEF_INT(dml_parser_thread_num, OB_CLUSTER_PARAMETER, "5", "[1,]", "DML parser thread number"); + DEF_INT(ddl_parser_thread_num, OB_CLUSTER_PARAMETER, "1", "[1,]", "DDL parser thread number"); + DEF_INT(sequencer_thread_num, OB_CLUSTER_PARAMETER, "5", "[1,]", "sequencer thread number"); + DEF_INT(sequencer_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "sequencer queue length"); + DEF_INT(formatter_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "formatter thread number"); + DEF_INT(formatter_batch_stmt_count, OB_CLUSTER_PARAMETER, "100", "[1,]", "formatter batch stmt count"); + DEF_INT(committer_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "committer queue length"); + DEF_INT(committer_thread_num, OB_CLUSTER_PARAMETER, "1", "[1,]", "committer thread number"); + DEF_INT(storager_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "storager thread number"); + DEF_INT(storager_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "storager queue length"); + DEF_INT(data_processor_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "data_processor thread number"); + DEF_INT(data_processor_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "data_processor queue length"); + DEF_INT(cached_schema_version_count, OB_CLUSTER_PARAMETER, "32", "[1,]", "cached schema version count"); + DEF_INT(history_schema_version_count, OB_CLUSTER_PARAMETER, "16", "[1,]", "history schema version count"); + DEF_INT(resource_collector_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "resource collector thread number"); + DEF_INT(resource_collector_thread_num_for_br, OB_CLUSTER_PARAMETER, "7", "[1,]", "binlog record resource collector thread number"); + DEF_INT(instance_num, OB_CLUSTER_PARAMETER, "1", "[1,]", "store instance number"); + DEF_INT(instance_index, OB_CLUSTER_PARAMETER, "0", "[0,]", "store instance index, start from 0"); + DEF_INT(part_trans_task_prealloc_count, OB_CLUSTER_PARAMETER, "300000", "[1,]", + "partition trans task pre-alloc count"); + DEF_INT(part_trans_task_active_count_upper_bound, OB_CLUSTER_PARAMETER, "200000", "[1,]", + "active partition trans task count upper bound"); + DEF_INT(log_entry_task_active_count_upper_bound, OB_CLUSTER_PARAMETER, "200000", "[1,]", + "active log entry task count upper bound"); + DEF_INT(part_trans_task_reusable_count_upper_bound, OB_CLUSTER_PARAMETER, "10240", "[1,]", + "reusable partition trans task count upper bound"); + DEF_INT(ready_to_seq_task_upper_bound, OB_CLUSTER_PARAMETER, "20000", "[1,]", + "ready to sequencer task count upper bound"); + DEF_INT(part_trans_task_dynamic_alloc, OB_CLUSTER_PARAMETER, "1", "[0,1]", "partition trans task dynamic alloc"); + DEF_CAP(part_trans_task_page_size, OB_CLUSTER_PARAMETER, "8KB", "[1B,]", "partition trans task page size"); + DEF_INT(part_trans_task_prealloc_page_count, OB_CLUSTER_PARAMETER, "20000", "[1,]", + "partition trans task prealloc page count"); + // Log_level=INFO in the startup scenario, and then optimize the schema to WARN 
afterwards + DEF_STR(init_log_level, OB_CLUSTER_PARAMETER, "ALL.*:INFO;SHARE.SCHEMA:INFO", "log level: DEBUG, TRACE, INFO, WARN, USER_ERR, ERROR"); + DEF_STR(log_level, OB_CLUSTER_PARAMETER, "ALL.*:INFO;SHARE.SCHEMA:WARN", "log level: DEBUG, TRACE, INFO, WARN, USER_ERR, ERROR"); + // root server info for oblog, seperated by `;` between multi rootserver, a root server info format as `ip:rpc_port:sql_port` + DEF_STR(rootserver_list, OB_CLUSTER_PARAMETER, "|", "OB RootServer list"); + DEF_STR(cluster_url, OB_CLUSTER_PARAMETER, "|", "OB configure url"); + DEF_STR(cluster_user, OB_CLUSTER_PARAMETER, "", "OB login user"); + DEF_STR(cluster_password, OB_CLUSTER_PARAMETER, "", "OB login password"); + DEF_STR(cluster_db_name, OB_CLUSTER_PARAMETER, "oceanbase", "OB login database name"); + DEF_STR(config_fpath, OB_CLUSTER_PARAMETER, DEFAULT_CONFIG_FPATN, "configuration file path"); + DEF_STR(timezone, OB_CLUSTER_PARAMETER, DEFAULT_TIMEZONE_INFO, "timezone info"); + // tenant_name.db_name.table_name + DEF_STR(tb_white_list, OB_CLUSTER_PARAMETER, "*.*.*", "tb-select white list"); + DEF_STR(tb_black_list, OB_CLUSTER_PARAMETER, "|", "tb-select black list"); + // tenant_name.tablegroup_name + DEF_STR(tablegroup_white_list, OB_CLUSTER_PARAMETER, "*.*", "tablegroup-select white list"); + DEF_STR(tablegroup_black_list, OB_CLUSTER_PARAMETER, "|", "tablegroup-select black list"); + + DEF_STR(data_start_schema_version, OB_CLUSTER_PARAMETER, "|", "tenant:schema_version"); + // cluster id black list, using vertical line separation, for example cluster_id_black_list=100|200|300 + // Default value: 2^31 - 10000, this is a special cluster ID agreed in OCP for deleting historical data scenarios + // liboblog filters REDO data from deleted historical data scenarios by default + DEF_STR(cluster_id_black_list, OB_CLUSTER_PARAMETER, "2147473648", "cluster id black list"); + + // minimum value of default cluster id blacklist value + // The minimum value is: 2^31 - 10000 = 2147473648 + // This definition can only be a literal value + T_DEF_INT_INFT(cluster_id_black_value_min, OB_CLUSTER_PARAMETER, 2147473648, 1, "min cluster id black value"); + + // The maximum value of the default cluster id blacklist value + // Maximum value: 2^31 - 1 = 2147483647 + // This definition can only be a literal value + T_DEF_INT_INFT(cluster_id_black_value_max, OB_CLUSTER_PARAMETER, 2147483647, 1, "max cluster id black value"); + + DEF_INT(log_entry_task_prealloc_count, OB_CLUSTER_PARAMETER, "100000", "[1,]", "log entry task pre-alloc count"); + + DEF_INT(binlog_record_prealloc_count, OB_CLUSTER_PARAMETER, "100000", "[1,]", "binlog record pre-alloc count"); + + DEF_STR(store_service_path, OB_CLUSTER_PARAMETER, "./storage", "store sevice path"); + + // Whether to do ob version compatibility check + // default value '0:not_skip' + T_DEF_BOOL(skip_ob_version_compat_check, OB_CLUSTER_PARAMETER, 0, "0:not_skip, 1:skip") + + // default DFT_BR(LogRecordImpl), add DFT_BR_PB + // passed in via IObLog::init interface + // string LogMsgFactory::DFT_ColMeta = "ColMetaImpl"; + // string LogMsgFactory::DFT_TableMeta = "TableMetaImpl"; + // string LogMsgFactory::DFT_DBMeta = "DBMetaImpl"; + // string LogMsgFactory::DFT_METAS = "MetaDataCollectionsImpl"; + // string LogMsgFactory::DFT_BR = "LogRecordImpl"; + // string LogMsgFactory::DFT_BR_PB = "BinlogRecordProtobuf"; + DEF_STR(drc_message_factory_binlog_record_type, OB_CLUSTER_PARAMETER, "LogRecordImpl", "LogMsgFactory::DFT_BR"); + + // whether to check ObTraceId + T_DEF_BOOL(need_verify_ob_trace_id, 
OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // ObTraceId, Configurable, default is default + DEF_STR(ob_trace_id, OB_CLUSTER_PARAMETER, "default", "ob trace id"); + // Whether to turn on the verification mode + // 1. verify dml unique id + // 2. Verify ddl binlog record: schema version + T_DEF_BOOL(enable_verify_mode, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + DEF_INT(enable_dump_pending_trans_info, OB_CLUSTER_PARAMETER, "0", "[0,1]", + "enable dump pending transaction information"); + + DEF_INT(log_clean_cycle_time_in_hours, OB_CLUSTER_PARAMETER, "24", "[0,]", + "clean log cycle time in hours, 0 means not to clean log"); + + T_DEF_BOOL(skip_dirty_data, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + T_DEF_BOOL(skip_reversed_schema_verison, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + T_DEF_BOOL(skip_rename_tenant_ddl, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to sort the list of participants within a distributed transaction + // Scenario: online business does not need to enable this configuration item; this configuration item is only for obtest test scenario. + // After each restart of obtest, the list of participants in the observer is random. In order to ensure consistent case results, + // the list of participants needs to be sorted to ensure consistent results each time + T_DEF_BOOL(sort_trans_participants, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to allow globally unique indexes to be located in multiple instances + // For example, in a count bin scenario, there is no strong reliance on global unique indexes to resolve dependencies + T_DEF_BOOL(enable_global_unique_index_belong_to_multi_instance, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + ////////////////////////////// Fetcher config ////////////////////////////// + // + // ------------------------------------------------------------------------ + // Configurations that do not support dynamic changes + // ------------------------------------------------------------------------ + // liboblog support multiple working mode, default is storage + // 1. storage: transaction data is stored, can support large transactions + // 2. memory: transaction data is not stored, it means better performance, but may can not support large transactions + DEF_STR(working_mode, OB_CLUSTER_PARAMETER, "memory", "liboblog working mode"); + + T_DEF_BOOL(fetcher_mock_mode, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + T_DEF_INT_INFT(io_thread_num, OB_CLUSTER_PARAMETER, 4, 1, "io thread number"); + T_DEF_INT_INFT(rpc_tenant_id, OB_CLUSTER_PARAMETER, 1, 1, "tenant id used in rpc"); + T_DEF_INT(svr_finder_thread_num, OB_CLUSTER_PARAMETER, 4, 1, 32, "svr finder thread num"); + T_DEF_INT(fetcher_heartbeat_thread_num, OB_CLUSTER_PARAMETER, 4, 1, 32, "fetcher heartbeat thread num"); + T_DEF_INT(idle_pool_thread_num, OB_CLUSTER_PARAMETER, 4, 1, 32, "idle pool thread num"); + T_DEF_INT(dead_pool_thread_num, OB_CLUSTER_PARAMETER, 1, 1, 32, "dead pool thread num"); + T_DEF_INT(stream_worker_thread_num, OB_CLUSTER_PARAMETER, 8, 1, 64, "stream worker thread num"); + T_DEF_INT(start_log_id_locator_thread_num, OB_CLUSTER_PARAMETER, 4, 1, 32, "start log id locator thread num"); + T_DEF_INT_INFT(start_log_id_locator_locate_count, OB_CLUSTER_PARAMETER, 3, 1, "start log id locator locate count"); + // Whether to skip the starting log ID positioning result consistency check, i.e. 
whether there is a positioning log bias scenario + T_DEF_BOOL(skip_start_log_id_locator_result_consistent_check, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + T_DEF_INT_INFT(svr_stream_cached_count, OB_CLUSTER_PARAMETER, 16, 1, "cached svr stream object count"); + T_DEF_INT_INFT(fetch_stream_cached_count, OB_CLUSTER_PARAMETER, 16, 1, "cached fetch stream object count"); + + // region + DEF_STR(region, OB_CLUSTER_PARAMETER, "default_region", "OB region"); + + // Number of globally cached RPC results + T_DEF_INT_INFT(rpc_result_cached_count, OB_CLUSTER_PARAMETER, 16, 1, "cached rpc result object count"); + + // Number of active partitions in memory + // This value can be used as a reference for the number of data structure objects cached at the partition level + T_DEF_INT_INFT(active_partition_count, OB_CLUSTER_PARAMETER, 10000, 1, "active partition count in memory"); + + // Maximum number of partitions currently supported + T_DEF_INT_INFT(partition_count_upper_limit, OB_CLUSTER_PARAMETER, 2000000, 1, "max partition count supported"); + + // Maximum number of threads using systable helper + T_DEF_INT(access_systable_helper_thread_num, OB_CLUSTER_PARAMETER, 64, 48, 1024, "access systable helper thread num"); + + // Global starting schema version, all tenants set to one version, only valid for schema non-split mode + T_DEF_INT_INFT(global_data_start_schema_version, OB_CLUSTER_PARAMETER, 0, 0, + "data start schema version for all tenant"); + // ------------------------------------------------------------------------ + + + // ------------------------------------------------------------------------ + // configurations which supports dynamically modify + // ------------------------------------------------------------------------ + T_DEF_INT_INFT(mysql_connect_timeout_sec, OB_CLUSTER_PARAMETER, 40, 1, "mysql connection timeout in seconds"); + T_DEF_INT_INFT(mysql_query_timeout_sec, OB_CLUSTER_PARAMETER, 30, 1, "mysql query timeout in seconds"); + T_DEF_INT_INFT(start_log_id_locator_rpc_timeout_sec, OB_CLUSTER_PARAMETER, 60, 1, + "start log id locator rpc timeout in seconds"); + T_DEF_INT_INFT(start_log_id_locator_batch_count, OB_CLUSTER_PARAMETER, 2000, 1, "start log id locator batch count"); + T_DEF_INT(svr_finder_sql_batch_count, OB_CLUSTER_PARAMETER, 1, 1, 200, "svr_finder sql batch count"); + + // server blacklist, default is|,means no configuration, support configuration single/multiple servers + // Single: SEVER_IP1:PORT1 + // Multiple: SEVER_IP1:PORT1|SEVER_IP2:PORT2|SEVER_IP3:PORT3 + DEF_STR(server_blacklist, OB_CLUSTER_PARAMETER, "|", "server black list"); + DEF_STR(sql_server_blacklist, OB_CLUSTER_PARAMETER, "|", "sql server black list"); + + T_DEF_INT_INFT(heartbeater_rpc_timeout_sec, OB_CLUSTER_PARAMETER, 60, 1, "heartbeater rpc timeout in seconds"); + T_DEF_INT_INFT(heartbeater_batch_count, OB_CLUSTER_PARAMETER, 2000, 1, "heartbeater batch count"); + T_DEF_INT_INFT(svr_list_update_interval_sec, OB_CLUSTER_PARAMETER, 600, 1, "svr list update interval in seconds"); + T_DEF_INT_INFT(leader_info_update_interval_sec, OB_CLUSTER_PARAMETER, 600, 1, "leader update interval in seconds"); + T_DEF_INT_INFT(heartbeat_interval_sec, OB_CLUSTER_PARAMETER, 1, 1, "leader update interval in seconds"); + + T_DEF_INT_INFT(stream_life_time_sec, OB_CLUSTER_PARAMETER, 60, 1, "fetch log stream life time in seconds"); + T_DEF_INT_INFT(stream_max_partition_count, OB_CLUSTER_PARAMETER, 5000, 1, "fetch log stream max partition count"); + T_DEF_INT_INFT(stream_feedback_interval_sec, OB_CLUSTER_PARAMETER, 3, 
0, + "fetch log stream feedback interval in seconds, 0 means need feedback info always"); + T_DEF_INT_INFT(fetch_log_rpc_timeout_sec, OB_CLUSTER_PARAMETER, 15, 1, "fetch log rpc timeout in seconds"); + T_DEF_INT_INFT(fetch_log_cnt_per_part_per_round, OB_CLUSTER_PARAMETER, 8000, 1, + "fetch log count in one rpc for one partition in one round"); + + // Maximum observer log retention time in minutes + // Used to start the logging locating process, if all servers return less than the lower bound and + // the start timestamp is within the log saving time, we consider it possible to force a start. + // Doing so can greatly alleviate the startup failure problem caused by 2.0 restarting liboblog + T_DEF_INT_INFT(observer_clog_save_time_minutes, OB_CLUSTER_PARAMETER, 120, 1, "observer clog save time in minutes"); + + // Forced start mode, used for operation and maintenance processing + // Auto-start mode can take effect only when all servers return OB_ERR_OUT_OF_LOWER_BOUND + // Forced start mode, only requires at least one server to return OB_ERR_OUT_OF_LOWER_BOUND + // default off + T_DEF_BOOL(enable_force_start_mode, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Upper limit of progress difference between partitions, in seconds + T_DEF_INT_INFT(progress_limit_sec_for_dml, OB_CLUSTER_PARAMETER, 3, 1, "dml progress limit in seconds"); + + // Partition timeout in seconds + // If the logs are not fetched after a certain period of time, the stream will be cut + T_DEF_INT_INFT(partition_timeout_sec, OB_CLUSTER_PARAMETER, 15, 1, "partition timeout in seconds"); + // Timeout time for partitions for lagging copies + // + // If logs are not fetched for more than a certain period of time on a lagging copy, cut the stream + T_DEF_INT_INFT(partition_timeout_sec_for_lagged_replica, OB_CLUSTER_PARAMETER, 3, 1, + "partition timeout for lagged replica in seconds"); + + // cache update interval of sys table __all_server + T_DEF_INT_INFT(all_server_cache_update_interval_sec, OB_CLUSTER_PARAMETER, 5, 1, + "__all_server table cache update internal in seconds"); + + // cache update interval of sys table __all_zone + T_DEF_INT_INFT(all_zone_cache_update_interval_sec, OB_CLUSTER_PARAMETER, 5, 1, + "__all_zone table cache update internal in seconds"); + + // pause fetcher + T_DEF_BOOL(pause_fetcher, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Maximum number of tasks supported by the timer + T_DEF_INT_INFT(timer_task_count_upper_limit, OB_CLUSTER_PARAMETER, 1024, 1, "max timer task count"); + // Timer task timing time + T_DEF_INT_INFT(timer_task_wait_time_msec, OB_CLUSTER_PARAMETER, 100, 1, "timer task wait time in milliseconds"); + + // the upper limit observer takes for the log rpc processing time + // Print RPC chain statistics logs if this limit is exceeded + T_DEF_INT_INFT(rpc_process_handler_time_upper_limit_msec, OB_CLUSTER_PARAMETER, 200, 1, + "observer fetch log rpc process handler timer upper limit"); + + // Survival time of server to blacklist, in seconds + T_DEF_INT_INFT(blacklist_survival_time_sec, OB_CLUSTER_PARAMETER, 30, 1, "blacklist-server surival time in seconds"); + + // The maximum time the server can be blacklisted, in minutes + T_DEF_INT_INFT(blacklist_survival_time_upper_limit_min, OB_CLUSTER_PARAMETER, 4, 1, "blacklist-server survival time upper limit in minute"); + + // The server is blacklisted in the partition, based on the time of the current server service partition - to decide whether to penalize the survival time + // When the service time is less than a certain 
interval, a doubling-live-time policy is adopted + // Unit: minutes + T_DEF_INT_INFT(blacklist_survival_time_penalty_period_min, OB_CLUSTER_PARAMETER, 1, 1, "blacklist survival time punish interval in minute"); + + // Blacklist history expiration time, used to delete history + T_DEF_INT_INFT(blacklist_history_overdue_time_min, OB_CLUSTER_PARAMETER, 30, 10, "blacklist history overdue in minute"); + + // Clear blacklist history period, unit: minutes + T_DEF_INT_INFT(blacklist_history_clear_interval_min, OB_CLUSTER_PARAMETER, 20, 10, "blacklist history clear interval in minute"); + + // Check the need for active cut-off cycles, in minutes + T_DEF_INT_INFT(check_switch_server_interval_min, OB_CLUSTER_PARAMETER, 30, 1, "check switch server interval in minute"); + + // Print the number of partitions with the slowest progress of the Fetcher module + T_DEF_INT_INFT(print_fetcher_slowest_part_num, OB_CLUSTER_PARAMETER, 10, 1, "print fetcher slowest partition num"); + + // Maximum number of RPC results per RPC + T_DEF_INT_INFT(rpc_result_count_per_rpc_upper_limit, OB_CLUSTER_PARAMETER, 16, 1, + "max rpc result count per rpc"); + + // Whether to print RPC processing information + // Print every RPC processing + // No printing by default + T_DEF_BOOL(print_rpc_handle_info, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + T_DEF_BOOL(print_stream_dispatch_info, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // ------------------------------------------------------------------------ + // Print partition heartbeat information + T_DEF_BOOL(print_partition_heartbeat_info, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // Print partition service information + T_DEF_BOOL(print_partition_serve_info, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // Print partition not in service information + T_DEF_BOOL(print_participant_not_serve_info, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // Print the svr list of each partition update, off by default + T_DEF_BOOL(print_partition_server_list_update_info, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // Whether to use the new partitioning algorithm + // Enabled by default, according to the new partitioning method: table_id+part_id + // Temporarily keep the old partitioning method with rollback capability + T_DEF_BOOL(enable_new_partition_hash_algorithm, OB_CLUSTER_PARAMETER, 1, "0:disabled, 1:enabled"); + // Whether to sequentially output within a transaction + // Not on by default (partition-by-partition output) + T_DEF_BOOL(enable_output_trans_order_by_sql_operation, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // ------------------------------------------------------------------------ + // Test mode, used only in obtest and other test tool scenarios + T_DEF_BOOL(test_mode_on, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether check tenant status for each schema request with tenant_id under test mode, default disabled + T_DEF_BOOL(test_mode_force_check_tenant_status, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to allow the output of the current transaction's major_version in test mode, not allowed by default + T_DEF_BOOL(test_output_major_version, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // The number of times sqlServer cannot get the rs list in test mode + T_DEF_INT_INFT(test_mode_block_sqlserver_count, OB_CLUSTER_PARAMETER, 0, 0, + "mock times of con't get rs list under test mode"); + + // Number of REDO logs ignored in test mode + 
T_DEF_INT_INFT(test_mode_ignore_redo_count, OB_CLUSTER_PARAMETER, 0, 0, + "ignore redo log count under test mode"); + + // Test checkpoint mode, used only in obtest and other test tool scenarios + T_DEF_BOOL(test_checkpoint_mode_on, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // test mode, whether to block the participant list confirmation process, and if so, how long to block + // Equal to 0, means no blocking + // greater than 0 means blocking time in seconds + // + // The purpose is to delay the participant list confirmation process and wait for the participant information to be confirmed before operating + T_DEF_INT_INFT(test_mode_block_verify_participants_time_sec, OB_CLUSTER_PARAMETER, 0, 0, + "time in seconds to block to verify participants list"); + + // test mode, whether blocking committer processing task, if blocking, how long to block + // equal to 0, means no blocking + // greater than 0, means blocking time in seconds + // + // test drop tenant, committer processing task delayed, wait long enough to ensure tenant structure can be deleted + T_DEF_INT_INFT(test_mode_block_committer_handle_time_sec, OB_CLUSTER_PARAMETER, 0, 0, + "time in seconds to block to verify tenant has been dropped"); + + // In test mode, set the upper limit of the number of tasks consumed by the committer at one time + T_DEF_INT_INFT(test_mode_committer_handle_trans_count_upper_limit, OB_CLUSTER_PARAMETER, 0, 0, + "commiter handle trans count upper limit under test mode"); + + // test mode, whether blocking create table DDL, if blocking, how long blocking + // Equal to 0, means no blocking + // greater than 0 means blocking time in seconds + // + // The purpose is to block the create table DDL, test PG filtering + T_DEF_INT_INFT(test_mode_block_create_table_ddl_sec, OB_CLUSTER_PARAMETER, 0, 0, + "time in seconds to block to create table"); + + // test mode, whether blocking alter table DDL, if blocking, how long blocking + // Equal to 0, means no blocking + // greater than 0 means blocking time in seconds + // + // The purpose is to block alter table DDL, test PG filtering + T_DEF_INT_INFT(test_mode_block_alter_table_ddl_sec, OB_CLUSTER_PARAMETER, 0, 0, + "time in seconds to block to alter table"); + + // test mode, whether blocking filter row process, if blocking, how long to block + // Equal to 0, means no blocking + // greater than 0, means blocking time in seconds + // + // The purpose is to block filter row, test PG filtering + T_DEF_INT_INFT(test_mode_block_parser_filter_row_data_sec, OB_CLUSTER_PARAMETER, 0, 0, + "time in seconds to block to filter row data"); + + // INNER_HEARTBEAT_INTERVAL + T_DEF_INT_INFT(output_inner_heartbeat_interval_msec, OB_CLUSTER_PARAMETER, 100, 1, "output heartbeat interval in seconds"); + + // Output heartbeat interval to external, default 3s + T_DEF_INT_INFT(output_heartbeat_interval_sec, OB_CLUSTER_PARAMETER, 3, 1, "output heartbeat interval in seconds"); + + // Whether to have incremental backup mode + // Off by default; if it is, then incremental backup mode + T_DEF_BOOL(enable_backup_mode, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to expose no primary key table hidden primary key to the public + // 1. DRC linking is off by default; if it is in effect, output the hidden primary key + // 2. 
Backup is on by default + T_DEF_BOOL(enable_output_hidden_primary_key, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Ignore inconsistencies in the number of HBase mode put columns or not + // Do not skip by default + T_DEF_BOOL(skip_hbase_mode_put_column_count_not_consistency, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to ignore the transaction log for exceptions + // Do not skip by default + T_DEF_BOOL(skip_abnormal_trans_log, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to allow hbase schema to take effect + // off by default; if it is, then convert the hbase table T timestamp field to a positive number + T_DEF_BOOL(enable_hbase_mode, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to allow timestamp->utc integer time + // 1. off by default, the timestamp field is converted to year-month-day format based on time zone information. + // 2. When configured on, the timestamp field is synchronized to integer + T_DEF_BOOL(enable_convert_timestamp_to_unix_timestamp, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Whether to output invisible columns externally + // 1. DRC link is off by default; if valid, output hidden primary key + // 2. Backup is on by default + T_DEF_BOOL(enable_output_invisible_column, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // The point in time when the sql server used for querying in SYSTABLE HELPER changes, i.e., the periodic rotation of the sql server + T_DEF_INT_INFT(sql_server_change_interval_sec, OB_CLUSTER_PARAMETER, 60, 1, + "change interval of sql server in seconds"); + + // Check if version matches, default 600s + T_DEF_INT_INFT(cluster_version_refresh_interval_sec, OB_CLUSTER_PARAMETER, 600, 1, "cluster version refresh interval in seconds"); + + // Oracle mode table/database may have case, and case sensitive + // default enable_oracle_mode_match_case_sensitive=0 whitelist match is consistent with mysql behavior, match is not sensitive + // enable_oracle_mode_match_case_sensitive=1 allow match sensitive + T_DEF_BOOL(enable_oracle_mode_match_case_sensitive, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Switch: Whether to format the module to print the relevant logs + // No printing by default + T_DEF_BOOL(enable_formatter_print_log, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // switch: whether to get the tenant schema version + // Not on by default + T_DEF_BOOL(enable_get_tenant_refreshed_schema_version, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // Switch: Whether to enable SSL authentication: including MySQL and RPC + // Disabled by default + T_DEF_BOOL(ssl_client_authentication, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + + // SSL external kms info + // 1. 
Local file mode: ssl_external_kms_info=file + DEF_STR(ssl_external_kms_info, OB_CLUSTER_PARAMETER, "|", "ssl external kms info"); + +#undef OB_CLUSTER_PARAMETER + +private: + static const int64_t OBLOG_MAX_CONFIG_LENGTH = 5 * 1024 * 1024; // 5M + +private: + bool inited_; + ObLogFakeCommonConfig common_config_; + + // for load_from_file + char *config_file_buf1_; + // for load_from_buffer + char *config_file_buf2_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogConfig); +}; + +#define TCONF (::oceanbase::liboblog::ObLogConfig::get_instance()) + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_CONFIG_H__ */ diff --git a/src/liboblog/src/ob_log_data_processor.cpp b/src/liboblog/src/ob_log_data_processor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9d4feeb31bb9ee75408d0be4c980c02aedee7d0d --- /dev/null +++ b/src/liboblog/src/ob_log_data_processor.cpp @@ -0,0 +1,331 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "lib/string/ob_string.h" // ObString +#include "ob_log_data_processor.h" +#include "ob_log_binlog_record.h" +#include "ob_log_trans_ctx.h" +#include "ob_log_committer.h" +#include "ob_log_instance.h" +#include "ob_log_store_service.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogDataProcessor::ObLogDataProcessor() : + inited_(false), + working_mode_(WorkingMode::UNKNOWN_MODE), + round_value_(0), + rps_stat_(), + last_stat_time_(0), + row_task_count_(0), + reader_(), + err_handler_(NULL) +{ +} + +ObLogDataProcessor::~ObLogDataProcessor() +{ + destroy(); +} + +int ObLogDataProcessor::init(const int64_t thread_num, + const int64_t queue_size, + const WorkingMode working_mode, + IObStoreService &store_service, + IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogDataProcessor has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(queue_size <= 0) + || OB_UNLIKELY(! 
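+      /* working_mode is presumably set from the "working_mode" item declared in
+       * ob_log_config.h ("storage" or "memory"); in storage mode handle_task_
+       * reads row data back through reader_, while memory mode skips that read. */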
is_working_mode_valid(working_mode))) { + LOG_ERROR("invalid arguments", K(thread_num), K(queue_size), K(working_mode)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(DataProcessorThread::init(thread_num, queue_size))) { + LOG_ERROR("init DataProcessorThread queue thread fail", K(ret), K(thread_num), K(queue_size)); + } else if (OB_FAIL(reader_.init(store_service))) { + LOG_ERROR("reader_ init fail", KR(ret)); + } else { + working_mode_ = working_mode; + round_value_ = 0; + rps_stat_.reset(); + last_stat_time_ = get_timestamp(); + row_task_count_ = 0; + err_handler_ = &err_handler; + inited_ = true; + + LOG_INFO("DataProcessor init succ", K(working_mode_), "working_mode", print_working_mode(working_mode_), + K(thread_num), K(queue_size)); + } + + return ret; +} + +void ObLogDataProcessor::destroy() +{ + if (inited_) { + DataProcessorThread::destroy(); + + inited_ = false; + round_value_ = 0; + rps_stat_.reset(); + last_stat_time_ = 0; + row_task_count_ = 0; + reader_.destroy(); + err_handler_ = NULL; + } +} + +int ObLogDataProcessor::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogDataProcessor has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(DataProcessorThread::start())) { + LOG_ERROR("start DataProcessorThread thread fail", K(ret), "thread_num", get_thread_num()); + } else { + LOG_INFO("start DataProcessorThread threads succ", "thread_num", get_thread_num()); + } + + return ret; +} + +void ObLogDataProcessor::stop() +{ + if (inited_) { + DataProcessorThread::stop(); + LOG_INFO("stop DataProcessorThread threads succ", "thread_num", get_thread_num()); + } +} + +int ObLogDataProcessor::push(ObLogRowDataIndex &task, const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogDataProcessor has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! task.is_valid())) { + LOG_ERROR("invalid arguments", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + uint64_t hash_value = ATOMIC_FAA(&round_value_, 1); + void *push_task = static_cast(&task); + if (OB_FAIL(DataProcessorThread::push(push_task, hash_value, timeout))) { + if (OB_TIMEOUT != ret && OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task into DataProcessorThread fail", K(ret), K(push_task), K(hash_value)); + } + } else { + ATOMIC_INC(&row_task_count_); + } + } + + return ret; +} + +void ObLogDataProcessor::print_stat_info() +{ + int64_t current_timestamp = get_timestamp(); + int64_t local_last_stat_time = last_stat_time_; + int64_t delta_time = current_timestamp - local_last_stat_time; + // Update last statistic value + last_stat_time_ = current_timestamp; + StoreServiceStatInfo &store_service_stat = reader_.get_store_stat_info(); + + double storager_rps = rps_stat_.calc_rps(delta_time); + double read_rate = store_service_stat.calc_rate(delta_time); + double read_total_size = store_service_stat.get_total_data_size(); + _LOG_INFO("[DATA_PROCESSOR] [STAT] RPS=%.3lf READ_RATE=%.5fM/s READ_TOTAL_SIZE=%.5fG", + storager_rps, read_rate, read_total_size); +} + +int ObLogDataProcessor::handle(void *data, const int64_t thread_index, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ObLogRowDataIndex *task = static_cast(data); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogDataProcessor has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task) || OB_UNLIKELY(! 
task->is_valid())) { + LOG_ERROR("invalid arguments", KPC(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(handle_task_(*task, thread_index, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_task_ fail", KR(ret), KPC(task), K(thread_index)); + } + } else { + ATOMIC_DEC(&row_task_count_); + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + // exit on fail + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "DataProcessorThread thread exits, thread_index=%ld, err=%d", + thread_index, ret); + stop_flag = true; + } + + return ret; +} + +int ObLogDataProcessor::handle_task_(ObLogRowDataIndex &row_data_index, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = row_data_index.get_tenant_id(); + PartTransTask *part_trans_task = NULL; + ObLogBR *br = NULL; + int record_type = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogDataProcessor has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(part_trans_task = static_cast(row_data_index.get_host()))) { + LOG_ERROR("part_trans_task is NULL", K(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else if (is_memory_working_mode(working_mode_)) { + // do nothing + } else if (is_storage_working_mode(working_mode_)) { + if (OB_FAIL(reader_.read(row_data_index))) { + LOG_ERROR("reader_ read fail", KR(ret), K(row_data_index)); + } + } else { + ret = OB_NOT_SUPPORTED; + } + + if (OB_SUCC(ret)) { + if (OB_ISNULL(br = row_data_index.get_binlog_record())) { + LOG_ERROR("br is NULL", K(row_data_index), K(part_trans_task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(br->get_record_type(record_type))) { + LOG_ERROR("br get_record_type fail", KR(ret), + "record_type", print_record_type(record_type), K(row_data_index)); + } else {} + } + + if (OB_SUCC(ret)) { + const uint64_t cluster_id = part_trans_task->get_cluster_id(); + const ObString &trace_id = part_trans_task->get_trace_id(); + const ObString &trace_info = part_trans_task->get_trace_info(); + ObString dml_unique_id; + const common::ObVersion &freeze_version = part_trans_task->get_freeze_version(); + const int64_t commit_version = part_trans_task->get_global_trans_version(); + + rps_stat_.do_rps_stat(1); + + if (OB_FAIL(init_dml_unique_id_(row_data_index, *part_trans_task, dml_unique_id))) { + LOG_ERROR("init_dml_unique_id_ fail", KR(ret), K(row_data_index), KPC(part_trans_task), K(dml_unique_id)); + } else if (OB_FAIL(br->init_dml_data_second(static_cast(record_type), cluster_id, tenant_id, + trace_id, trace_info, dml_unique_id, freeze_version, commit_version))) { + LOG_ERROR("ObLogBR init_dml_data_second fail", KR(ret), K(record_type), K(cluster_id), K(tenant_id), K(trace_id), K(trace_info), + K(dml_unique_id), K(freeze_version), K(commit_version)); + } else if (OB_FAIL(push_task_into_trx_queue_(*br, thread_index, stop_flag))) { + LOG_ERROR("push_task_into_trx_queue_ fail", KR(ret), K(row_data_index)); + } else {} + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + return ret; +} + +int ObLogDataProcessor::init_dml_unique_id_(ObLogRowDataIndex &row_data_index, + PartTransTask &part_trans_task, + common::ObString &dml_unique_id) +{ + int ret = OB_SUCCESS; + const ObString &pkey_and_log_id_str = part_trans_task.get_pkey_and_log_id_str(); + const int32_t log_offset = row_data_index.get_log_offset(); + const uint64_t row_no = row_data_index.get_row_no(); + DmlStmtUniqueID dml_stmt_unique_id(pkey_and_log_id_str, log_offset, 
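+      /* The DML unique id is built from the owning PartTransTask's
+       * partition-key + log-id string plus this row's log offset and row
+       * number; customized_to_string() below serializes that triple into a
+       * buffer taken from the task's allocator. */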
row_no); + + if (OB_UNLIKELY(! dml_stmt_unique_id.is_valid())) { + LOG_ERROR("dml_stmt_unique_id is not valid", K(dml_stmt_unique_id)); + ret = OB_INVALID_ARGUMENT; + } else { + // TODO allocator + common::ObIAllocator &allocator= part_trans_task.get_allocator(); + const int64_t buf_len = dml_stmt_unique_id.get_dml_unique_id_length(); + char *buf = static_cast(allocator.alloc(buf_len)); + int64_t pos = 0; + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocate memory for trans id buffer fail", K(buf)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(dml_stmt_unique_id.customized_to_string(buf, buf_len, pos))) { + LOG_ERROR("dml_stmt_unique_id customized_to_string fail", KR(ret), K(buf), K(buf_len), K(pos)); + } else { + dml_unique_id.assign_ptr(buf, static_cast(pos)); + } + } + + return ret; +} + +int ObLogDataProcessor::push_task_into_trx_queue_(ObLogBR &br, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + IObLogCommitter *trans_committer = TCTX.committer_; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogDataProcessor has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(trans_committer)) { + LOG_ERROR("trans_committer is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(trans_committer->push_br_task(br))) { + LOG_ERROR("push_br_task fail", KR(ret), K(thread_index), K(br), K(stop_flag)); + } else { + // succ + } + + return ret; +} + +void ObLogDataProcessor::print_task_count_() +{ + int ret = OB_SUCCESS; + int64_t total_thread_num = get_thread_num(); + + for (int64_t idx = 0; OB_SUCC(ret) && idx < total_thread_num; ++idx) { + int64_t task_count = 0; + if (OB_FAIL(get_task_num(idx, task_count))) { + LOG_ERROR("get_task_num fail", K(ret)); + } else { + _LOG_INFO("[INC_STAT] [DATA_PROCESSOR] [%ld/%ld] TASK_COUNT=%ld", idx, total_thread_num, task_count); + } + } +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_data_processor.h b/src/liboblog/src/ob_log_data_processor.h new file mode 100644 index 0000000000000000000000000000000000000000..9ec7087d9ce0dcae301709e8b8914c88fbe67cf9 --- /dev/null +++ b/src/liboblog/src/ob_log_data_processor.h @@ -0,0 +1,111 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_DATA_PROCESSOR_H_ +#define OCEANBASE_LIBOBLOG_DATA_PROCESSOR_H_ + +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread +#include "ob_log_trans_stat_mgr.h" // TransRpsStatInfo +#include "ob_log_store_service_stat.h" // StoreServiceStatInfo +#include "ob_log_part_trans_task.h" +#include "ob_log_row_data_index.h" +#include "ob_log_reader_plug_in.h" +#include "ob_log_work_mode.h" // WorkingMode + +namespace oceanbase +{ +namespace liboblog +{ +///////////////////////////////////////////////////////////////////////////////////////// +class IObLogDataProcessor +{ +public: + enum + { + MAX_PARSER_NUM = 32 + }; + +public: + virtual ~IObLogDataProcessor() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(ObLogRowDataIndex &task, int64_t timeout) = 0; + virtual void get_task_count(int64_t &row_task_count) const = 0; + virtual void print_stat_info() = 0; +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +class IObStoreService; +class IObLogErrHandler; + +typedef common::ObMQThread DataProcessorThread; + +class ObLogDataProcessor : public IObLogDataProcessor, public DataProcessorThread +{ +public: + ObLogDataProcessor(); + virtual ~ObLogDataProcessor(); + +public: + int start(); + void stop(); + void mark_stop_flag() { DataProcessorThread::mark_stop_flag(); } + int push(ObLogRowDataIndex &task, int64_t timeout); + void get_task_count(int64_t &row_task_count) const { row_task_count = ATOMIC_LOAD(&row_task_count_); } + void print_stat_info(); + int handle(void *data, const int64_t thread_index, volatile bool &stop_flag); + +public: + int init(const int64_t thread_num, + const int64_t queue_size, + const WorkingMode working_mode, + IObStoreService &store_service, + IObLogErrHandler &err_handler); + void destroy(); + +private: + int handle_task_(ObLogRowDataIndex &row_data_index, + const int64_t thread_index, + volatile bool &stop_flag); + int init_dml_unique_id_(ObLogRowDataIndex &row_data_index, + PartTransTask &part_trans_task, + common::ObString &dml_unique_id); + int push_task_into_trx_queue_(ObLogBR &br, + const int64_t thread_index, + volatile bool &stop_flag); + + void print_task_count_(); + +private: + bool inited_; + WorkingMode working_mode_; + // Used to ensure that tasks are evenly distributed to threads + uint64_t round_value_; + + TransRpsStatInfo rps_stat_; + int64_t last_stat_time_ CACHE_ALIGNED; + int64_t row_task_count_ CACHE_ALIGNED; + + ObLogReader reader_; + IObLogErrHandler *err_handler_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogDataProcessor); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif diff --git a/src/liboblog/src/ob_log_ddl_handler.cpp b/src/liboblog/src/ob_log_ddl_handler.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0c8f2827614d61ccb86407ba5ef12f7ea61bc1a8 --- /dev/null +++ b/src/liboblog/src/ob_log_ddl_handler.cpp @@ -0,0 +1,2463 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_ddl_handler.h" + +#include "ob_log_ddl_parser.h" // IObLogDdlParser +#include "ob_log_sequencer1.h" // IObLogSequencer +#include "ob_log_committer.h" // IObLogCommitter +#include "ob_log_instance.h" // IObLogErrHandler, TCTX +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_schema_getter.h" // IObLogSchemaGetter +#include "ob_log_tenant_mgr.h" // IObLogTenantMgr +#include "ob_log_table_matcher.h" // IObLogTableMatcher +#include "ob_log_config.h" // TCONF +#include "share/ob_cluster_version.h" // GET_MIN_CLUSTER_VERSION + +#define _STAT(level, fmt, args...) _OBLOG_LOG(level, "[STAT] [DDL_HANDLER] " fmt, ##args) +#define STAT(level, fmt, args...) OBLOG_LOG(level, "[STAT] [DDL_HANDLER] " fmt, ##args) +#define _ISTAT(fmt, args...) _STAT(INFO, fmt, ##args) +#define ISTAT(fmt, args...) STAT(INFO, fmt, ##args) +#define _DSTAT(fmt, args...) _STAT(DEBUG, fmt, ##args) + +#define IGNORE_SCHEMA_ERROR(ret, args...) \ + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { \ + LOG_WARN("ignore DDL on schema error, tenant may be dropped in future", ##args); \ + ret = OB_SUCCESS; \ + } + +namespace oceanbase +{ +using namespace common; +using namespace share; +using namespace share::schema; + +namespace liboblog +{ + +////////////////////////////// ObLogDDLHandler::TaskQueue ////////////////////////////// + +ObLogDDLHandler::TaskQueue::TaskQueue() : + queue_() +{} + +ObLogDDLHandler::TaskQueue::~TaskQueue() +{ +} + +// The requirement task must be a DDL transactional task, as the code that maintains the progress depends on +int ObLogDDLHandler::TaskQueue::push(PartTransTask *task) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + queue_.push(task); + } + return ret; +} + +void ObLogDDLHandler::TaskQueue::pop() +{ + (void)queue_.pop(); +} + +int64_t ObLogDDLHandler::TaskQueue::size() const +{ + return queue_.size(); +} + +// This function is only the observer, only read the top element +// It must be guaranteed that the person calling this function is the only consumer, i.e. 
no one else will pop elements during the call to this function +int ObLogDDLHandler::TaskQueue::next_ready_to_handle(const int64_t timeout, + PartTransTask *&top_task, + common::ObCond &cond) +{ + int ret = OB_SUCCESS; + int64_t cur_time = get_timestamp(); + int64_t end_time = cur_time + timeout; + + while (NULL == queue_.top() && OB_SUCCESS == ret) { + int64_t wait_time = end_time - cur_time; + + if (wait_time <= 0) { + ret = OB_TIMEOUT; + } else { + cond.timedwait(wait_time); + cur_time = get_timestamp(); + } + } + + if (OB_SUCCESS == ret) { + if (NULL == queue_.top()) { + LOG_ERROR("invalid error, top task is NULL", K(queue_.top())); + ret = OB_ERR_UNEXPECTED; + } else { + PartTransTask *task = queue_.top(); + int64_t wait_time = end_time - cur_time; + + // DDL task have to wait for parse complete, other types of tasks are considered ready + if (task->is_ddl_trans() && OB_FAIL(task->wait_formatted(wait_time, cond))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("task wait_formatted fail", KR(ret), K(wait_time), KPC(task)); + } + } else { + top_task = task; + } + } + } + + return ret; +} + +///////////////////////////////// ObLogDDLHandler ///////////////////////////////// + +ObLogDDLHandler::ObLogDDLHandler() : + inited_(false), + ddl_parser_(NULL), + sequencer_(NULL), + err_handler_(NULL), + schema_getter_(NULL), + skip_reversed_schema_version_(false), + handle_pid_(0), + stop_flag_(true), + ddl_fetch_queue_(), + wait_formatted_cond_() +{} + +ObLogDDLHandler::~ObLogDDLHandler() +{ + destroy(); +} + +int ObLogDDLHandler::init(IObLogDdlParser *ddl_parser, + IObLogSequencer *sequencer, + IObLogErrHandler *err_handler, + IObLogSchemaGetter *schema_getter, + const bool skip_reversed_schema_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(ddl_parser_ = ddl_parser) + || OB_ISNULL(sequencer_ = sequencer) + || OB_ISNULL(err_handler_ = err_handler) + || OB_ISNULL(schema_getter_ = schema_getter)) { + LOG_ERROR("invalid argument", K(ddl_parser), K(sequencer), K(err_handler), K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } else { + skip_reversed_schema_version_ = skip_reversed_schema_version; + handle_pid_ = 0; + stop_flag_ = true; + inited_ = true; + } + + return ret; +} + +void ObLogDDLHandler::destroy() +{ + stop(); + + inited_ = false; + ddl_parser_ = NULL; + sequencer_ = NULL; + err_handler_ = NULL; + schema_getter_ = NULL; + skip_reversed_schema_version_ = false; + handle_pid_ = 0; + stop_flag_ = true; +} + +int ObLogDDLHandler::start() +{ + int ret = OB_SUCCESS; + int pthread_ret = 0; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + stop_flag_ = false; + + if (0 != (pthread_ret = pthread_create(&handle_pid_, NULL, handle_thread_func_, this))){ + LOG_ERROR("create DDL handle thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("start DDL handle thread succ"); + } + + if (OB_FAIL(ret)) { + stop_flag_ = true; + } + } + + return ret; +} + +void ObLogDDLHandler::stop() +{ + if (inited_) { + stop_flag_ = true; + + if (0 != handle_pid_) { + int pthread_ret = pthread_join(handle_pid_, NULL); + + if (0 != pthread_ret) { + LOG_ERROR("join DDL handle thread fail", K(handle_pid_), K(pthread_ret), + KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop DDL handle thread succ"); + } + + handle_pid_ = 0; + } + } +} + +int ObLogDDLHandler::push(PartTransTask *task, const int64_t timeout) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(ddl_parser_)) { + LOG_ERROR("invalid DDL parser", K(ddl_parser_)); + ret = OB_NOT_INIT; + } + // Only DDL partitions are supported, as well as heartbeats for DDL partitions + else if (OB_UNLIKELY(! task->is_ddl_trans() + && ! task->is_ddl_part_heartbeat() + && !task->is_ddl_offline_task())) { + LOG_ERROR("task is not DDL trans, or HEARTBEAT, or OFFLINE task, not supported", KPC(task)); + ret = OB_NOT_SUPPORTED; + } + // DDL task have to push to the DDL parser first, because the task will retry after the task push DDL parser times out. + // that is, the same task may be pushed multiple times.To avoid the same task being added to the queue more than once, the DDL parser is pushed first + else if (task->is_ddl_trans() && OB_FAIL(ddl_parser_->push(*task, timeout))) { + if (OB_IN_STOP_STATE != ret && OB_TIMEOUT != ret) { + LOG_ERROR("push task into DDL parser fail", KR(ret), K(task)); + } + } + // Add the task to the Fetch queue without timeout failure, ensuring that it will only be pushed once in the Parser + else if (OB_FAIL(ddl_fetch_queue_.push(task))) { + LOG_ERROR("push DDL task into fetch queue fail", KR(ret), KPC(task)); + } else { + // success + } + return ret; +} + +int ObLogDDLHandler::get_progress(uint64_t &ddl_min_progress_tenant_id, + int64_t &ddl_min_progress, + uint64_t &ddl_min_handle_log_id) +{ + int ret = OB_SUCCESS; + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + + ddl_min_progress = OB_INVALID_TIMESTAMP; + ddl_min_handle_log_id = OB_INVALID_ID; + ddl_min_progress_tenant_id = OB_INVALID_TENANT_ID; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogDDLHandler not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("tenant_mgr_ is NULL", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant_mgr->get_ddl_progress(ddl_min_progress_tenant_id, ddl_min_progress, + ddl_min_handle_log_id))) { + LOG_ERROR("get_ddl_progress fail", KR(ret), K(ddl_min_progress_tenant_id), + K(ddl_min_progress), K(ddl_min_handle_log_id)); + } else { + // success + } + + return ret; +} + +int64_t ObLogDDLHandler::get_part_trans_task_count() const +{ + return ddl_fetch_queue_.size(); +} + +void *ObLogDDLHandler::handle_thread_func_(void *arg) +{ + if (NULL != arg) { + ObLogDDLHandler *ddl_handler = static_cast<ObLogDDLHandler *>(arg); + ddl_handler->handle_ddl_routine(); + } + + return NULL; +} + +int ObLogDDLHandler::next_task_(PartTransTask *&task) +{ + int ret = OB_SUCCESS; + // Get the next task to be processed + RETRY_FUNC(stop_flag_, ddl_fetch_queue_, next_ready_to_handle, DATA_OP_TIMEOUT, task, + wait_formatted_cond_); + + // Unconditionally pop out + ddl_fetch_queue_.pop(); + + if (OB_SUCCESS != ret) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("task_queue next_ready_to_handle fail", KR(ret)); + } + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_ERR_UNEXPECTED; + } + return ret; +} + +int ObLogDDLHandler::handle_task_(PartTransTask &task, + bool &is_schema_split_mode, + const uint64_t ddl_tenant_id, + ObLogTenant *tenant, + const bool is_tenant_served) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! task.is_ddl_trans() + && ! task.is_ddl_part_heartbeat() + && ! task.is_ddl_offline_task())) { + LOG_ERROR("task is not DDL trans task, or HEARTBEAT, or OFFLINE task, not supported", K(task)); + ret = OB_NOT_SUPPORTED; + } else if (task.is_ddl_offline_task()) { + // OFFLINE tasks for DDL partitions + if (OB_FAIL(handle_ddl_offline_task_(task))) { + LOG_ERROR("handle_ddl_offline_task_ fail", KR(ret), K(task)); + } + } else if (! is_tenant_served) { + // DDL of a tenant that is not served, ignore it + ISTAT("[DDL] tenant is not served, ignore DDL task", K(ddl_tenant_id), K(is_schema_split_mode), + K(task), KPC(tenant)); + // Mark all binlog records as invalid + mark_all_binlog_records_invalid_(task); + } else if (OB_ISNULL(tenant)) { + LOG_ERROR("invalid tenant", KPC(tenant), K(ddl_tenant_id), K(is_tenant_served), K(task)); + ret = OB_ERR_UNEXPECTED; + } + // An error is reported if the tenant is not in the serving state. + // The current implementation assumes that the tenant is in service during all DDL processing under the tenant, and that the tenant is taken offline by the DDL offline task + else if (OB_UNLIKELY(! 
tenant->is_serving())) { + LOG_ERROR("tenant state is not serving, unexpected", KPC(tenant), K(task), + K(is_schema_split_mode), K(ddl_tenant_id), K(is_tenant_served)); + ret = OB_ERR_UNEXPECTED; + } else { + // The following handles DDL transaction tasks and DDL heartbeat tasks + if (task.is_ddl_trans() && OB_FAIL(handle_ddl_trans_(task, is_schema_split_mode, *tenant))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_ddl_trans_ fail", KR(ret), K(task), K(ddl_tenant_id), K(tenant), + K(is_schema_split_mode), K(is_tenant_served)); + } + } + // Both DDL transactions and heartbeats update DDL information + else if (OB_FAIL(update_ddl_info_(task, is_schema_split_mode, *tenant))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("update_ddl_info_ fail", KR(ret), K(task), K(is_schema_split_mode), KPC(tenant)); + } + } + } + return ret; +} + +int ObLogDDLHandler::dispatch_task_(PartTransTask *task, ObLogTenant *tenant, + const bool is_tenant_served) +{ + int ret = OB_SUCCESS; + IObLogCommitter *trans_committer = TCTX.committer_; + + if (OB_ISNULL(sequencer_)) { + LOG_ERROR("invalid committer", K(sequencer_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task) || (is_tenant_served && OB_ISNULL(tenant)) || (! is_tenant_served && NULL != tenant)) { + LOG_ERROR("invalid tenant", K(is_tenant_served), K(tenant)); + ret = OB_INVALID_ARGUMENT; + } else { + if (task->is_ddl_trans()) { + if (OB_FAIL(sequencer_->push(task, stop_flag_))) { + LOG_ERROR("sequencer_ push fail", KR(ret), KPC(task)); + } + } else { + if (OB_ISNULL(trans_committer)) { + LOG_ERROR("trans_committer is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + const int64_t task_count = 1; + RETRY_FUNC(stop_flag_, (*trans_committer), push, task, task_count, DATA_OP_TIMEOUT); + } + } + + if (OB_SUCCESS != ret) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push into committer fail", KR(ret), KPC(task), KPC(tenant), K(is_tenant_served)); + } + } else { + task = NULL; + } + } + + return ret; +} + +void ObLogDDLHandler::handle_ddl_routine() +{ + int ret = OB_SUCCESS; + while (! 
stop_flag_ && OB_SUCCESS == ret) { + PartTransTask *task = NULL; + + // Iterate for the next task + if (OB_FAIL(next_task_(task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("next_task_ fail", KR(ret)); + } + } else { + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + bool is_tenant_served = false; + uint64_t ddl_tenant_id = OB_INVALID_ID; + bool is_schema_split_mode = TCTX.is_schema_split_mode_; + + // First obtain tenant information, the tenant may not serve + if (OB_FAIL(get_tenant_(*task, + is_schema_split_mode, + ddl_tenant_id, + guard, + tenant, + is_tenant_served))) { + LOG_ERROR("get_tenant_ fail", KR(ret), KPC(task), K(is_schema_split_mode)); + } + // Then process the task + else if (OB_FAIL(handle_task_(*task, + is_schema_split_mode, + ddl_tenant_id, + tenant, + is_tenant_served))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle DDL task fail", KR(ret), KPC(task), K(is_schema_split_mode), + K(ddl_tenant_id), KPC(tenant), K(is_tenant_served)); + } + } + // Final distribution of tasks + else if (OB_FAIL(dispatch_task_(task, tenant, is_tenant_served))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch task fail", KR(ret), KPC(task), KPC(tenant), + K(is_tenant_served)); + } + } + } + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "DDL handle thread exits, err=%d", ret); + stop_flag_ = true; + } +} + +int ObLogDDLHandler::handle_ddl_offline_task_(const PartTransTask &task) +{ + int ret = OB_SUCCESS; + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + // DDL __all_ddl_operation table offline tasks, must be schema split mode + // If a offline task is received, it means that all DDLs of the tenant have been processed and it is safe to delete the tenant + uint64_t ddl_tenant_id = task.get_tenant_id(); + + ISTAT("[DDL] [DDL_OFFLINE_TASK] begin drop tenant", K(ddl_tenant_id), + "pkey", task.get_partition()); + + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("invalid tenant mgr", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant_mgr->drop_tenant(ddl_tenant_id, "DDL_OFFLINE_TASK"))) { + LOG_ERROR("tenant mgr drop tenant fail", KR(ret), K(ddl_tenant_id), K(task)); + } else { + ISTAT("[DDL] [DDL_OFFLINE_TASK] drop tenant succ", K(ddl_tenant_id), + "pkey", task.get_partition()); + } + + return ret; +} + +int64_t ObLogDDLHandler::decide_ddl_tenant_id_for_schema_non_split_mode_(const PartTransTask &task) const +{ + uint64_t ddl_tenant_id = OB_INVALID_TENANT_ID; + uint64_t task_op_tenant_id = OB_INVALID_TENANT_ID; + int64_t stmt_index = 0; + const int64_t stmt_num = task.get_stmt_num(); + + // Iterate through each statement of the DDL to determine the tenant ID corresponding to the DDL in non-split mode + IStmtTask *stmt_task = task.get_stmt_list().head_; + while (NULL != stmt_task && OB_INVALID_TENANT_ID == ddl_tenant_id) { + DdlStmtTask *ddl_stmt = dynamic_cast(stmt_task); + if (OB_NOT_NULL(ddl_stmt)) { + const uint64_t op_tenant_id = ddl_stmt->get_op_tenant_id(); + const uint64_t exec_tenant_id = ddl_stmt->get_exec_tenant_id(); + const int64_t op_type = ddl_stmt->get_operation_type(); + const int64_t op_schema_version = ddl_stmt->get_op_schema_version(); + + // Record op_tenant_id, whichever is the first + if (OB_INVALID_TENANT_ID == task_op_tenant_id) { + task_op_tenant_id = op_tenant_id; + } + + // ALTER_TENANT takes precedence over exec_tenant_id, if it is not allowed, then set it to SYS + if (OB_DDL_ALTER_TENANT == op_type) { + 
ddl_tenant_id = exec_tenant_id; + + // If exec_tenant_id is buggy and neither equal to SYS nor op_tenant_id, then error is reported and set to SYS + if (op_tenant_id != exec_tenant_id && OB_SYS_TENANT_ID != exec_tenant_id) { + ddl_tenant_id = OB_SYS_TENANT_ID; + LOG_ERROR("ALTER_TENANT DDL exec_tenant_id is different with op_tenant_id and SYS", + K(op_tenant_id), K(exec_tenant_id), KPC(ddl_stmt), K(task)); + } + } + // In case of tenant creation and tenant deletion DDL, the tenant ID is forced to be set to SYS + else if (OB_DDL_ADD_TENANT == op_type + || OB_DDL_ADD_TENANT_END == op_type + || OB_DDL_ADD_TENANT_START == op_type + || OB_DDL_DEL_TENANT == op_type + || OB_DDL_DEL_TENANT_START == op_type + || OB_DDL_DEL_TENANT_END == op_type) { + ddl_tenant_id = OB_SYS_TENANT_ID; + } + + ISTAT("[DDL] [DECIDE_TENANT_ID] SCAN_DDL_STMT", + K(stmt_index), K(stmt_num), K(ddl_tenant_id), + K(op_tenant_id), K(exec_tenant_id), K(op_schema_version), + K(op_type), "op_type", ObSchemaOperation::type_str((ObSchemaOperationType)op_type), + "ddl_stmt", ddl_stmt->get_ddl_stmt_str()); + } + + stmt_index++; + // next statement + stmt_task = stmt_task->get_next(); + } + + // If not set tenant_id during the scan, set to op_tenant_id + if (OB_INVALID_TENANT_ID == ddl_tenant_id) { + ddl_tenant_id = task_op_tenant_id; + + // If it still doesn't work, set it to SYS + if (OB_INVALID_TENANT_ID == ddl_tenant_id) { + ddl_tenant_id = OB_SYS_TENANT_ID; + } + } + + ISTAT("[DDL] [DECIDE_TENANT_ID] DONE", K(ddl_tenant_id), "scan_stmt_num", stmt_index, K(stmt_num), + K(task_op_tenant_id), "schema_version", task.get_local_schema_version()); + return ddl_tenant_id; +} + +int ObLogDDLHandler::decide_ddl_tenant_id_(const PartTransTask &task, + const bool is_schema_split_mode, + uint64_t &ddl_tenant_id) +{ + int ret = OB_SUCCESS; + ddl_tenant_id = OB_INVALID_TENANT_ID; + + // DDL partition heartbeat and DDL offline tasks, whether in split mode or not, use the tenant ID of the DDL partition + if (task.is_ddl_part_heartbeat() || task.is_ddl_offline_task()) { + ddl_tenant_id = task.get_tenant_id(); + } else if (task.is_ddl_trans()) { + // For DDL partitioned tasks, split mode uses the tenant ID of the DDL partition, and non-split mode uses the executor tenant + if (is_schema_split_mode) { + ddl_tenant_id = task.get_tenant_id(); + } else { + ddl_tenant_id = decide_ddl_tenant_id_for_schema_non_split_mode_(task); + } + } else { + LOG_ERROR("unknown DDL task, cannot decide DDL tenant id", K(task), K(is_schema_split_mode)); + ret = OB_ERR_UNEXPECTED; + } + return ret; +} + +int ObLogDDLHandler::get_tenant_( + PartTransTask &task, + const bool is_schema_split_mode, + uint64_t &ddl_tenant_id, + ObLogTenantGuard &guard, + ObLogTenant *&tenant, + bool &is_tenant_served) +{ + int ret = OB_SUCCESS; + + // Default setting for tenant non-service + tenant = NULL; + is_tenant_served = false; + + // Determine the tenant ID to which the task belongs + if (OB_FAIL(decide_ddl_tenant_id_(task, is_schema_split_mode, ddl_tenant_id))) { + LOG_ERROR("decide ddl tenant id fail", KR(ret), K(task), K(is_schema_split_mode)); + } else if (OB_FAIL(TCTX.get_tenant_guard(ddl_tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // not serve ddl if tenant not exist + if (is_schema_split_mode) { + // In split mode, the tenant cannot not exist, and if it does not exist, then the tenant is deleted in advance, which must have a bug + LOG_ERROR("tenant not exist when handle DDL task under schema split mode, unexpected", + KR(ret), K(ddl_tenant_id), 
K(is_schema_split_mode), K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + // schema non-split mode, the sys tenant will pull the DDL of all tenants and will encounter the DDL of the unserviced tenant + ret = OB_SUCCESS; + is_tenant_served = false; + } + } else { + LOG_ERROR("get_tenant fail", KR(ret), K(ddl_tenant_id), K(guard)); + } + } else if (OB_ISNULL(guard.get_tenant())) { + LOG_ERROR("get tenant fail, tenant is NULL", K(ddl_tenant_id)); + ret = OB_ERR_UNEXPECTED; + } else { + tenant = guard.get_tenant(); + is_tenant_served = true; + } + + if (! task.is_ddl_part_heartbeat()) { + ISTAT("[DDL] detect tenant DDL", + K(ddl_tenant_id), + K(is_tenant_served), + "tenant_state", NULL == tenant ? "NONE" : ObLogTenant::print_state(tenant->get_tenant_state()), + "task_type", PartTransTask::print_task_type(task.get_type()), + K(is_schema_split_mode), + "schema_version", task.get_local_schema_version(), + "log_id", task.get_prepare_log_id(), + "tstamp", TS_TO_STR(task.get_timestamp()), + "delay", TS_TO_DELAY(task.get_timestamp())); + } + + return ret; +} + +struct DDLInfoUpdater +{ + int err_code_; + uint64_t host_ddl_tenant_id_; + PartTransTask &host_ddl_task_; + + DDLInfoUpdater(PartTransTask &task, const uint64_t tenant_id) : + err_code_(OB_SUCCESS), + host_ddl_tenant_id_(tenant_id), + host_ddl_task_(task) + {} + + bool operator()(const TenantID &tid, ObLogTenant *tenant) + { + int ret = OB_SUCCESS; + // For non-host tenants, execute the update_ddl_info() action + if (tid.tenant_id_ != host_ddl_tenant_id_ && NULL != tenant) { + // Tenants that are not in service are filtered internally + if (OB_FAIL(tenant->update_ddl_info(host_ddl_task_))) { + LOG_ERROR("update_ddl_info fail", KR(ret), K(tid), K(host_ddl_task_), KPC(tenant)); + } + } + err_code_ = ret; + return OB_SUCCESS == ret; + } +}; + +int ObLogDDLHandler::update_ddl_info_(PartTransTask &task, + const bool is_schema_split_mode, + ObLogTenant &tenant) +{ + int ret = OB_SUCCESS; + // Update DDL information whenever a tenant is served, regardless of whether the task type is a DDL task or a heartbeat task + if (OB_FAIL(tenant.update_ddl_info(task))) { + LOG_ERROR("update tenant ddl info fail", KR(ret), K(tenant), K(task)); + } else { + // If it is schema non-split mode, update DDL information for all tenants + // Since this tenant has already been updated, filter this tenant here + // + // The DDL of tenant split will dynamically change is_schema_split_mode, i.e. the tenant split DDL does not need to update the state for each tenant + if (! 
is_schema_split_mode) { + uint64_t ddl_tenant_id = tenant.get_tenant_id(); + DDLInfoUpdater updater(task, ddl_tenant_id); + + // Update DDL Info for all tenants + if (OB_FAIL(for_each_tenant_(updater))) { + LOG_ERROR("update ddl info for all tenant fail", KR(ret), K(task), + K(is_schema_split_mode), K(tenant)); + } + } + } + return ret; +} + +template <typename Func> +int ObLogDDLHandler::for_each_tenant_(Func &func) +{ + int ret = OB_SUCCESS; + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("invalid tenant mgr", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(dynamic_cast<ObLogTenantMgr *>(tenant_mgr)->for_each_tenant(func))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("for each tenant fail", KR(ret), K(func.err_code_)); + } + } else if (OB_UNLIKELY(OB_SUCCESS != func.err_code_)) { + // Error during scanning of all tenants + ret = func.err_code_; + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("for each tenant fail", KR(ret), K(func.err_code_)); + } + } else { + // success + } + return ret; +} + +void ObLogDDLHandler::mark_stmt_binlog_record_invalid_(DdlStmtTask &stmt_task) +{ + if (NULL != stmt_task.get_binlog_record()) { + stmt_task.get_binlog_record()->set_is_valid(false); + } +} + +void ObLogDDLHandler::mark_all_binlog_records_invalid_(PartTransTask &task) +{ + DdlStmtTask *stmt_task = static_cast<DdlStmtTask *>(task.get_stmt_list().head_); + while (NULL != stmt_task) { + mark_stmt_binlog_record_invalid_(*stmt_task); + stmt_task = static_cast<DdlStmtTask *>(stmt_task->get_next()); + } +} + +int ObLogDDLHandler::get_old_schema_version_(const uint64_t tenant_id, + PartTransTask &task, + const int64_t tenant_ddl_cur_schema_version, + int64_t &old_schema_version) +{ + int ret = OB_SUCCESS; + int64_t ddl_schema_version = task.get_local_schema_version(); + + // 1. use tenant_ddl_cur_schema_version as old_schema_version by default + // 2. 
Special case: when the schema version is reversed and skip_reversed_schema_version_ = true, + // then to ensure that the corresponding schema is obtained, take a suitable schema version (an official version) as old_schema_version + // + // e.g.: cur_schema_version=104, reversed ddl_schema_version=90 (drop database operation), + // then schema_version=88 is guaranteed to get the corresponding database schema + old_schema_version = tenant_ddl_cur_schema_version; + + if (OB_UNLIKELY(ddl_schema_version <= tenant_ddl_cur_schema_version)) { + // Schema version reversal encountered: ignore it if skip_reversed_schema_version_ = true, otherwise report an error and exit + LOG_ERROR("DDL schema version is reversed", K(ddl_schema_version), K(tenant_ddl_cur_schema_version), + K(task)); + + if (skip_reversed_schema_version_) { + if (OB_FAIL(get_schema_version_by_timestamp_util_succ_(tenant_id, ddl_schema_version, old_schema_version))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // do nothing + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_schema_version_by_timestamp_util_succ_ fail", KR(ret), K(tenant_id), K(ddl_schema_version), + K(old_schema_version)); + } + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == old_schema_version)) { + LOG_ERROR("old_schema_version is not valid", K(old_schema_version)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_WARN("ignore DDL schema version is reversed, " + "set old schema version as suitable ddl_schema_version", + K(skip_reversed_schema_version_), K(old_schema_version), + K(ddl_schema_version), K(tenant_ddl_cur_schema_version)); + } + } else { + ret = OB_ERR_UNEXPECTED; + } + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_IN_STOP_STATE exit +// @retval other error code fail +int ObLogDDLHandler::get_schema_version_by_timestamp_util_succ_(const uint64_t tenant_id, + const int64_t ddl_schema_version, + int64_t &old_schema_version) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(schema_getter_)) { + LOG_ERROR("schema_getter_ is NULL", K(schema_getter_)); + ret = OB_ERR_UNEXPECTED; + } else { + RETRY_FUNC(stop_flag_, (*schema_getter_), get_schema_version_by_timestamp, tenant_id, ddl_schema_version - 1, + old_schema_version, DATA_OP_TIMEOUT); + } + + return ret; +} + +int ObLogDDLHandler::filter_ddl_stmt_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + IObLogTenantMgr &tenant_mgr, + bool &chosen, + const bool only_filter_by_tenant /* = false */) +{ + int ret = OB_SUCCESS; + ObSchemaOperationType op_type = + static_cast<ObSchemaOperationType>(ddl_stmt.get_operation_type()); + + chosen = false; + + // Note: + // 0.1 Push up the schema version number based on op_type and tenant, only_filter_by_tenant=false + // 0.2 Filter DDL output to the committer based on tenant, only_filter_by_tenant=true + // + // 1. Do not filter add tenant statements, because new tenants may be added (located in the whitelist) + // OB_DDL_ADD_TENANT corresponds to the version before schema splitting + // OB_DDL_ADD_TENANT_START records ddl_stmt, which corresponds to the version after schema split, and outputs only ddl_stmt_str + // OB_DDL_ADD_TENANT_END + // OB_DDL_FINISH_SCHEMA_SPLIT is not filtered by default + // + // 2. Do not filter del tenant statements for tenants that are in the whitelist + // OB_DDL_DEL_TENANT + // OB_DDL_DEL_TENANT_START + // OB_DDL_DEL_TENANT_END + // 3. For tenants created after the start position: at startup, all tenants existing at that time are added (located in the whitelist) + // 4. 
filter outline + if (only_filter_by_tenant) { + if (OB_FAIL(tenant_mgr.filter_ddl_stmt(tenant.get_tenant_id(), chosen))) { + LOG_ERROR("filter ddl stmt fail", KR(ret), K(tenant.get_tenant_id()), K(chosen)); + } + } else { + if (OB_DDL_ADD_TENANT == op_type + || OB_DDL_ADD_TENANT_START == op_type + || OB_DDL_ADD_TENANT_END == op_type + || OB_DDL_FINISH_SCHEMA_SPLIT == op_type) { + chosen = true; + } else if (OB_DDL_DEL_TENANT == op_type + || OB_DDL_DEL_TENANT_START == op_type + || OB_DDL_DEL_TENANT_END == op_type) { + chosen = true; + } else if (OB_DDL_CREATE_OUTLINE == op_type + || OB_DDL_REPLACE_OUTLINE == op_type + || OB_DDL_DROP_OUTLINE == op_type + || OB_DDL_ALTER_OUTLINE== op_type) { + chosen = false; + } + // filter based on tenant that ddl belongs to + else if (OB_FAIL(tenant_mgr.filter_ddl_stmt(tenant.get_tenant_id(), chosen))) { + LOG_ERROR("filter ddl stmt fail", KR(ret), K(tenant.get_tenant_id()), K(chosen)); + } else { + // succ + } + } + + if (OB_SUCCESS == ret && ! chosen) { + _ISTAT("[DDL] [FILTER_DDL_STMT] TENANT_ID=%lu OP_TYPE=%s(%d) SCHEMA_VERSION=%ld " + "SCHEMA_DELAY=%.3lf(sec) CUR_SCHEMA_VERSION=%ld OP_TABLE_ID=%ld OP_TENANT_ID=%ld " + "EXEC_TENANT_ID=%lu OP_DB_ID=%ld OP_TG_ID=%ld DDL_STMT=[%s] ONLY_FILTER_BY_TENANT=%d", + tenant.get_tenant_id(), + ObSchemaOperation::type_str(op_type), op_type, + ddl_stmt.get_op_schema_version(), + get_delay_sec(ddl_stmt.get_op_schema_version()), + tenant.get_schema_version(), + ddl_stmt.get_op_table_id(), + ddl_stmt.get_op_tenant_id(), + ddl_stmt.get_exec_tenant_id(), + ddl_stmt.get_op_database_id(), + ddl_stmt.get_op_tablegroup_id(), + to_cstring(ddl_stmt.get_ddl_stmt_str()), + only_filter_by_tenant); + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_trans_(PartTransTask &task, + bool &is_schema_split_mode, + ObLogTenant &tenant) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! task.is_ddl_trans())) { + LOG_ERROR("invalid ddl task which is not DDL trans", K(task)); + ret = OB_INVALID_ARGUMENT; + } + // Iterate through all DDL statements + else if (OB_FAIL(handle_tenant_ddl_task_(task, is_schema_split_mode, tenant))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_tenant_ddl_task_ fail", KR(ret), K(task), K(is_schema_split_mode), K(tenant)); + } + } else { + // If non-split mode, issue Virtual DDL Task for all tenants, except itself. + // Ensure that in non-split mode, all tenant schemas are refreshed to the latest version, and optimize + // individual tenant schema refreshes, which can affect the refresh speed if individual tenants use different versions of schema + // + // Upgrade process processing: update is_schema_split_mode when SCHEMA_SPLIT_FINISH DDL, that is, tenant split DDL does not need to issue VirtualDDLTask + if (! 
is_schema_split_mode) { + } + } + return ret; +} + +int ObLogDDLHandler::handle_tenant_ddl_task_(PartTransTask &task, + bool &is_schema_split_mode, + ObLogTenant &tenant) +{ + int ret = OB_SUCCESS; + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + int64_t ddl_part_tstamp = task.get_timestamp(); + int64_t ddl_schema_version = task.get_local_schema_version(); + const int64_t checkpoint_seq = task.get_checkpoint_seq(); + int64_t old_schema_version = OB_INVALID_TIMESTAMP; + int64_t new_schema_version = ddl_schema_version; // Adopt ddl schema version as new schema version + const uint64_t ddl_tenant_id = tenant.get_tenant_id(); // The tenant ID to which the DDL belongs + const int64_t start_schema_version = tenant.get_start_schema_version(); + const int64_t tenant_ddl_cur_schema_version = tenant.get_schema_version(); + + _ISTAT("[DDL] [HANDLE_TRANS] IS_SCHEMA_SPLIT_MODE=%d TENANT_ID=%ld STMT_COUNT=%ld CHECKPOINT_SEQ=%ld " + "SCHEMA_VERSION=%ld CUR_SCHEMA_VERSION=%ld LOG_DELAY=%.3lf(sec) SCHEMA_DELAY=%.3lf(sec)", + is_schema_split_mode, ddl_tenant_id, task.get_stmt_num(), checkpoint_seq, ddl_schema_version, + tenant_ddl_cur_schema_version, get_delay_sec(ddl_part_tstamp), + get_delay_sec(ddl_schema_version)); + + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("tenant_mgr is NULL", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } + // Ignore DDL operations that are smaller than the start Schema version + else if (OB_UNLIKELY(ddl_schema_version <= start_schema_version)) { + LOG_WARN("ignore DDL task whose schema version is not greater than start schema version", + K(ddl_schema_version), K(start_schema_version), K(task)); + // Mark all binlog records as invalid + mark_all_binlog_records_invalid_(task); + } + // Calculate the old_schema_version + else if (OB_FAIL(get_old_schema_version_(ddl_tenant_id, task, tenant_ddl_cur_schema_version, old_schema_version))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // Tenant does not exist, or schema fetching failure, ignore this DDL statement + LOG_WARN("get old schema version fail, tenant may be dropped, ignore", + KR(ret), K(ddl_tenant_id), K(task), K(tenant_ddl_cur_schema_version), K(old_schema_version)); + // Set all records to be invalid + mark_all_binlog_records_invalid_(task); + ret = OB_SUCCESS; + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_old_schema_version_ fail", KR(ret), K(ddl_tenant_id), K(task), K(tenant_ddl_cur_schema_version), + K(old_schema_version)); + } + } else { + // Iterate through each statement of the DDL + IStmtTask *stmt_task = task.get_stmt_list().head_; + bool only_filter_by_tenant = true; + while (NULL != stmt_task && OB_SUCCESS == ret) { + bool stmt_is_chosen = false; + DdlStmtTask *ddl_stmt = dynamic_cast(stmt_task); + + if (OB_UNLIKELY(! stmt_task->is_ddl_stmt()) || OB_ISNULL(ddl_stmt)) { + LOG_ERROR("invalid DDL statement", KPC(stmt_task), K(ddl_stmt)); + ret = OB_ERR_UNEXPECTED; + } + // filter ddl stmt + else if (OB_FAIL(filter_ddl_stmt_(tenant, *ddl_stmt, *tenant_mgr, stmt_is_chosen))) { + LOG_ERROR("filter_ddl_stmt_ fail", KR(ret), KPC(ddl_stmt), K(tenant), K(stmt_is_chosen)); + } else if (! 
stmt_is_chosen) { + // If the DDL statement is filtered, mark the binlog record as invalid + mark_stmt_binlog_record_invalid_(*ddl_stmt); + } else { + // statements are not filtered, processing DDL statements + if (OB_FAIL(handle_ddl_stmt_(tenant, task, *ddl_stmt, old_schema_version, new_schema_version, + is_schema_split_mode))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_ddl_stmt_ fail", KR(ret), K(tenant), K(task), K(ddl_stmt), + K(old_schema_version), K(new_schema_version)); + } + } + // The first filter_ddl_stmt_() will let go of some of the DDLs of the non-service tenants, and here it should be filtered again based on the tenant ID + // Ensure that only the DDLs of whitelisted tenants are output + else if (OB_FAIL(filter_ddl_stmt_(tenant, *ddl_stmt, *tenant_mgr, stmt_is_chosen, only_filter_by_tenant))) { + LOG_ERROR("filter_ddl_stmt fail", KR(ret), KPC(ddl_stmt), K(tenant), K(stmt_is_chosen)); + } else if (! stmt_is_chosen) { + // If the DDL statement is filtered, mark the binlog record as invalid + mark_stmt_binlog_record_invalid_(*ddl_stmt); + } + } + + if (OB_SUCCESS == ret) { + stmt_task = stmt_task->get_next(); + } + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_(ObLogTenant &tenant, + PartTransTask &task, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version, + bool &is_schema_split_mode) +{ + int ret = OB_SUCCESS; + ObSchemaOperationType op_type = (ObSchemaOperationType)ddl_stmt.get_operation_type(); + const int64_t checkpoint_seq = ddl_stmt.get_host().get_checkpoint_seq(); + + _ISTAT("[DDL] [HANDLE_STMT] TENANT_ID=%lu OP_TYPE=%s(%d) OP_TABLE_ID=%ld SCHEMA_VERSION=%ld " + "SCHEMA_DELAY=%.3lf(sec) CUR_SCHEMA_VERSION=%ld EXEC_TENANT_ID=%ld OP_TENANT_ID=%ld " + "OP_TABLE_ID=%ld OP_DB_ID=%ld OP_TG_ID=%ld DDL_STMT=[%s] CHECKPOINT_SEQ=%ld TRANS_ID=%s", + tenant.get_tenant_id(), ObSchemaOperation::type_str(op_type), op_type, + ddl_stmt.get_op_table_id(), + ddl_stmt.get_op_schema_version(), + get_delay_sec(ddl_stmt.get_op_schema_version()), + tenant.get_schema_version(), + ddl_stmt.get_exec_tenant_id(), + ddl_stmt.get_op_tenant_id(), + ddl_stmt.get_op_table_id(), + ddl_stmt.get_op_database_id(), + ddl_stmt.get_op_tablegroup_id(), + to_cstring(ddl_stmt.get_ddl_stmt_str()), + checkpoint_seq, + task.get_trans_id_str().ptr()); + + switch (op_type) { + case OB_DDL_DROP_TABLE : { + ret = handle_ddl_stmt_drop_table_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_ALTER_TABLE : { + ret = handle_ddl_stmt_alter_table_(tenant, ddl_stmt, old_schema_version, new_schema_version, "alter_table"); + break; + } + case OB_DDL_CREATE_TABLE : { + ret = handle_ddl_stmt_create_table_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_TABLE_RENAME : { + ret = handle_ddl_stmt_rename_table_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_ADD_TABLEGROUP : { + ret = handle_ddl_stmt_add_tablegroup_partition_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_DEL_TABLEGROUP : { + ret = handle_ddl_stmt_drop_tablegroup_partition_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_PARTITIONED_TABLEGROUP_TABLE : { + ret = handle_ddl_stmt_split_tablegroup_partition_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_SPLIT_TABLEGROUP_PARTITION : { + ret = handle_ddl_stmt_split_tablegroup_partition_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_ALTER_TABLEGROUP_ADD_TABLE : { + 
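+      // Moving a table into another tablegroup is not specially handled yet (see the TODO in handle_ddl_stmt_change_tablegroup_); the statement is simply passed through as direct output.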
ret = handle_ddl_stmt_change_tablegroup_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_ALTER_TABLEGROUP_PARTITION : { + // 1. tablegroup partitions are dynamically added and removed + // 2. tablegroup splitting + ret = handle_ddl_stmt_alter_tablegroup_partition_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_ADD_SUB_PARTITION : { + ret = handle_ddl_stmt_alter_table_(tenant, ddl_stmt, old_schema_version, new_schema_version, "add_sub_partition"); + break; + } + case OB_DDL_DROP_SUB_PARTITION : { + ret = handle_ddl_stmt_alter_table_(tenant, ddl_stmt, old_schema_version, new_schema_version, "drop_sub_partition"); + break; + } + case OB_DDL_TRUNCATE_TABLE_DROP : { + ret = handle_ddl_stmt_truncate_table_drop_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_TRUNCATE_DROP_TABLE_TO_RECYCLEBIN : { + ret = handle_ddl_stmt_truncate_drop_table_to_recyclebin_(tenant, ddl_stmt, old_schema_version, + new_schema_version); + break; + } + case OB_DDL_TRUNCATE_TABLE_CREATE : { + ret = handle_ddl_stmt_truncate_table_create_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_TRUNCATE_PARTITION: { + ret = handle_ddl_stmt_alter_table_(tenant, ddl_stmt, old_schema_version, new_schema_version, "truncate_partition"); + break; + } + case OB_DDL_TRUNCATE_SUB_PARTITION: { + ret = handle_ddl_stmt_alter_table_(tenant, ddl_stmt, old_schema_version, new_schema_version, "truncate_sub_partition"); + break; + } + case OB_DDL_DROP_TABLE_TO_RECYCLEBIN : { + ret = handle_ddl_stmt_drop_table_to_recyclebin_(tenant, ddl_stmt, old_schema_version, + new_schema_version); + break; + } + case OB_DDL_ADD_TENANT : { + ret = handle_ddl_stmt_add_tenant_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_DEL_TENANT : { + ret = handle_ddl_stmt_drop_tenant_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_ALTER_TENANT : { + ret = handle_ddl_stmt_alter_tenant_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_ADD_TENANT_END: { + ret = handle_ddl_stmt_add_tenant_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_DEL_TENANT_START: { + ret = handle_ddl_stmt_drop_tenant_(tenant, ddl_stmt, old_schema_version, new_schema_version, is_schema_split_mode, + true/*is_del_tenant_start_op*/); + break; + } + case OB_DDL_DEL_TENANT_END: { + ret = handle_ddl_stmt_drop_tenant_(tenant, ddl_stmt, old_schema_version, new_schema_version, is_schema_split_mode, + false/*is_del_tenant_start_op*/); + break; + } + case OB_DDL_RENAME_TENANT: { + ret = handle_ddl_stmt_rename_tenant_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_DROP_TENANT_TO_RECYCLEBIN: { + ret = handle_ddl_stmt_drop_tenant_to_recyclebin_(tenant, ddl_stmt, old_schema_version, + new_schema_version); + break; + } + case OB_DDL_ALTER_DATABASE : { + ret = handle_ddl_stmt_alter_database_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_DEL_DATABASE : { + ret = handle_ddl_stmt_drop_database_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_RENAME_DATABASE : { + ret = handle_ddl_stmt_rename_database_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_DROP_DATABASE_TO_RECYCLEBIN : { + ret = handle_ddl_stmt_drop_database_to_recyclebin_(tenant, ddl_stmt, old_schema_version, + new_schema_version); + break; + } + case 
OB_DDL_CREATE_GLOBAL_INDEX: { + // add global index + ret = handle_ddl_stmt_create_index_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_DROP_GLOBAL_INDEX: { + // delete global index + ret = handle_ddl_stmt_drop_index_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_CREATE_INDEX: { + // add unique index to TableIDCache + ret = handle_ddl_stmt_create_index_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_DROP_INDEX : { + // delete unique index from TableIDCache + ret = handle_ddl_stmt_drop_index_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + case OB_DDL_DROP_INDEX_TO_RECYCLEBIN : { + ret = handle_ddl_stmt_drop_index_to_recyclebin_(tenant, ddl_stmt, old_schema_version, new_schema_version); + break; + } + // Modify the number of table partitions and start the partition split + // Note: OB_DDL_FINISH_SPLIT represents the end of the split, but the end of the split does not write the DDL, so it will not be processed here + // Non-partitioned table -> Partitioned table + case OB_DDL_PARTITIONED_TABLE : { + ret = handle_ddl_stmt_split_begin_(tenant, ddl_stmt, new_schema_version); + break; + } + // Partition Table Split + case OB_DDL_SPLIT_PARTITION: { + ret = handle_ddl_stmt_split_begin_(tenant, ddl_stmt, new_schema_version); + break; + } + case OB_DDL_FINISH_SCHEMA_SPLIT: { + ret = handle_ddl_stmt_finish_schema_split_(tenant, ddl_stmt, new_schema_version, + is_schema_split_mode); + break; + } + + default: { + // Other DDL types, by default, are output directly and not processed + // new version of schema parsing is used by default + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, new_schema_version); + break; + } + } + + if (OB_FAIL(ret)) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle ddl statement fail", KR(ret), K(op_type), K(ddl_stmt)); + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_direct_output_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t schema_version) +{ + int ret = OB_SUCCESS; + ObSchemaOperationType op_type = + static_cast(ddl_stmt.get_operation_type()); + _ISTAT("[DDL] [HANDLE_STMT] [DIRECT_OUTPUT] TENANT_ID=%ld OP_TYPE=%s(%d) DDL_STMT=[%s]", + tenant.get_tenant_id(), ObSchemaOperation::type_str(op_type), op_type, + to_cstring(ddl_stmt.get_ddl_stmt_str())); + + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, schema_version))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(schema_version)); + } + } else { + // succ + } + + return ret; +} + +int ObLogDDLHandler::commit_ddl_stmt_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t schema_version, + const char *tenant_name /* = NULL */, + const char *db_name /* = NULL */, + const bool filter_ddl_stmt /* = false */) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + ObLogBR *br = ddl_stmt.get_binlog_record(); + ILogRecord *br_data = NULL; + ObSchemaOperationType op_type = (ObSchemaOperationType)ddl_stmt.get_operation_type(); + const char *op_type_str = ObSchemaOperation::type_str(op_type); + /// Need to get schema when the tenant name is empty + /// Allow DB name to be empty + bool need_get_schema = (NULL == tenant_name); + + // The tenant to which this DDL statement belongs is the one that follows + uint64_t ddl_tenant_id = tenant.get_tenant_id(); + const int64_t checkpoint_seq = ddl_stmt.get_host().get_checkpoint_seq(); + + if (ddl_stmt.get_ddl_stmt_str().empty()) { + // Ignore empty DDL statements + 
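+    // Such records presumably come from internal schema operations that write no statement text; only the log below and the invalid-marking are performed for them.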
ISTAT("[DDL] [FILTER_DDL_STMT] ignore empty DDL", + "schema_version", ddl_stmt.get_op_schema_version(), K(op_type_str), + K(ddl_tenant_id), + "op_tenant_id", ddl_stmt.get_op_tenant_id(), + "exec_tenant_id", ddl_stmt.get_exec_tenant_id(), + "op_database_id", ddl_stmt.get_op_database_id(), + "op_table_id", ddl_stmt.get_op_table_id()); + + // Set binlog record invalid + mark_stmt_binlog_record_invalid_(ddl_stmt); + } else if (filter_ddl_stmt) { + // 过滤指定的DDL语句 + ISTAT("[DDL] [FILTER_DDL_STMT] ignore DDL", + "schema_version", ddl_stmt.get_op_schema_version(), K(op_type_str), + K(ddl_tenant_id), + "op_tenant_id", ddl_stmt.get_op_tenant_id(), + "exec_tenant_id", ddl_stmt.get_exec_tenant_id(), + "op_database_id", ddl_stmt.get_op_database_id(), + "op_table_id", ddl_stmt.get_op_table_id(), + "ddl_stmt_str", ddl_stmt.get_ddl_stmt_str()); + + // Set binlog record invalid + mark_stmt_binlog_record_invalid_(ddl_stmt); + } else if (OB_ISNULL(br) || OB_ISNULL(br_data = br->get_data())) { + LOG_ERROR("invalid binlog record", K(br), K(br_data), K(ddl_stmt)); + ret = OB_ERR_UNEXPECTED; + } + // get tenant schmea and db schema + else if (need_get_schema + && OB_FAIL(get_schemas_for_ddl_stmt_(ddl_tenant_id, ddl_stmt, schema_version, schema_guard, + tenant_name, db_name))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // Tenant does not exist, or schema fetching failure, ignore this DDL statement + LOG_WARN("get schemas for ddl stmt fail, tenant may be dropped, ignore DDL statement", + KR(ret), K(ddl_tenant_id), K(schema_version), K(ddl_stmt)); + // Set all binlog record invalid + mark_stmt_binlog_record_invalid_(ddl_stmt); + ret = OB_SUCCESS; + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_schemas_for_ddl_stmt_ fail", KR(ret), K(ddl_stmt), K(schema_version), + K(ddl_tenant_id)); + } + } + // set db name for binlog record + else if (OB_FAIL(set_binlog_record_db_name_(*br_data, op_type, tenant_name, db_name))) { + LOG_ERROR("set_binlog_record_db_name_ fail", KR(ret), K(op_type), K(tenant_name), K(db_name)); + } else { + // handle done + _ISTAT("[DDL] [HANDLE_DONE] TENANT_ID=%lu DB_NAME=%s OP_TYPE=%s(%d) SCHEMA_VERSION=%ld " + "OP_TENANT_ID=%lu EXEC_TENANT_ID=%lu OP_DB_ID=%lu OP_TABLE_ID=%lu DDL_STMT=[%s] CHECKPOINT_SEQ=%ld", + ddl_tenant_id, br_data->dbname(), op_type_str, op_type, ddl_stmt.get_op_schema_version(), + ddl_stmt.get_op_tenant_id(), ddl_stmt.get_exec_tenant_id(), ddl_stmt.get_op_database_id(), + ddl_stmt.get_op_table_id(), + to_cstring(ddl_stmt.get_ddl_stmt_str()), + checkpoint_seq); + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_IN_STOP_STATE exit +// @retval other error code fail +int ObLogDDLHandler::get_lazy_schema_guard_(const uint64_t tenant_id, + const int64_t version, + ObLogSchemaGuard &schema_guard) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(schema_getter_)) { + LOG_ERROR("schema getter is invalid", K(schema_getter_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(version < 0)) { + LOG_ERROR("invalid version", K(version)); + ret = OB_INVALID_ARGUMENT; + } else { + RETRY_FUNC(stop_flag_, (*schema_getter_), get_lazy_schema_guard, tenant_id, version, + DATA_OP_TIMEOUT, schema_guard); + } + + return ret; +} + +// The database_id determines the DDL BinlogRcord output database_name, database_id determines the policy: +// 1. when using new schema, directly use the database id that comes with DDL stmt +// 2. 
When using old schema, if the table id is invalid, use the database_id in DDL directly; otherwise, +// refresh the table_schema based on the table_id, and then determine the databse id according to the table schema. +// In some cases, the database ids in the old and new schema are not the same, for example, +// if you drop a table to the recycle bin, the DDL database id is "__recyclebin" database id, not the original database id +// +// +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_IN_STOP_STATE exit +// @retval other error code fail +int ObLogDDLHandler::decide_ddl_stmt_database_id_(DdlStmtTask &ddl_stmt, + const int64_t schema_version, + ObLogSchemaGuard &schema_guard, + uint64_t &db_id) +{ + int ret = OB_SUCCESS; + const bool is_use_new_schema_version_mode = is_use_new_schema_version(ddl_stmt, schema_version); + + if (is_use_new_schema_version_mode) { + db_id = ddl_stmt.get_op_database_id(); + } else { + uint64_t table_id = ddl_stmt.get_op_table_id(); + + // If the table id is invalid, the db_id is used in the DDL. + if (OB_INVALID_ID == table_id || 0 == table_id) { + db_id = ddl_stmt.get_op_database_id(); + } else { + const ObSimpleTableSchemaV2 *tb_schema = NULL; + + // Retry to get the table schema until it succeeds or exit + RETRY_FUNC(stop_flag_, schema_guard, get_table_schema, table_id, tb_schema, DATA_OP_TIMEOUT); + + if (OB_FAIL(ret)) { + if (OB_IN_STOP_STATE != ret) { + // OB_TENANT_HAS_BEEN_DROPPED means tenant has been droped, dealed by caller + LOG_ERROR("get_table_schema fail", KR(ret), K(table_id), K(schema_version), K(ddl_stmt)); + } + } + // If the schema of the table is empty, the database id is invalid + else if (NULL == tb_schema) { + LOG_WARN("table schema is NULL. 
set database name NULL", + K(table_id), K(schema_version), "ddl_stmt", ddl_stmt.get_ddl_stmt_str()); + + db_id = OB_INVALID_ID; + } else { + db_id = tb_schema->get_database_id(); + } + } + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_IN_STOP_STATE exit +// @retval other error code fail +int ObLogDDLHandler::get_schemas_for_ddl_stmt_(const uint64_t ddl_tenant_id, + DdlStmtTask &ddl_stmt, + const int64_t schema_version, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name) +{ + int ret = OB_SUCCESS; + uint64_t db_id = OB_INVALID_ID; + TenantSchemaInfo tenant_schema_info; + DBSchemaInfo db_schema_info; + + // Get schema guard based on tenant_id and version number + if (OB_FAIL(get_lazy_schema_guard_(ddl_tenant_id, schema_version, schema_guard))) { + if (OB_IN_STOP_STATE != ret) { + // OB_TENANT_HAS_BEEN_DROPPED indicates that the tenant may have been deleted + LOG_WARN("get_lazy_schema_guard_ fail", KR(ret), K(ddl_tenant_id), K(schema_version)); + } + } + // decide database id + else if (OB_FAIL(decide_ddl_stmt_database_id_(ddl_stmt, schema_version, schema_guard, db_id))) { + // OB_TENANT_HAS_BEEN_DROPPED indicates that the tenant may have been deleted + if (OB_IN_STOP_STATE != ret) { + LOG_WARN("decide_ddl_stmt_database_id_ fail", KR(ret), K(ddl_stmt), K(schema_version)); + } + } else { + // Require ddl_tenant_id to match the tenant to which db_id belongs + // If it does not match, print ERROR log + if (OB_INVALID_ID != db_id && 0 != db_id && ddl_tenant_id != extract_tenant_id(db_id)) { + LOG_ERROR("DDL database id does not match ddl_tenant_id", K(db_id), K(ddl_tenant_id), + K(extract_tenant_id(db_id)), K(ddl_stmt)); + } + + // Retry to get tenant schema until success or exit + RETRY_FUNC(stop_flag_, schema_guard, get_tenant_schema_info, ddl_tenant_id, tenant_schema_info, + DATA_OP_TIMEOUT); + + if (OB_FAIL(ret)) { + if (OB_IN_STOP_STATE != ret) { + // OB_TENANT_HAS_BEEN_DROPPED indicates that the tenant may have been deleted + LOG_WARN("get_tenant_schema_info fail", KR(ret), K(ddl_tenant_id), K(tenant_schema_info), + K(schema_version), K(ddl_stmt)); + } + } else { + // set tenant name + tenant_name = tenant_schema_info.name_; + + // FIXME: Currently there are two invalid values: 0 and OB_INVALID_ID, it is recommended that the observer is unified + if (OB_INVALID_ID == db_id || 0 == db_id) { + // If db_id is invalid, the corresponding database name is not retrieved + db_name = NULL; + } else { + // Retry to get the database schema until it succeeds or exit + RETRY_FUNC(stop_flag_, schema_guard, get_database_schema_info, db_id, db_schema_info, + DATA_OP_TIMEOUT); + + if (OB_FAIL(ret)) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // OB_TENANT_HAS_BEEN_DROPPED indicates that the tenant may have been deleted + // DB does not exist and is considered normal + LOG_WARN("get database schema fail, set database name NULL", KR(ret), + K(tenant_schema_info), K(db_id), K(schema_version), + "ddl_stmt", ddl_stmt.get_ddl_stmt_str()); + db_name = NULL; + ret = OB_SUCCESS; + } else if (OB_IN_STOP_STATE != ret) { + LOG_WARN("get_database_schema_info fail", KR(ret), K(db_id), K(schema_version)); + } + } else { + db_name = db_schema_info.name_; + } + } + } + } + + if (OB_SUCCESS == ret) { + if (ddl_tenant_id != ddl_stmt.get_op_tenant_id()) { + LOG_INFO("[DDL] [NOTICE] DDL stmt belong to different tenant with operated tenant", + K(ddl_tenant_id), K(ddl_stmt)); + } + } + + return ret; +} + +bool 
ObLogDDLHandler::is_use_new_schema_version(DdlStmtTask &ddl_stmt, + const int64_t schema_version) +{ + const int64_t part_local_ddl_schema_version = ddl_stmt.get_host().get_local_schema_version(); + + return schema_version == part_local_ddl_schema_version; +} + +int ObLogDDLHandler::set_binlog_record_db_name_(ILogRecord &br_data, + const int64_t ddl_operation_type, + const char * const tenant_name, + const char * const db_name) +{ + int ret = OB_SUCCESS; + + // allow db_name empty + if (OB_ISNULL(tenant_name)) { + LOG_ERROR("invalid argument", K(tenant_name)); + ret = OB_INVALID_ARGUMENT; + } else { + ObSchemaOperationType op_type = + static_cast<ObSchemaOperationType>(ddl_operation_type); + + // If a DDL only operates on a tenant, the database is invalid + std::string db_name_str = tenant_name; + // For create database DDL statements, the ILogRecord db information only records tenant information, no database information is recorded. + if (NULL != db_name && OB_DDL_ADD_DATABASE != op_type && OB_DDL_FLASHBACK_DATABASE != op_type) { + db_name_str.append("."); + db_name_str.append(db_name); + } + + br_data.setDbname(db_name_str.c_str()); + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard old_schema_guard; + const char *tenant_name = NULL; + const char *db_name = NULL; + bool is_table_should_ignore_in_committer = false; + + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), drop_table, + ddl_stmt.get_op_table_id(), + old_schema_version, + new_schema_version, + is_table_should_ignore_in_committer, + old_schema_guard, + tenant_name, + db_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored.
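+  // is_table_should_ignore_in_committer is filled in by PartMgr's drop_table above and is later passed to commit_ddl_stmt_ as its filter_ddl_stmt argument, so tables that PartMgr decides to ignore also have their DDL output suppressed.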
+ IGNORE_SCHEMA_ERROR(ret, "schema_version", old_schema_version, K(ddl_stmt), K(is_table_should_ignore_in_committer)); + + if (OB_SUCC(ret)) { + // Delete table using old_schema_version parsing + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, old_schema_version, tenant_name, db_name, + is_table_should_ignore_in_committer))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(ddl_stmt), K(tenant), K(old_schema_version), + K(tenant_name), K(db_name), K(is_table_should_ignore_in_committer)); + } + } else { + // succ + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_table_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + + // TODO: After the Table is put into the Recycle Bin, modify the places related to the Table name in PartMgr; in addition, support displaying the table into different states + UNUSED(new_schema_version); + + // Parsing with older schema versions + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_alter_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version, + const char *event) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard old_schema_guard; + ObLogSchemaGuard new_schema_guard; + const char *old_tenant_name = NULL; + const char *old_db_name = NULL; + + // TODO:Support table renaming, refiltering based on filtering rules + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_timestamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + + // Atopt new_schema_version + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), alter_table, + ddl_stmt.get_op_table_id(), + old_schema_version, + new_schema_version, + start_serve_timestamp, + old_schema_guard, + new_schema_guard, + old_tenant_name, + old_db_name, + event, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. 
+ IGNORE_SCHEMA_ERROR(ret, "schema_version", new_schema_version, K(ddl_stmt)); + + if (OB_SUCC(ret)) { + // Set tenant, database and table name with old schema + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, old_schema_version, old_tenant_name, old_db_name))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(old_schema_version), + K(old_tenant_name), K(old_db_name)); + } + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_create_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + bool is_create_table = true; + bool is_table_should_ignore_in_committer = false; + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_tstamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + ObLogSchemaGuard schema_guard; + const char *tenant_name = NULL; + const char *db_name = NULL; + + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), add_table, + ddl_stmt.get_op_table_id(), + new_schema_version, + start_serve_tstamp, + is_create_table, + is_table_should_ignore_in_committer, + schema_guard, + tenant_name, + db_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, "schema_version", new_schema_version, K(ddl_stmt), K(is_table_should_ignore_in_committer)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name, db_name, + is_table_should_ignore_in_committer))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name), + K(db_name), K(is_table_should_ignore_in_committer)); + } + } else {} + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_rename_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + + // TODO:support table rename + + UNUSED(new_schema_version); + // Parsing with older schema versions + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_create_index_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_tstamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + ObLogSchemaGuard schema_guard; + const char *tenant_name = NULL; + const char *db_name = NULL; + + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), add_index_table, + ddl_stmt.get_op_table_id(), + new_schema_version, + start_serve_tstamp, + schema_guard, + tenant_name, + db_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. 
+ IGNORE_SCHEMA_ERROR(ret, "schema_version", new_schema_version, K(ddl_stmt)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name, db_name))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name), + K(db_name)); + } + } else {} + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_index_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard old_schema_guard; + const char *tenant_name = NULL; + const char *db_name = NULL; + + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), drop_index_table, + ddl_stmt.get_op_table_id(), + old_schema_version, + new_schema_version, + old_schema_guard, + tenant_name, + db_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, K(old_schema_version), K(new_schema_version), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + // drop index uses old_schema_version parsing + // drop index DDL, __all_ddl_operation table_id is the table_id of the index table, in order to ensure + // that the table_schema is available, use the old_schema version to ensure that the database information is available in BinlogRecord + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, old_schema_version, tenant_name, db_name))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name), + K(db_name)); + } + } else { + // succ + } + } + + return OB_SUCCESS; +} + +int ObLogDDLHandler::handle_ddl_stmt_add_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_timestamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + ObLogSchemaGuard schema_guard; + // TableGroup has no DB Name + const char *tenant_name = NULL; + + // Adopt new version of schema version + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), add_tablegroup_partition, + ddl_stmt.get_op_tablegroup_id(), + new_schema_version, + start_serve_timestamp, + schema_guard, + tenant_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. 
+ IGNORE_SCHEMA_ERROR(ret, K(new_schema_version), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name)); + } + } else {} + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + const char *tenant_name = NULL; + + // Adopt the new schema version + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), drop_tablegroup_partition, + ddl_stmt.get_op_tablegroup_id(), + old_schema_version, + new_schema_version, + schema_guard, + tenant_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, K(new_schema_version), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name)); + } + } else {} + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_split_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_timestamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + const char *tenant_name = NULL; + + // Adopt the new schema version + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), split_tablegroup_partition, + ddl_stmt.get_op_tablegroup_id(), + new_schema_version, + start_serve_timestamp, + schema_guard, + tenant_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored.
+  IGNORE_SCHEMA_ERROR(ret, K(new_schema_version), K(ddl_stmt));
+
+  if (OB_SUCC(ret)) {
+    if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name))) {
+      if (OB_IN_STOP_STATE != ret) {
+        LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name));
+      }
+    } else {}
+  }
+
+  return ret;
+}
+
+int ObLogDDLHandler::handle_ddl_stmt_change_tablegroup_(ObLogTenant &tenant,
+    DdlStmtTask &ddl_stmt,
+    const int64_t old_schema_version,
+    const int64_t new_schema_version)
+{
+  int ret = OB_SUCCESS;
+
+  // TODO: support changing TableGroup
+  UNUSED(old_schema_version);
+
+  // Can be resolved with either the old or the new schema version
+  ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, new_schema_version);
+
+  return ret;
+}
+
+int ObLogDDLHandler::handle_ddl_stmt_alter_tablegroup_partition_(ObLogTenant &tenant,
+    DdlStmtTask &ddl_stmt,
+    const int64_t old_schema_version,
+    const int64_t new_schema_version)
+{
+  int ret = OB_SUCCESS;
+  int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp();
+  int64_t start_serve_timestamp = get_start_serve_timestamp_(new_schema_version,
+      prepare_log_timestamp);
+  ObLogSchemaGuard old_schema_guard;
+  ObLogSchemaGuard new_schema_guard;
+  const char *tenant_name = NULL;
+
+  // Use the new schema version
+  RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), alter_tablegroup_partition,
+      ddl_stmt.get_op_tablegroup_id(),
+      old_schema_version,
+      new_schema_version,
+      start_serve_timestamp,
+      old_schema_guard,
+      new_schema_guard,
+      tenant_name,
+      DATA_OP_TIMEOUT);
+
+  // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table,
+  // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored.
+  IGNORE_SCHEMA_ERROR(ret, K(old_schema_version), K(new_schema_version), K(ddl_stmt));
+
+  if (OB_SUCC(ret)) {
+    // Set tenant, database and table name with old schema
+    if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, old_schema_version, tenant_name))) {
+      if (OB_IN_STOP_STATE != ret) {
+        LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(ddl_stmt),
+            "schema_version", old_schema_version);
+      }
+    }
+  }
+
+  return ret;
+}
+
+int ObLogDDLHandler::handle_ddl_stmt_truncate_table_drop_(ObLogTenant &tenant,
+    DdlStmtTask &ddl_stmt,
+    const int64_t old_schema_version,
+    const int64_t new_schema_version)
+{
+  int ret = OB_SUCCESS;
+  ObLogSchemaGuard old_schema_guard;
+  const char *tenant_name = NULL;
+  const char *db_name = NULL;
+  bool is_table_should_ignore_in_committer = false;
+
+  _ISTAT("[DDL] [TRUNCATE_DROP] TENANT_ID=%lu TABLE_ID=%ld SCHEMA_VERSION=(OLD=%ld,NEW=%ld) DDL_STMT=[%s]",
+      tenant.get_tenant_id(),
+      ddl_stmt.get_op_table_id(),
+      old_schema_version,
+      new_schema_version,
+      to_cstring(ddl_stmt.get_ddl_stmt_str()));
+
+  RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), drop_table,
+      ddl_stmt.get_op_table_id(),
+      old_schema_version,
+      new_schema_version,
+      is_table_should_ignore_in_committer,
+      old_schema_guard,
+      tenant_name,
+      db_name,
+      DATA_OP_TIMEOUT);
+
+  // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table,
+  // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored.
+ IGNORE_SCHEMA_ERROR(ret, K(old_schema_version), K(new_schema_version), K(ddl_stmt), K(is_table_should_ignore_in_committer)); + + if (OB_SUCC(ret)) { + // TRUNCATE DROP operation don't need output + mark_stmt_binlog_record_invalid_(ddl_stmt); + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_truncate_drop_table_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + + _ISTAT("[DDL] [TRUNCATE_DROP_TABLE_TO_RECYCLEBIN] TENANT_ID=%lu TABLE_ID=%ld " + "SCHEMA_VERSION=(OLD=%ld,NEW=%ld) DDL_STMT=[%s]", + tenant.get_tenant_id(), + ddl_stmt.get_op_table_id(), + old_schema_version, + new_schema_version, + to_cstring(ddl_stmt.get_ddl_stmt_str())); + + if (OB_SUCC(ret)) { + // OB_DDL_TRUNCATE_DROP_TABLE_TO_RECYCLEBIN operation does not need to output DDL + // Set binlog record to be invalid + mark_stmt_binlog_record_invalid_(ddl_stmt); + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_truncate_table_create_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + bool is_create_table = true; + bool is_table_should_ignore_in_committer = false; + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_tstamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + + ObLogSchemaGuard schema_guard; + const char *tenant_name = NULL; + const char *db_name = NULL; + + _ISTAT("[DDL] [TRUNCATE_CREATE] TENANT_ID=%lu TABLE_ID=%ld SCHEMA_VERSION=%ld START_TSTAMP=%ld DDL_STMT=[%s]", + tenant.get_tenant_id(), + ddl_stmt.get_op_table_id(), + new_schema_version, + start_serve_tstamp, + to_cstring(ddl_stmt.get_ddl_stmt_str())); + + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), add_table, + ddl_stmt.get_op_table_id(), + new_schema_version, + start_serve_tstamp, + is_create_table, + is_table_should_ignore_in_committer, + schema_guard, + tenant_name, + db_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. 
+ IGNORE_SCHEMA_ERROR(ret, K(new_schema_version), K(start_serve_tstamp), K(is_create_table), K(ddl_stmt), K(is_table_should_ignore_in_committer)); + + if (OB_SUCC(ret)) { + // Adopt new version of schema parsing + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name, db_name, + is_table_should_ignore_in_committer))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(tenant_name), + K(db_name), K(is_table_should_ignore_in_committer)); + } + } else {} + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_index_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + + UNUSED(new_schema_version); + + // Use old version schema parsing + // Ensure that the binlog record DB information is correct + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_add_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + bool tenant_is_chosen = false; + bool is_new_created_tenant = true; + bool is_new_tenant_by_restore = false; + uint64_t target_tenant_id = ddl_stmt.get_op_tenant_id(); + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_tstamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + ObLogSchemaGuard schema_guard; + const char *tenant_name = NULL; + int64_t valid_schema_version = new_schema_version; + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("invalid tenant mgr", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(parse_tenant_ddl_stmt_for_restore_(ddl_stmt, valid_schema_version, start_serve_tstamp, is_new_tenant_by_restore))) { + LOG_ERROR("parse_tenant_ddl_stmt_for_restore_ failed", KR(ret), K(ddl_stmt), K(valid_schema_version), K(start_serve_tstamp), K(is_new_tenant_by_restore)); + } else { + RETRY_FUNC(stop_flag_, (*tenant_mgr), add_tenant, + target_tenant_id, + is_new_created_tenant, + is_new_tenant_by_restore, + start_serve_tstamp, + valid_schema_version, + schema_guard, + tenant_name, + DATA_OP_TIMEOUT, + tenant_is_chosen); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, K(valid_schema_version), K(start_serve_tstamp), + K(target_tenant_id), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + bool filter_ddl_stmt = false; + // Filter tenants that are not on the whitelist + if (! 
tenant_is_chosen) { + filter_ddl_stmt = true; + } + + // DB name is empty + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name, NULL, filter_ddl_stmt))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(filter_ddl_stmt), + K(tenant_name)); + } + } else {} + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version, + const bool is_schema_split_mode /* = false */, + const bool is_del_tenant_start_op /* = false */) +{ + int ret = OB_SUCCESS; + uint64_t target_tenant_id = ddl_stmt.get_op_tenant_id(); + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + const int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("invalid tenant mgr", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else { + ISTAT("[DDL] begin to handle drop tenant DDL stmt", K(is_schema_split_mode), + K(is_del_tenant_start_op), K(ddl_stmt), K(old_schema_version), K(new_schema_version)); + + if (! is_schema_split_mode) { + // For non-split mode, drop tenant DDL marks the end of this tenant's DDL flow, so the drop_tenant() interface is called directly to delete the tenant + // In split mode, it is triggered by DDL OFFLINE Task, see handle_ddl_offline_task_() for details + ret = tenant_mgr->drop_tenant(target_tenant_id, "DROP_TENANT_DDL"); + } else if (is_del_tenant_start_op) { + // DROP TENANT START for split mode, marking the start of tenant deletion + ret = tenant_mgr->drop_tenant_start(target_tenant_id, prepare_log_timestamp); + } else { + // DROP TENANT END for schema split mode, marking the end of tenant deletion + ret = tenant_mgr->drop_tenant_end(target_tenant_id, prepare_log_timestamp); + } + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, K(target_tenant_id), K(old_schema_version), K(new_schema_version), + K(is_schema_split_mode), K(is_del_tenant_start_op), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, old_schema_version))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt)); + } + } else {} + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_alter_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + + UNUSED(new_schema_version); + + // TODO: support for changing tenant names + // Adopt new version of schema parsing + // Currently OB does not support changing the tenant name, the code needs to be tested later + + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +// Background: For data consumption chain, a large number of tenant split deployment method is used, i.e. 
for tenant tt1, +// tenant whitelist is used to start single or multiple liboblog for synchronization +// When the tenant name changes, it will cause the tenant whitelist expire +// Support: liboblog processes a DDL to rename tenant, with the error OB_NOT_SUPPORTED; liboblog consumers need to start a new instance of the new tenant +int ObLogDDLHandler::handle_ddl_stmt_rename_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + const char *tenant_name = NULL; + bool tenant_is_chosen = false; + const uint64_t target_tenant_id = ddl_stmt.get_op_tenant_id(); + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("invalid tenant mgr", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else { + RETRY_FUNC(stop_flag_, (*tenant_mgr), alter_tenant_name, + target_tenant_id, + old_schema_version, + new_schema_version, + DATA_OP_TIMEOUT, + tenant_name, + tenant_is_chosen); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, K(new_schema_version), K(target_tenant_id), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + bool filter_ddl_stmt = false; + // Filter tenants that are not on the whitelist + if (! tenant_is_chosen) { + filter_ddl_stmt = true; + } + + // DB name is empty + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name, NULL, filter_ddl_stmt))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt), K(filter_ddl_stmt), + K(tenant_name)); + } + } else {} + } + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_tenant_to_recyclebin_( + ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + UNUSED(new_schema_version); + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_alter_database_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + + UNUSED(new_schema_version); + + // TODO: support for changing database names + // Use old version schema + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_database_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + // don't need to handle drop database + UNUSED(new_schema_version); + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_drop_database_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + UNUSED(new_schema_version); + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_rename_database_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + UNUSED(new_schema_version); + ret = handle_ddl_stmt_direct_output_(tenant, ddl_stmt, 
old_schema_version); + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_split_begin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version) +{ + int ret = OB_SUCCESS; + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_timestamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + ObLogSchemaGuard new_schema_guard; + const char *tenant_name = NULL; + const char *db_name = NULL; + + RETRY_FUNC(stop_flag_, tenant.get_part_mgr(), split_table, + ddl_stmt.get_op_table_id(), + new_schema_version, + start_serve_timestamp, + new_schema_guard, + tenant_name, + db_name, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. + IGNORE_SCHEMA_ERROR(ret, K(new_schema_version), K(start_serve_timestamp), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version, tenant_name, db_name))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant),K(ddl_stmt), K(tenant_name), K(db_name)); + } + } else {} + } + + return ret; +} + +int ObLogDDLHandler::handle_ddl_stmt_finish_schema_split_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version, + bool &is_schema_split_mode) +{ + int ret = OB_SUCCESS; + _ISTAT("[CHANGE_SCHEMA_SPLIT_MODE] [HANDLE_DDL_STMT_FINISH_SCHEMA_SPLIT] SCHEMA_VERSION=%ld IS_SCHEMA_SPLIT_MODE=%d", + new_schema_version, TCTX.is_schema_split_mode_); + + // enable schema split mode + TCTX.enable_schema_split_mode(); + + // enable schema split mode + is_schema_split_mode = true; + + int64_t prepare_log_timestamp = ddl_stmt.get_host().get_timestamp(); + int64_t start_serve_timestamp = get_start_serve_timestamp_(new_schema_version, + prepare_log_timestamp); + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + const int64_t split_schema_version = ddl_stmt.get_op_schema_version(); + + if (OB_ISNULL(tenant_mgr)) { + LOG_ERROR("invalid tenant mgr", K(tenant_mgr)); + ret = OB_ERR_UNEXPECTED; + } else { + RETRY_FUNC(stop_flag_, (*tenant_mgr), handle_schema_split_finish, + tenant.get_tenant_id(), + split_schema_version, + start_serve_timestamp, + DATA_OP_TIMEOUT); + + // If the schema error is encountered, it means that the tenant may be deleted in the future, so the schema of table, + // database, tenant or table group cannot be obtained, in this case, the DDL will be ignored. 
+ IGNORE_SCHEMA_ERROR(ret, K(split_schema_version), K(start_serve_timestamp), K(ddl_stmt)); + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_ddl_stmt_(tenant, ddl_stmt, new_schema_version))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("commit_ddl_stmt_ fail", KR(ret), K(tenant), K(ddl_stmt)); + } + } else {} + } + } + + return ret; +} + +int ObLogDDLHandler::parse_tenant_ddl_stmt_for_restore_(DdlStmtTask &ddl_stmt, int64_t &schema_version, + int64_t &tenant_gts_value, bool &is_create_tenant_by_restore_ddl) +{ + int ret = OB_SUCCESS; + is_create_tenant_by_restore_ddl = false; + ObString ddl_stmt_str = ddl_stmt.get_ddl_stmt_str(); + int64_t len = ddl_stmt_str.length(); + char ddl_stmt_buf[len + 1]; + MEMSET(ddl_stmt_buf, '\0', len + 1); + MEMCPY(ddl_stmt_buf, ddl_stmt_str.ptr(), len); + const char *pair_delimiter = "; "; + const char *kv_delimiter = "="; + const char *key_gts = "tenant_gts"; + const char *key_version = "schema_version"; + const char *value_gts = NULL; + const char *value_version = NULL; + +//schema_version=1617073037190088; tenant_gts=1617073037222488 + const bool is_restore_tenant_ddl = !ddl_stmt_str.empty() + && (NULL != strstr(ddl_stmt_buf, pair_delimiter)) && (NULL != strstr(ddl_stmt_buf, kv_delimiter)) + && (NULL != strstr(ddl_stmt_buf, key_gts)) && (NULL != strstr(ddl_stmt_buf, key_version)); + + if (is_restore_tenant_ddl) { + const bool skip_dirty_data = (TCONF.skip_dirty_data != 0); + int64_t tenant_schema_version_from_ddl = 0; + int64_t tenant_gts_from_ddl = 0; + ObLogKVCollection kv_c; + + //ddl for physical backup restore contains str like: schema_version=%ld; tenant_gts=%ld + if (OB_FAIL(ret) || OB_FAIL(kv_c.init(kv_delimiter, pair_delimiter))) { + LOG_ERROR("init key-value str fail", KR(ret), K(ddl_stmt_str)); + } else if (OB_FAIL(kv_c.deserialize(ddl_stmt_buf))) { + LOG_ERROR("deserialize kv string fail", KR(ret), K(ddl_stmt_str)); + } else if (OB_UNLIKELY(!kv_c.is_valid())) { + LOG_ERROR("key-value collection built by ddl_stmt is not valid", K(ddl_stmt_str), K(kv_c)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(kv_c.get_value_of_key(key_gts, value_gts))) { + LOG_ERROR("failed to get tenant gts value", KR(ret), K(ddl_stmt_str), K(kv_c), K(key_gts)); + } else if (OB_FAIL(kv_c.get_value_of_key(key_version, value_version))) { + LOG_ERROR("failed to get tenant gts value", KR(ret), K(ddl_stmt_str), K(kv_c), K(key_version)); + } else if (OB_FAIL(c_str_to_int(value_version, tenant_schema_version_from_ddl))) { + LOG_ERROR("failed to get value of tenant schema version", KR(ret), K(value_version), K(tenant_schema_version_from_ddl), K(ddl_stmt)); + } else if (OB_FAIL(c_str_to_int(value_gts, tenant_gts_from_ddl))) { + LOG_ERROR("failed to get value of tenant schema version", KR(ret), K(value_gts), K(tenant_gts_value), K(ddl_stmt)); + } else { + is_create_tenant_by_restore_ddl = true; + schema_version = tenant_schema_version_from_ddl > schema_version ? 
tenant_schema_version_from_ddl : schema_version; + tenant_gts_value = tenant_gts_from_ddl; + } + if (OB_SUCC(ret)) { + mark_stmt_binlog_record_invalid_(ddl_stmt); + LOG_INFO("mark create_tenant_end_ddl invalid for restore tenant", KR(ret), K(is_create_tenant_by_restore_ddl), + K(schema_version), K(tenant_gts_value), K(ddl_stmt)); + } else if (skip_dirty_data) { + LOG_WARN("parse_tenant_ddl_stmt_for_restore_ fail!", KR(ret), K(is_create_tenant_by_restore_ddl), K(schema_version), K(tenant_gts_value), + K(value_gts), K(value_version), K(ddl_stmt), K(kv_c)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("parse_tenant_ddl_stmt_for_restore_ fail!", KR(ret), K(is_create_tenant_by_restore_ddl), K(schema_version), K(tenant_gts_value), + K(value_gts), K(value_version), K(ddl_stmt), K(kv_c)); + } + } else { + LOG_INFO("parse_tenant_ddl_stmt_for_restore_ passby", K(is_create_tenant_by_restore_ddl), K(schema_version), K(tenant_gts_value), K(ddl_stmt)); + } + return ret; +} + +int64_t ObLogDDLHandler::get_start_serve_timestamp_(const int64_t new_schema_version, + const int64_t prepare_log_timestamp) +{ + // The table start timestamp selects the maximum value of the Schema version and prepare log timestamp of table __all_ddl_operation + // The purpose is to avoid heartbeat timestamp fallback + return std::max(new_schema_version, prepare_log_timestamp); +} + +} +} diff --git a/src/liboblog/src/ob_log_ddl_handler.h b/src/liboblog/src/ob_log_ddl_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..4845342233d8f55a80181ea018096b1db0b4d569 --- /dev/null +++ b/src/liboblog/src/ob_log_ddl_handler.h @@ -0,0 +1,349 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_DDL_HANDLER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_DDL_HANDLER_H__ + +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock +#include "lib/utility/ob_macro_utils.h" // CACHE_ALIGNED, DISALLOW_COPY_AND_ASSIGN +#include "common/ob_queue_thread.h" // ObCond +#include // ILogRecord + +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_part_trans_task_queue.h" // SafePartTransTaskQueue + +namespace oceanbase +{ +namespace liboblog +{ +class IObLogDDLHandler +{ +public: + virtual ~IObLogDDLHandler() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(PartTransTask *task, const int64_t timeout) = 0; + + /// Get the processing progress, note that it is not the output progress, a DDL task can update the progress after the processing is completed + //// + /// 1. If there is a backlog of data to be processed in the queue, return the timestamp of the next pending task - 1 as the processing progress + /// 2. 
If there is no backlog in the queue, return OB_INVALID_PROGRESS + //// + /// @param [out] ddl_min_progress_key the partition key of the slowest ddl processing progress corresponding to the tenant __all_ddl_operation table + /// @param [out] ddl_min_progress slowest ddl_progress + /// @param [out] ddl_min_handle_log_id slowest ddl processing log ID + /// + /// @retval OB_SUCCESS success + /// @retval other error code fail + virtual int get_progress(uint64_t &ddl_min_progress_tenant_id, + int64_t &ddl_min_progress, + uint64_t &ddl_min_handle_log_id) = 0; + + virtual int64_t get_part_trans_task_count() const = 0; +}; + +///////////////////////////////// ObLogDDLHandler ///////////////////////////////// + +class IObLogDdlParser; +class IObLogErrHandler; +class IObLogSequencer; +class IObLogSchemaGetter; +class IStmtTask; +class PartTransTask; +class DdlStmtTask; +class ObLogSchemaGuard; +class IObLogTenantMgr; +class ObLogTenant; +class ObLogTenantGuard; +class ObLogDDLHandler : public IObLogDDLHandler +{ + enum + { + DATA_OP_TIMEOUT = 1 * _SEC_, + }; + +public: + ObLogDDLHandler(); + virtual ~ObLogDDLHandler(); + +public: + int start(); + void stop(); + void mark_stop_flag() { stop_flag_ = true; } + int push(PartTransTask *task, const int64_t timeout); + virtual int get_progress(uint64_t &ddl_min_progress_tenant_id, + int64_t &ddl_min_progress, + uint64_t &ddl_min_handle_log_id); + virtual int64_t get_part_trans_task_count() const; + +public: + int init(IObLogDdlParser *ddl_parser, + IObLogSequencer *committer, + IObLogErrHandler *err_handler, + IObLogSchemaGetter *schema_getter, + const bool skip_reversed_schema_version); + void destroy(); + +public: + void handle_ddl_routine(); + +private: + static void *handle_thread_func_(void *arg); + void mark_stmt_binlog_record_invalid_(DdlStmtTask &stmt_task); + void mark_all_binlog_records_invalid_(PartTransTask &task); + int get_old_schema_version_(const uint64_t tenant_id, + PartTransTask &task, + const int64_t tenant_ddl_cur_schema_version, + int64_t &old_schema_version); + int get_schema_version_by_timestamp_util_succ_(const uint64_t tenant_id, + const int64_t ddl_schema_version, + int64_t &old_schema_version); + int filter_ddl_stmt_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + IObLogTenantMgr &tenant_mgr, + bool &chosen, + const bool is_filter_by_tenant_id = false); + int handle_ddl_trans_(PartTransTask &task, + bool &is_schema_split_mode, + ObLogTenant &tenant); + int handle_tenant_ddl_task_(PartTransTask &task, + bool &is_schema_split_mode, + ObLogTenant &tenant); + int handle_ddl_stmt_(ObLogTenant &tenant, + PartTransTask &task, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version, + bool &is_schema_split_mode); + int handle_ddl_stmt_direct_output_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t schema_version); + int commit_ddl_stmt_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t schema_version, + const char *tenant_name = NULL, + const char *db_name = NULL, + const bool filter_ddl_stmt = false); + int get_schemas_for_ddl_stmt_(const uint64_t ddl_tenant_id, + DdlStmtTask &ddl_stmt, + const int64_t schema_version, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name); + bool is_use_new_schema_version(DdlStmtTask &ddl_stmt, + const int64_t schema_version); + int set_binlog_record_db_name_(ILogRecord &br, + const int64_t ddl_operation_type, + const char * const tenant_name, + const char * const db_name); + int 
handle_ddl_stmt_drop_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_drop_table_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + // alter table DDL + // Currently only dynamic partitioning is supported. No longer maintain support for updating a table's db_id, to avoid db_id dependency + // Currently two ways to modify table's db_id, move a table to another db + // 1. alter table + // 2. rename table + int handle_ddl_stmt_alter_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema, + const char *event); + int handle_ddl_stmt_create_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema); + int handle_ddl_stmt_rename_table_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + // Support global index, unique index + int handle_ddl_stmt_create_index_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema); + // Support global index, unique index + int handle_ddl_stmt_drop_index_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_add_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema); + int handle_ddl_stmt_drop_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_split_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema); + int handle_ddl_stmt_change_tablegroup_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_alter_tablegroup_partition_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_truncate_table_drop_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_truncate_drop_table_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_truncate_table_create_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema); + int handle_ddl_stmt_drop_index_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_add_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema); + int handle_ddl_stmt_drop_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema, + const bool is_schema_split_mode = false, + const bool is_del_tenant_start_op = false); + int handle_ddl_stmt_alter_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version); + int handle_ddl_stmt_rename_tenant_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema_version, + const int64_t new_schema_version); + int handle_ddl_stmt_drop_tenant_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_alter_database_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int 
handle_ddl_stmt_drop_database_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_drop_database_to_recyclebin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_rename_database_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t old_schema, + const int64_t new_schema); + int handle_ddl_stmt_split_begin_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version); + int handle_ddl_stmt_finish_schema_split_(ObLogTenant &tenant, + DdlStmtTask &ddl_stmt, + const int64_t new_schema_version, + bool &is_schema_split_mode); + int64_t get_start_serve_timestamp_(const int64_t new_schema_version, + const int64_t prepare_log_timestamp); + int decide_ddl_tenant_id_(const PartTransTask &task, + const bool is_schema_split_mode, + uint64_t &ddl_tenant_id); + int get_tenant_( + PartTransTask &task, + const bool is_schema_split_mode, + uint64_t &ddl_tenant_id, + ObLogTenantGuard &guard, + ObLogTenant *&tenant, + bool &is_tenant_served); + int update_ddl_info_(PartTransTask &task, + const bool is_schema_split_mode, + ObLogTenant &tenant); + int get_lazy_schema_guard_(const uint64_t tenant_id, + const int64_t version, + ObLogSchemaGuard &schema_guard); + int decide_ddl_stmt_database_id_(DdlStmtTask &ddl_stmt, + const int64_t schema_version, + ObLogSchemaGuard &schema_guard, + uint64_t &db_id); + template + int for_each_tenant_(Func &func); + int handle_ddl_offline_task_(const PartTransTask &task); + int next_task_(PartTransTask *&task); + int handle_task_(PartTransTask &task, + bool &is_schema_split_mode, + const uint64_t ddl_tenant_id, + ObLogTenant *tenant, + const bool is_tenant_served); + int dispatch_task_(PartTransTask *task, ObLogTenant *tenant, const bool is_tenant_served); + // Parsing ddl stmt at the moment of recovery completion of backup recovery tenant + // format: schema_version=${schema_version};teannt_gts=${tenant_gts} + // tenant can be treated created from restore if ddl_stmt of create_tenant_end contains key_schema_version(schema_version) and key_tenant_gts(tenant_gts) + // if error while parsing ddl_stmt_str, this method will return OB_ERR_UNEXPECTED, otherwise return is is_create_tenant_by_restore_ddl + // if not create by restore, value of is_create_tenant_by_restore_ddl and tenant_gts_value won't change + int parse_tenant_ddl_stmt_for_restore_(DdlStmtTask &ddl_stmt, int64_t &tenant_schema_version, + int64_t &tenant_gts_value, bool &is_create_tenant_by_restore_ddl); + int64_t decide_ddl_tenant_id_for_schema_non_split_mode_(const PartTransTask &task) const; + +public: + // Task queue + // Supports single-threaded production and multi-threaded consumption with lock protection + struct TaskQueue + { + public: + TaskQueue(); + ~TaskQueue(); + + int push(PartTransTask *task); + void pop(); + int64_t size() const; + + // Wait for top to be ready to process + // If the top task is ready, return OB_SUCCESS and return top_task + int next_ready_to_handle(const int64_t timeout, PartTransTask *&top_task, common::ObCond &cond); + + private: + SafePartTransTaskQueue queue_; + }; + +private: + bool inited_; + IObLogDdlParser *ddl_parser_; + IObLogSequencer *sequencer_; + IObLogErrHandler *err_handler_; + IObLogSchemaGetter *schema_getter_; + + bool skip_reversed_schema_version_; + + // thread id of ddl handler + pthread_t handle_pid_; + volatile bool stop_flag_ CACHE_ALIGNED; + + // Queue of pending tasks exported from 
Fetcher + TaskQueue ddl_fetch_queue_; + common::ObCond wait_formatted_cond_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogDDLHandler); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_ddl_parser.cpp b/src/liboblog/src/ob_log_ddl_parser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5355bd105afe95a330ade0e6faf2b68b15fbe6ed --- /dev/null +++ b/src/liboblog/src/ob_log_ddl_parser.cpp @@ -0,0 +1,194 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_PARSER + +#include "ob_log_ddl_parser.h" + +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_part_trans_parser.h" // IObLogPartTransParser +#include "ob_log_part_trans_task.h" // PartTransTask + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogDdlParser::ObLogDdlParser() : + inited_(false), + err_handler_(NULL), + part_trans_parser_(NULL), + push_seq_(0) +{ +} + +ObLogDdlParser::~ObLogDdlParser() +{ + destroy(); +} + +int ObLogDdlParser::init(const int64_t thread_num, + const int64_t queue_size, + IObLogErrHandler &err_handler, + IObLogPartTransParser &part_trans_parser) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("DDL parser has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(thread_num > MAX_THREAD_NUM) + || OB_UNLIKELY(queue_size <= 0)) { + LOG_ERROR("invalid argument", K(thread_num), LITERAL_K(MAX_THREAD_NUM), + K(queue_size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(DdlParserThread::init(thread_num, queue_size))) { + LOG_ERROR("init DDL parser queue thread fail", KR(ret), K(thread_num), K(queue_size)); + } else { + err_handler_ = &err_handler; + part_trans_parser_ = &part_trans_parser; + push_seq_ = 0; + inited_ = true; + + LOG_INFO("init DDL parser succ", K(thread_num), K(queue_size)); + } + + return ret; +} + +void ObLogDdlParser::destroy() +{ + DdlParserThread::destroy(); + + inited_ = false; + err_handler_ = NULL; + part_trans_parser_ = NULL; + push_seq_ = 0; +} + +int ObLogDdlParser::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("DDL parser has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(DdlParserThread::start())) { + LOG_ERROR("start DDL parser thread fail", KR(ret), "thread_num", get_thread_num()); + } else { + LOG_INFO("start DDL parser threads succ", "thread_num", get_thread_num()); + } + + return ret; +} + +void ObLogDdlParser::stop() +{ + if (inited_) { + DdlParserThread::stop(); + LOG_INFO("stop DDL parser threads succ", "thread_num", get_thread_num()); + } +} + +int ObLogDdlParser::push(PartTransTask &task, const int64_t timeout) +{ + int ret = OB_SUCCESS; + int64_t push_hash = ATOMIC_FAA(&push_seq_, 1); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("DDL parser has not been initialized"); + ret = OB_NOT_INIT; + } + // Verify that the task information is valid + else if (OB_UNLIKELY(! 
task.is_task_info_valid())) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ARGUMENT; + } + // Deal only DDL type tasks + else if (OB_UNLIKELY(! task.is_ddl_trans())) { + LOG_ERROR("task type is not supported by DDL parser", K(task)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(DdlParserThread::is_stoped())) { + LOG_INFO("DDL parser has been stoped"); + ret = OB_IN_STOP_STATE; + } else if (OB_FAIL(DdlParserThread::push(&task, push_hash, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("push task into DDL queue thread fail", KR(ret), K(task), K(push_hash)); + } + } else { + LOG_DEBUG("push task into DDL parser", K(push_hash), K(task)); + } + + return ret; +} + +int ObLogDdlParser::get_part_trans_task_count(int64_t &task_num) +{ + return DdlParserThread::get_total_task_num(task_num); +} + +int ObLogDdlParser::handle(void *data, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + PartTransTask *task = (PartTransTask *)data; + + if (OB_UNLIKELY(! inited_) || OB_ISNULL(part_trans_parser_)) { + LOG_ERROR("DDL parser has not been initialized", K(part_trans_parser_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", KPC(task)); + ret = OB_INVALID_ARGUMENT; + } + // Receive only DDL type tasks + else if (OB_UNLIKELY(! task->is_ddl_trans())) { + LOG_ERROR("task type is not supported by DDL Parser", KPC(task)); + ret = OB_NOT_SUPPORTED; + } else { + LOG_DEBUG("DDL parser handle task", K(thread_index), KPC(task)); + + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + + if (OB_FAIL(get_tenant_compat_mode(task->get_tenant_id(), compat_mode, stop_flag))) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), "tenant_id", task->get_tenant_id(), + "compat_mode", print_compat_mode(compat_mode), KPC(task)); + } else { + share::CompatModeGuard g(compat_mode); + + // Parse DDL task + if (OB_FAIL(part_trans_parser_->parse(*task, stop_flag))) { + LOG_ERROR("parse DDL task fail", KR(ret), KPC(task), "compat_mode", print_compat_mode(compat_mode)); + } else { + // The DDL task does not need to go through the formatter module, and here the formatting is set to complete directly + // DDL Handler directly waits for formatting to complete or not + task->set_formatted(); + // The task cannot be accessed after the marker is completed + task = NULL; + } + } + } + + // exit on fail + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "DDL parser thread exits, thread_index=%ld, err=%d", + thread_index, ret); + stop_flag = true; + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_ddl_parser.h b/src/liboblog/src/ob_log_ddl_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..8e05f7697c7e160358a1ccaf099e88672fcf49d6 --- /dev/null +++ b/src/liboblog/src/ob_log_ddl_parser.h @@ -0,0 +1,87 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_DDL_PARSER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_DDL_PARSER_H_ + +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread + +namespace oceanbase +{ + +namespace liboblog +{ +class PartTransTask; + +class IObLogDdlParser +{ +public: + static const int64_t MAX_THREAD_NUM = 256; +public: + virtual ~IObLogDdlParser() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(PartTransTask &task, const int64_t timeout) = 0; + virtual int get_part_trans_task_count(int64_t &task_num) = 0; +}; + +typedef common::ObMQThread DdlParserThread; + +class IObLogErrHandler; +class IObLogPartTransParser; +class ObLogDdlParser : public IObLogDdlParser, public DdlParserThread +{ + enum + { + DATA_OP_TIMEOUT = 1 * 1000 * 1000, + }; + +public: + ObLogDdlParser(); + virtual ~ObLogDdlParser(); + +public: + // DdlParserThread handle function + virtual int handle(void *task, const int64_t thread_index, volatile bool &stop_flag); + +public: + int start(); + void stop(); + void mark_stop_flag() { DdlParserThread::mark_stop_flag(); } + int push(PartTransTask &task, const int64_t timeout); + int get_part_trans_task_count(int64_t &task_num); + +public: + int init(const int64_t thread_num, + const int64_t queue_size, + IObLogErrHandler &err_handler, + IObLogPartTransParser &part_trans_parser); + void destroy(); + +private: + bool inited_; + IObLogErrHandler *err_handler_; + IObLogPartTransParser *part_trans_parser_; + + // The serial number of the currently processed task, used to rotate task push to the queue + int64_t push_seq_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogDdlParser); +}; +} +} + +#endif diff --git a/src/liboblog/src/ob_log_dlist.h b/src/liboblog/src/ob_log_dlist.h new file mode 100644 index 0000000000000000000000000000000000000000..888abce4d671ae435db568a32613c38cddd7f5e4 --- /dev/null +++ b/src/liboblog/src/ob_log_dlist.h @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_DLIST_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_DLIST_H__ + +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV + +namespace oceanbase +{ +namespace liboblog +{ +template +class ObLogDList; + +////////////////////////////////// ObLogDListNode ////////////////////////////////// +// Bidirectional linked list +// Requires the T type to be a derived class of ObLogDListNode +template +class ObLogDListNode +{ + friend class ObLogDList; +public: + ObLogDListNode() { reset(); } + virtual ~ObLogDListNode() { reset(); } + +public: + void reset() + { + next_ = NULL; + prev_ = NULL; + } + + void reset_list_node() + { + reset(); + } + + T *get_next() { return next_; } + T *get_prev() { return prev_; } + +protected: + T *next_; + T *prev_; +}; + +////////////////////////////////// ObLogDList ////////////////////////////////// +// Bidirectional linked list +// Requires the T type to be a derived class of ObLogDListNode +template +class ObLogDList +{ +public: + ObLogDList() { reset(); } + virtual ~ObLogDList() { reset(); } + +public: + void reset() + { + count_ = 0; + head_ = NULL; + } + + T *head() { return head_; } + int64_t count() const { return count_; } + + // Add to header node + void add_head(T &node) + { + node.prev_ = NULL; + node.next_ = head_; + + if (NULL != head_) { + head_->prev_ = &node; + } + + head_ = &node; + count_++; + } + + // delete node + void erase(T &node) + { + if (head_ == &node) { + head_ = node.next_; + } + + if (NULL != node.prev_) { + node.prev_->next_ = node.next_; + } + + if (NULL != node.next_) { + node.next_->prev_ = node.prev_; + } + + // reset node info + node.reset_list_node(); + count_--; + } + + bool operator == (const ObLogDList &other) + { + return other.head_ == head_ && other.count_ == count_; + } + + bool operator != (const ObLogDList &other) + { + return ! (*this == other); + } + + TO_STRING_KV(K_(count), KP_(head)); + +private: + T *head_; + int64_t count_; +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_dml_parser.cpp b/src/liboblog/src/ob_log_dml_parser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ac254376b27d91a1a35de4512a7dd68e2e28f063 --- /dev/null +++ b/src/liboblog/src/ob_log_dml_parser.cpp @@ -0,0 +1,274 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_PARSER + +#include "ob_log_dml_parser.h" + +#include "ob_log_formatter.h" // IObLogFormatter +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_part_trans_parser.h" // IObLogPartTransParser +#include "ob_ms_queue_thread.h" // BitSet + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogDmlParser::ObLogDmlParser() : + inited_(false), + formatter_(NULL), + err_handler_(NULL), + part_trans_parser_(NULL), + log_entry_task_count_(0) +{ +} + +ObLogDmlParser::~ObLogDmlParser() +{ + destroy(); +} + +int ObLogDmlParser::init(const int64_t parser_thread_num, + const int64_t parser_queue_size, + IObLogFormatter &formatter, + IObLogErrHandler &err_handler, + IObLogPartTransParser &part_trans_parser) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("parser has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(parser_thread_num <= 0) + || OB_UNLIKELY(parser_thread_num > MAX_THREAD_NUM) + || OB_UNLIKELY(parser_queue_size <= 0)) { + LOG_ERROR("invalid argument", K(parser_thread_num), LITERAL_K(MAX_THREAD_NUM), + K(parser_queue_size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(DmlParserThread::init(parser_thread_num, parser_queue_size))) { + LOG_ERROR("init parser queue thread fail", KR(ret), K(parser_thread_num), K(parser_queue_size)); + } else { + formatter_ = &formatter; + err_handler_ = &err_handler; + part_trans_parser_ = &part_trans_parser; + log_entry_task_count_ = 0; + inited_ = true; + + LOG_INFO("init DML parser succ", K(parser_thread_num), K(parser_queue_size)); + } + + return ret; +} + +void ObLogDmlParser::destroy() +{ + DmlParserThread::destroy(); + + inited_ = false; + formatter_ = NULL; + err_handler_ = NULL; + part_trans_parser_ = NULL; + log_entry_task_count_ = 0; +} + +int ObLogDmlParser::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("parser has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(DmlParserThread::start())) { + LOG_ERROR("start parser thread fail", KR(ret), "thread_num", get_thread_num()); + } else { + LOG_INFO("start DML parser threads succ", "thread_num", get_thread_num()); + } + + return ret; +} + +void ObLogDmlParser::stop() +{ + if (inited_) { + DmlParserThread::stop(); + LOG_INFO("stop DML parser threads succ", "thread_num", get_thread_num()); + } +} + +int ObLogDmlParser::push(ObLogEntryTask &task, const int64_t timeout) +{ + int ret = OB_SUCCESS; + PartTransTask *part_trans_task = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("DML parser has not been initialized"); + ret = OB_NOT_INIT; + } + // Verify that the task information is valid + else if (OB_UNLIKELY(! 
task.is_valid())) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(part_trans_task = static_cast(task.get_host()))) { + LOG_ERROR("part_trans_task is NULL", K(part_trans_task), K(task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(DmlParserThread::is_stoped())) { + LOG_INFO("DML parser has been stoped"); + ret = OB_IN_STOP_STATE; + } else { + // Hash rules ensure that tasks for the same partition transaction are assigned to the same queue + const uint64_t hash_value = part_trans_task->get_partition().hash(); + + if (OB_FAIL(DmlParserThread::push(&task, hash_value, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("push task into DML queue thread fail", KR(ret), K(task)); + } + } else { + ATOMIC_INC(&log_entry_task_count_); + LOG_DEBUG("push task into DML parser", K(task)); + } + } + + return ret; +} + +int ObLogDmlParser::get_log_entry_task_count(int64_t &task_num) +{ + int ret = OB_SUCCESS; + + task_num = ATOMIC_LOAD(&log_entry_task_count_); + + return ret; +} + +int ObLogDmlParser::handle(void *data, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ObLogEntryTask *task = (ObLogEntryTask *)(data); + PartTransTask *part_trans_task = NULL; + + if (OB_UNLIKELY(! inited_) || OB_ISNULL(part_trans_parser_)) { + LOG_ERROR("DML parser has not been initialized", K(part_trans_parser_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", KPC(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(part_trans_task = static_cast(task->get_host()))) { + LOG_ERROR("part_trans_task is NULL", K(part_trans_task), KPC(task)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_DEBUG("DML parser handle task", K(thread_index), KPC(task)); + const uint64_t tenant_id = part_trans_task->get_tenant_id(); + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + + if (OB_FAIL(get_tenant_compat_mode(tenant_id, compat_mode, stop_flag))) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id), + "compat_mode", print_compat_mode(compat_mode), KPC(task)); + } else { + share::CompatModeGuard g(compat_mode); + + if (OB_FAIL(part_trans_parser_->parse(*task, stop_flag))) { + LOG_ERROR("parse task fail", KR(ret), KPC(task), "compat_mode", print_compat_mode(compat_mode)); + } else if (OB_FAIL(dispatch_task_(*task, *part_trans_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch_task_ fail", KR(ret), KPC(task), KPC(part_trans_task)); + } + } + } + + if (OB_SUCC(ret)) { + ATOMIC_DEC(&log_entry_task_count_); + } + } + + // Failure to exit + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "DML parser thread exits, thread_index=%ld, err=%d", + thread_index, ret); + stop_flag = true; + } + + return ret; +} + + +int ObLogDmlParser::dispatch_task_(ObLogEntryTask &log_entry_task, + PartTransTask &part_trans_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const int64_t stmt_count = log_entry_task.get_stmt_list().num_; + + if (stmt_count <= 0 ) { + if (OB_FAIL(handle_empty_stmt_(log_entry_task, part_trans_task, stop_flag))) { + LOG_ERROR("handle_empty_stmt_ fail", KR(ret), K(log_entry_task), K(part_trans_task)); + } + } else { + if (OB_FAIL(push_task_into_formatter_(log_entry_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_task_into_formatter_ fail", KR(ret), K(log_entry_task), K(part_trans_task)); + } + } + } + + return ret; +} + +int 
ObLogDmlParser::handle_empty_stmt_(ObLogEntryTask &log_entry_task, + PartTransTask &part_trans_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + bool is_unserved_part_trans_task_can_be_recycled = false; + + if (OB_FAIL(part_trans_task.handle_log_entry_task_callback(ObLogEntryTask::DML_PARSER_CB, + log_entry_task, + is_unserved_part_trans_task_can_be_recycled))) { + LOG_ERROR("handle_log_entry_task_callback fail", KR(ret), K(log_entry_task), + K(is_unserved_part_trans_task_can_be_recycled), K(part_trans_task), K(stop_flag)); + } else if (is_unserved_part_trans_task_can_be_recycled) { + LOG_DEBUG("handle_log_entry_task_callback: part_trans_task is revert", K(part_trans_task)); + part_trans_task.revert(); + } else {} + + return ret; +} + +int ObLogDmlParser::push_task_into_formatter_(ObLogEntryTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(formatter_)) { + LOG_ERROR("invalid formatter", K(formatter_)); + ret = OB_INVALID_ARGUMENT; + } else { + const StmtList &stmt_list = task.get_stmt_list(); + IStmtTask *stmt = stmt_list.head_; + + if (OB_FAIL(formatter_->push(stmt, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("formatter_ push fail", KR(ret), K(task)); + } + } else { + // succ + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_dml_parser.h b/src/liboblog/src/ob_log_dml_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..9ee03760bf50ced25f634f06514cd49d0a7bcd0d --- /dev/null +++ b/src/liboblog/src/ob_log_dml_parser.h @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_DML_PARSER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_DML_PARSER_H_ + +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread +#include "ob_log_part_trans_task.h" // ObLogEntryTask + +namespace oceanbase +{ + +namespace liboblog +{ +class PartTransTask; + +class IObLogDmlParser +{ +public: + static const int64_t MAX_THREAD_NUM = 256; +public: + virtual ~IObLogDmlParser() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(ObLogEntryTask &task, const int64_t timeout) = 0; + virtual int get_log_entry_task_count(int64_t &task_num) = 0; +}; + +// DML type tasks are assigned global task sequence numbers for the purpose of sequential consumption within Sequencer. +// Because Sequencer is a fixed-length seq queue internally, Parser has to ensure that threads are allocated by task +// sequence number to avoid starvation of tasks with small serial numbers. +// +// ObSeqThread is such a thread pool, which ensures concurrent consumption of tasks in order. 
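+// In this implementation the worker pool is the ObMQThread-based typedef below; ObLogDmlParser::push() hashes each
+// task by its partition, so statements of the same partition transaction always land on the same queue thread and
+// are therefore consumed in order.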
+typedef common::ObMQThread DmlParserThread; + +class IObLogFormatter; +class IObLogErrHandler; +class IObLogPartTransParser; +class ObLogDmlParser : public IObLogDmlParser, public DmlParserThread +{ + enum + { + DATA_OP_TIMEOUT = 1 * 1000 * 1000, + }; + +public: + ObLogDmlParser(); + virtual ~ObLogDmlParser(); + +public: + // Handler functions for ObSeqThread + virtual int handle(void *task, const int64_t thread_index, volatile bool &stop_flag); + +public: + int start(); + void stop(); + void mark_stop_flag() { DmlParserThread::mark_stop_flag(); } + int push(ObLogEntryTask &task, const int64_t timeout); + int get_log_entry_task_count(int64_t &task_num); + +public: + int init(const int64_t parser_thread_num, + const int64_t parser_queue_size, + IObLogFormatter &formatter, + IObLogErrHandler &err_handler, + IObLogPartTransParser &part_trans_parser); + void destroy(); + +private: + int dispatch_task_(ObLogEntryTask &log_entry_task, PartTransTask &part_trans_task, volatile bool &stop_flag); + int handle_empty_stmt_(ObLogEntryTask &log_entry_task, PartTransTask &part_trans_task, volatile bool &stop_flag); + int push_task_into_formatter_(ObLogEntryTask &log_entry_task, volatile bool &stop_flag); + +private: + bool inited_; + IObLogFormatter *formatter_; + IObLogErrHandler *err_handler_; + IObLogPartTransParser *part_trans_parser_; + int64_t log_entry_task_count_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogDmlParser); +}; +} +} + +#endif diff --git a/src/liboblog/src/ob_log_entry_task_pool.cpp b/src/liboblog/src/ob_log_entry_task_pool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2cf6220e8b03a8ef6e091ba356b88ae4d290fbc --- /dev/null +++ b/src/liboblog/src/ob_log_entry_task_pool.cpp @@ -0,0 +1,108 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_entry_task_pool.h" + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogEntryTaskPool::ObLogEntryTaskPool() + :inited_(false), + pool_() +{ +} + +ObLogEntryTaskPool::~ObLogEntryTaskPool() +{ + destroy(); +} + +int ObLogEntryTaskPool::init(const int64_t fixed_task_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("RowDataTaskPool has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(fixed_task_count <= 0)) { + LOG_ERROR("invalid argument", K(fixed_task_count)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(pool_.init(fixed_task_count, "LEntryTaskPool"))) { + LOG_ERROR("row data task pool init fail", KR(ret), K(fixed_task_count)); + } else { + inited_ = true; + LOG_INFO("LogEntryTaskPool init success", K(fixed_task_count)); + } + + return ret; +} + +void ObLogEntryTaskPool::destroy() +{ + inited_ = false; + pool_.destroy(); +} + +int ObLogEntryTaskPool::alloc(ObLogEntryTask *&log_entry_task, + void *host) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("RowDataTaskPool has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(pool_.alloc(log_entry_task))) { + LOG_ERROR("alloc binlog record fail", KR(ret)); + } else if (OB_ISNULL(log_entry_task)) { + LOG_ERROR("alloc binlog record fail", K(log_entry_task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + log_entry_task->set_host(host); + } + + return ret; +} + +void ObLogEntryTaskPool::free(ObLogEntryTask *log_entry_task) +{ + int ret = OB_SUCCESS; + + if (OB_LIKELY(inited_) && OB_LIKELY(NULL != log_entry_task)) { + // Timely memory recycling + log_entry_task->reset(); + + if (OB_FAIL(pool_.free(log_entry_task))) { + LOG_ERROR("free binlog record fail", KR(ret), K(log_entry_task)); + } else { + log_entry_task = NULL; + } + } +} + +int64_t ObLogEntryTaskPool::get_alloc_count() const +{ + return pool_.get_alloc_count(); +} + +void ObLogEntryTaskPool::print_stat_info() const +{ + _LOG_INFO("[STAT] [LOG_ENTRY_TASK_POOL] TOTAL=%ld FREE=%ld FIXED=%ld", + pool_.get_alloc_count(), pool_.get_free_count(), pool_.get_fixed_count()); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_entry_task_pool.h b/src/liboblog/src/ob_log_entry_task_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..82213294720c89d940806c66e3fef4363df3756b --- /dev/null +++ b/src/liboblog/src/ob_log_entry_task_pool.h @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_SRC_LIBOBLOG_OB_LOG_ENTRY_TASK_POOL_ +#define OCEANBASE_SRC_LIBOBLOG_OB_LOG_ENTRY_TASK_POOL_ + +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool +#include "common/ob_partition_key.h" // ObPartitionKey +#include "ob_log_part_trans_task.h" // ObLogEntryTask + +namespace oceanbase +{ +namespace liboblog +{ +class IObLogEntryTaskPool +{ +public: + virtual ~IObLogEntryTaskPool() {} + +public: + virtual int alloc(ObLogEntryTask *&task, + void *host) = 0; + virtual void free(ObLogEntryTask *task) = 0; + virtual int64_t get_alloc_count() const = 0; + virtual void print_stat_info() const = 0; +}; + +////////////////////////////////////////////////////////////////////////////// + +// ObLogEntryTaskPool +class ObLogEntryTaskPool : public IObLogEntryTaskPool +{ + typedef common::ObSmallObjPool LogEntryTaskPool; + +public: + ObLogEntryTaskPool(); + virtual ~ObLogEntryTaskPool(); + +public: + int alloc(ObLogEntryTask *&log_entry_task, + void *host); + void free(ObLogEntryTask *log_entry_task); + int64_t get_alloc_count() const; + void print_stat_info() const; + +public: + int init(const int64_t fixed_task_count); + void destroy(); + +private: + bool inited_; + LogEntryTaskPool pool_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogEntryTaskPool); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif diff --git a/src/liboblog/src/ob_log_entry_wrapper.cpp b/src/liboblog/src/ob_log_entry_wrapper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e15e1635195b0952820cb29fd8a9ab72f177755c --- /dev/null +++ b/src/liboblog/src/ob_log_entry_wrapper.cpp @@ -0,0 +1,81 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_entry_wrapper.h" + +namespace oceanbase +{ +using namespace common; +namespace liboblog +{ +ObLogEntryWrapper::ObLogEntryWrapper(const bool is_pg, + const clog::ObLogEntry &log_entry, + ObLogAggreTransLog &aggre_trans_log) + : is_pg_(is_pg), + log_entry_(log_entry), + aggre_trans_log_(aggre_trans_log) +{ +} + +ObLogEntryWrapper::~ObLogEntryWrapper() +{ + is_pg_ = false; +} + +int64_t ObLogEntryWrapper::get_submit_timestamp() const +{ + int64_t submit_timestamp = OB_INVALID_TIMESTAMP; + + if (! is_pg_) { + submit_timestamp = log_entry_.get_header().get_submit_timestamp(); + } else { + submit_timestamp = aggre_trans_log_.submit_timestamp_; + } + + return submit_timestamp; +} + +const char *ObLogEntryWrapper::get_buf() const +{ + const char *res = NULL; + + if (! is_pg_) { + res = log_entry_.get_buf(); + } else { + res = aggre_trans_log_.buf_; + } + + return res; +} + +int64_t ObLogEntryWrapper::get_buf_len() const +{ + int64_t buf_len = 0; + + if (! 
is_pg_) { + buf_len = log_entry_.get_header().get_data_len(); + } else { + buf_len = aggre_trans_log_.buf_len_; + } + + return buf_len; +} + +bool ObLogEntryWrapper::is_batch_committed() const +{ + return log_entry_.is_batch_committed(); +} + +} // liboblog +} // oceanbase diff --git a/src/liboblog/src/ob_log_entry_wrapper.h b/src/liboblog/src/ob_log_entry_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..5c71ec8a0ad2e863c0a82d19cb7aaae3499bbd49 --- /dev/null +++ b/src/liboblog/src/ob_log_entry_wrapper.h @@ -0,0 +1,122 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_LOG_ENTRY_WRAPPER_H__ +#define OCEANBASE_LIBOBLOG_LOG_ENTRY_WRAPPER_H__ + +#include "clog/ob_log_entry_header.h" // ObLogEntryHeader +#include "clog/ob_log_entry.h" // ObLogEntry +#include "storage/ob_storage_log_type.h" // ObStorageType + +namespace oceanbase +{ +namespace liboblog +{ +// 1. For non-PG, ObLogEntry prevails +// 2. For PG, the log format is as follows +// -------------------------------------------------------------------------------------------- +// ObLogEntry: | ObLogEntryHeader | offset | submit_timestamp | log_type | trans_id_inc | trans data | .... +// -------------------------------------------------------------------------------------------- +// 1. 4Byte offset: records the offset of the next transaction log +// 2. 8Byte submit_timestamp: records the commit timestamp of this transaction log +// 3. 8Byte log_type: records the transaction log type +// 4. 8Byte trans_id_inc: records the ObTransId inc number, ensuring that the same transaction is committed to the same replay engine queue +// +// So, for PG aggregation logs: +// 1. submit_timestamp cannot use the commit timestamp of the ObLogEntryHeader, it needs to use the 8Byte-submit_timestamp of the corresponding transaction log +// 2. buf and buf_len should be parsed and calculated, not using the ObLogEntryHeader's information +// 3. 
Aggregate log parsing needs to be done continuously until the end of the buffer + +struct ObLogAggreTransLog +{ + int32_t next_log_offset_; + int64_t submit_timestamp_; + storage::ObStorageLogType log_type_; + int64_t trans_id_inc_; + const char *buf_; + int64_t buf_len_; + + ObLogAggreTransLog() { reset(); } + + void reset() + { + next_log_offset_ = 0; + submit_timestamp_ = common::OB_INVALID_TIMESTAMP; + log_type_ = storage::OB_LOG_UNKNOWN; + trans_id_inc_ = 0; + buf_ = NULL; + buf_len_ = 0; + } + + void reset(const int64_t next_log_offset, + const int64_t submit_timestamp, + const storage::ObStorageLogType log_type, + const int64_t trans_id_inc, + const char *buf, + const int64_t buf_len) + { + next_log_offset_ = next_log_offset; + submit_timestamp_ = submit_timestamp; + log_type_ = log_type; + trans_id_inc_ = trans_id_inc; + buf_ = buf; + buf_len_ = buf_len; + } + + TO_STRING_KV(K_(next_log_offset), + K_(submit_timestamp), + K_(log_type), + K_(trans_id_inc), + KP_(buf), + K_(buf_len)); +}; + +class ObLogEntryWrapper +{ +public: + ObLogEntryWrapper(const bool is_pg, + const clog::ObLogEntry &log_entry, + ObLogAggreTransLog &aggre_trans_log); + ~ObLogEntryWrapper(); + +public: + const clog::ObLogEntryHeader &get_header() const { return log_entry_.get_header(); } + + bool is_pg_aggre_log() const { return is_pg_; } + + // 1. Non-PG, returns the commit timestamp of the ObLogEntryHeader + // 2. PG, returns the commit timestamp of the parsed log + int64_t get_submit_timestamp() const; + // 1. Non-PG, based on ObLogEntry + // 2. PG, returns the corresponding buf and buf_len of the aggregated log + const char *get_buf() const; + int64_t get_buf_len() const; + bool is_batch_committed() const; + int32_t get_log_offset() const { return aggre_trans_log_.next_log_offset_; } + + TO_STRING_KV(K_(is_pg), + K_(log_entry), + K_(aggre_trans_log)); + +private: + bool is_pg_; + const clog::ObLogEntry &log_entry_; + ObLogAggreTransLog &aggre_trans_log_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogEntryWrapper); +}; + +} // liboblog +} // oceanbase + +#endif diff --git a/src/liboblog/src/ob_log_fake_common_config.h b/src/liboblog/src/ob_log_fake_common_config.h new file mode 100644 index 0000000000000000000000000000000000000000..bcb9308657529ee5c8de40b7912c1c905707e3b1 --- /dev/null +++ b/src/liboblog/src/ob_log_fake_common_config.h @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_FAKE_COMMON_CONFIG_H__ +#define OCEANBASE_LIBOBLOG_FAKE_COMMON_CONFIG_H__ + +#include "share/ob_define.h" +#include "share/config/ob_common_config.h" // ObCommonConfig + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogFakeCommonConfig : public common::ObCommonConfig +{ +public: + ObLogFakeCommonConfig() {} + virtual ~ObLogFakeCommonConfig() {} + + virtual int check_all() const { return 0; } + virtual void print() const { /* do nothing */ } + virtual common::ObServerRole get_server_type() const { return common::OB_OBLOG; } + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFakeCommonConfig); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_FAKE_COMMON_CONFIG_H__ */ diff --git a/src/liboblog/src/ob_log_fetch_log_rpc.cpp b/src/liboblog/src/ob_log_fetch_log_rpc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e8f581bc6f56f9d139d92ed0794008e4b1446f7 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_log_rpc.cpp @@ -0,0 +1,1557 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetch_log_rpc.h" + +#include // getpid +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "lib/utility/ob_macro_utils.h" // OB_FAIL +#include "lib/oblog/ob_log_module.h" // LOG_ERROR +#include "lib/allocator/ob_malloc.h" // ob_malloc/ob_free + +#include "ob_log_rpc.h" // IObLogRpc +#include "ob_log_stream_worker.h" // IObLogStreamWorker +#include "ob_log_fetch_stream.h" // FetchStream +#include "ob_log_trace_id.h" // ObLogTraceIdGuard +#include "ob_log_config.h" // ObLogConfig + +using namespace oceanbase::common; +using namespace oceanbase::obrpc; + +namespace oceanbase +{ +namespace liboblog +{ + +///////////////////////////// OpenStreamSRpc //////////////////////// + +OpenStreamSRpc::OpenStreamSRpc() : + req_(), + resp_(), + rcode_() +{} + +OpenStreamSRpc::~OpenStreamSRpc() +{ + reset(); +} + +void OpenStreamSRpc::reset() +{ + req_.reset(); + resp_.reset(); + rcode_.reset(); +} + +int OpenStreamSRpc::open_stream(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout, + FetchTaskList &task_list, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time) +{ + int ret = OB_SUCCESS; + + // build request + if (OB_FAIL(build_request_(task_list, stale_stream_seq, stream_life_time))) { + LOG_ERROR("build request fail", KR(ret), K(task_list), K(stale_stream_seq), K(stream_life_time)); + } else { + ret = launch_open_stream_rpc_(rpc, svr, timeout); + } + return ret; +} + +int OpenStreamSRpc::open_stream(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout, + const common::ObPartitionKey &pkey, + const uint64_t next_log_id, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time) +{ + int ret = OB_SUCCESS; + // build request + if (OB_FAIL(build_request_(pkey, next_log_id, stale_stream_seq, stream_life_time))) { + LOG_ERROR("build request fail", KR(ret), K(pkey), 
K(next_log_id), K(stale_stream_seq), + K(stream_life_time)); + } else { + ret = launch_open_stream_rpc_(rpc, svr, timeout); + } + return ret; +} + +int OpenStreamSRpc::launch_open_stream_rpc_(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + // send open stream rpc + if(OB_FAIL(rpc.open_stream(svr, req_, resp_, timeout))) { + LOG_ERROR("send open stream rpc fail", KR(ret), K(svr), K(req_), K(resp_), K(timeout)); + + // set error code + rcode_.rcode_ = ret; + (void)snprintf(rcode_.msg_, sizeof(rcode_.msg_), "send open stream rpc fail"); + + // If the RPC send fails, the server is considered invalid directly and the fetch log server should be switched + // Reset the error code + ret = OB_SUCCESS; + } else { + // RPC success + } + + return ret; +} + +int OpenStreamSRpc::build_request_(FetchTaskList &part_list, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(set_request_basic_param_(stale_stream_seq, stream_life_time))) { + LOG_ERROR("set request basic param fail", KR(ret), K(stale_stream_seq), K(stream_life_time)); + } else if (OB_FAIL(set_request_part_list_(part_list))) { + LOG_ERROR("set request part list fail", KR(ret), K(part_list)); + } + return ret; +} + +int OpenStreamSRpc::build_request_(const common::ObPartitionKey &pkey, + const uint64_t next_log_id, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(set_request_basic_param_(stale_stream_seq, stream_life_time))) { + LOG_ERROR("set request basic param fail", KR(ret), K(stale_stream_seq), K(stream_life_time)); + } else if (OB_FAIL(set_request_part_list_(pkey, next_log_id))) { + LOG_ERROR("set request part list fail", KR(ret), K(pkey), K(next_log_id)); + } + return ret; +} + +int OpenStreamSRpc::set_request_basic_param_(const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time) +{ + int ret = OB_SUCCESS; + + //////////////// set request parameters ///////////////// + // Set the outdated stream identifier to allow the server side to remove the outdated stream structure + req_.set_stale_stream(stale_stream_seq); + + // set stream lifetime + req_.set_stream_lifetime(stream_life_time); + + // set pid of liboblog progress + req_.set_liboblog_pid(static_cast(getpid())); + return ret; +} + +int OpenStreamSRpc::set_request_part_list_(FetchTaskList &part_list) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(part_list.count() <= 0) || OB_ISNULL(part_list.head())) { + LOG_ERROR("part list is empty", K(part_list)); + ret = OB_INVALID_ARGUMENT; + } else { + PartFetchCtx *task = part_list.head(); + + // Iterate through the task list and set the fetch_log_request list + while (OB_SUCCESS == ret && NULL != task) { + obrpc::ObLogOpenStreamReq::Param param; + param.pkey_ = task->get_pkey(); + param.start_log_id_ = task->get_next_log_id(); + + if (OB_FAIL(req_.append_param(param))) { + LOG_ERROR("open stream request append param fail", KR(ret), K(param), K(req_)); + } else { + task = task->get_next(); + } + } + } + return ret; +} + +int OpenStreamSRpc::set_request_part_list_(const common::ObPartitionKey &pkey, + const uint64_t next_log_id) +{ + int ret = OB_SUCCESS; + obrpc::ObLogOpenStreamReq::Param param; + param.pkey_ = pkey; + param.start_log_id_ = next_log_id; + + if (OB_FAIL(req_.append_param(param))) { + LOG_ERROR("open stream request append param fail", KR(ret), K(param), K(req_)); + } + return ret; +} + +////////////////////////////// 
FetchLogSRpc ////////////////////////////// +FetchLogSRpc::FetchLogSRpc() : + req_(), + resp_(), + rcode_(), + cb_(*this), + cond_(), + rpc_done_(false) +{ +} + +FetchLogSRpc::~FetchLogSRpc() +{ + reset(); +} + +void FetchLogSRpc::reset() +{ + req_.reset(); + resp_.reset(); + rcode_.reset(); + rpc_done_ = false; +} + +int FetchLogSRpc::build_request_(const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back) +{ + int ret = OB_SUCCESS; + reset(); + + // Set request parameters + req_.set_upper_limit_ts(upper_limit); + req_.set_log_cnt_per_part_per_round(fetch_log_cnt_per_part_per_round); + req_.set_stream_seq(seq); + req_.set_feedback(need_feed_back); + return ret; +} + +int FetchLogSRpc::fetch_log(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout, + const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back) +{ + int ret = OB_SUCCESS; + + reset(); + + // build request + if (OB_FAIL(build_request_(seq, upper_limit, fetch_log_cnt_per_part_per_round, need_feed_back))) { + LOG_ERROR("build request fail", KR(ret), K(seq), K(upper_limit), + K(fetch_log_cnt_per_part_per_round), K(need_feed_back)); + } + // Send asynchronous fetch log RPC + else if (OB_FAIL(rpc.async_stream_fetch_log(svr, req_, cb_, timeout))) { + LOG_ERROR("send async stream fetch log rpc fail", KR(ret), K(svr), K(req_), K(timeout)); + + rcode_.rcode_ = ret; + (void)snprintf(rcode_.msg_, sizeof(rcode_.msg_), "send async stream fetch log rpc fail"); + + // RPC send fails, finish directly + rpc_done_ = true; + + // RPC sending errors are always considered to be Server problems and require switching servers + // Reset the error code + ret = OB_SUCCESS; + } else { + // If the RPC is sent successfully, block waits for rpc done + while (! 
ATOMIC_LOAD(&rpc_done_)) { + // This relies on the fact that the RPC must eventually call back, so the TIMEOUT time is not set + cond_.wait(); + } + } + + return ret; +} + +int FetchLogSRpc::set_resp(const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_SUCCESS == rcode.rcode_ && NULL == resp)) { + LOG_ERROR("invalid fetch log response", K(rcode), K(resp)); + ret = OB_INVALID_ARGUMENT; + } else { + rcode_ = rcode; + + if (OB_SUCCESS == rcode.rcode_) { + if (OB_FAIL(resp_.assign(*resp))) { + LOG_ERROR("assign new fetch log resp fail", KR(ret), KPC(resp), K(resp_)); + } + } + } + + if (OB_SUCCESS == ret) { + // mark rpc done + ATOMIC_SET(&rpc_done_, true); + cond_.signal(); + } + return ret; +} + +////////////////////////////// FetchLogSRpc::RpcCB ////////////////////////////// +FetchLogSRpc::RpcCB::RpcCB(FetchLogSRpc &host) : host_(host) +{} + +FetchLogSRpc::RpcCB::~RpcCB() +{} + +rpc::frame::ObReqTransport::AsyncCB *FetchLogSRpc::RpcCB::clone(const rpc::frame::SPAlloc &alloc) const +{ + void *buf = NULL; + RpcCB *cb = NULL; + + if (OB_ISNULL(buf = alloc(sizeof(RpcCB)))) { + LOG_ERROR("clone rpc callback fail", K(buf), K(sizeof(RpcCB))); + } else if (OB_ISNULL(cb = new(buf) RpcCB(host_))) { + LOG_ERROR("construct RpcCB fail", K(buf)); + } else { + // success + } + + return cb; +} + +int FetchLogSRpc::RpcCB::process() +{ + int ret = OB_SUCCESS; + ObLogStreamFetchLogResp &result = RpcCBBase::result_; + ObRpcResultCode &rcode = RpcCBBase::rcode_; + const common::ObAddr &svr = RpcCBBase::dst_; + + if (OB_FAIL(do_process_(rcode, &result))) { + LOG_ERROR("process fetch log callback fail", KR(ret), K(result), K(rcode), K(svr)); + } + // Note: destruct response after asynchronous RPC processing + result.reset(); + + return ret; +} + +void FetchLogSRpc::RpcCB::on_timeout() +{ + int ret = OB_SUCCESS; + ObRpcResultCode rcode; + const common::ObAddr &svr = RpcCBBase::dst_; + + rcode.rcode_ = OB_TIMEOUT; + (void)snprintf(rcode.msg_, sizeof(rcode.msg_), "fetch log rpc timeout, svr=%s", + to_cstring(svr)); + + if (OB_FAIL(do_process_(rcode, NULL))) { + LOG_ERROR("process fetch log callback on timeout fail", KR(ret), K(rcode), K(svr)); + } +} + +void FetchLogSRpc::RpcCB::on_invalid() +{ + int ret = OB_SUCCESS; + ObRpcResultCode rcode; + const common::ObAddr &svr = RpcCBBase::dst_; + + // Invalid packet encountered, decode failed + rcode.rcode_ = OB_RPC_PACKET_INVALID; + (void)snprintf(rcode.msg_, sizeof(rcode.msg_), + "fetch log rpc response packet is invalid, svr=%s", + to_cstring(svr)); + + if (OB_FAIL(do_process_(rcode, NULL))) { + LOG_ERROR("process fetch log callback on invalid fail", KR(ret), K(rcode), K(svr)); + } +} + +int FetchLogSRpc::RpcCB::do_process_(const ObRpcResultCode &rcode, const ObLogStreamFetchLogResp *resp) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_SUCCESS == rcode.rcode_ && NULL == resp)) { + LOG_ERROR("invalid response packet", K(rcode), K(resp)); + ret = OB_INVALID_ERROR; + } + else if (OB_FAIL(host_.set_resp(rcode, resp))) { + LOG_ERROR("set fetch log response fail", KR(ret), K(resp), K(rcode)); + } else { + // success + } + return ret; +} + +////////////////////////////// FetchLogARpc ////////////////////////////// + +int64_t FetchLogARpc::g_rpc_result_count_per_rpc_upper_limit = + ObLogConfig::default_rpc_result_count_per_rpc_upper_limit; + +bool FetchLogARpc::g_print_rpc_handle_info = ObLogConfig::default_print_rpc_handle_info; + +void FetchLogARpc::configure(const ObLogConfig &config) +{ + int64_t
rpc_result_count_per_rpc_upper_limit = config.rpc_result_count_per_rpc_upper_limit; + bool print_rpc_handle_info = config.print_rpc_handle_info; + + ATOMIC_STORE(&g_rpc_result_count_per_rpc_upper_limit, rpc_result_count_per_rpc_upper_limit); + LOG_INFO("[CONFIG]", K(rpc_result_count_per_rpc_upper_limit)); + ATOMIC_STORE(&g_print_rpc_handle_info, print_rpc_handle_info); + LOG_INFO("[CONFIG]", K(print_rpc_handle_info)); +} + +const char *FetchLogARpc::print_rpc_stop_reason(const RpcStopReason reason) +{ + const char *reason_str = "INVALID"; + switch (reason) { + case INVALID_REASON: + reason_str = "INVALID"; + break; + + case REACH_MAX_LOG: + reason_str = "REACH_MAX_LOG"; + break; + + case REACH_UPPER_LIMIT: + reason_str = "REACH_UPPER_LIMIT"; + break; + + case FETCH_NO_LOG: + reason_str = "FETCH_NO_LOG"; + break; + + case FETCH_LOG_FAIL: + reason_str = "FETCH_LOG_FAIL"; + break; + + case REACH_MAX_RPC_RESULT: + reason_str = "REACH_MAX_RPC_RESULT"; + break; + + case FORCE_STOP_RPC: + reason_str = "FORCE_STOP_RPC"; + break; + + default: + reason_str = "INVALID"; + break; + } + + return reason_str; +} + +FetchLogARpc::FetchLogARpc(FetchStream &host) : + host_(host), + svr_(), + rpc_(NULL), + stream_worker_(NULL), + result_pool_(NULL), + state_(IDLE), + cur_req_(NULL), + flying_req_list_(), + res_queue_(), + lock_() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(res_queue_.init(ObModIds::OB_LOG_FETCH_LOG_ARPC_RES_QUEUE))) { + LOG_ERROR("init result queue fail", KR(ret)); + } +} + +FetchLogARpc::~FetchLogARpc() +{ + reset(); + res_queue_.destroy(); +} + +void FetchLogARpc::reset() +{ + // Wait for all asynchronous RPCs to complete + stop(); + + svr_.reset(); + rpc_ = NULL; + stream_worker_ = NULL; + result_pool_ = NULL; + state_ = IDLE; + cur_req_ = NULL; + flying_req_list_.reset(); + (void)res_queue_.reset(); +} + +void FetchLogARpc::reset(const common::ObAddr &svr, + IObLogRpc &rpc, + IObLogStreamWorker &stream_worker, + IFetchLogARpcResultPool &result_pool) +{ + reset(); + + svr_ = svr; + rpc_ = &rpc; + stream_worker_ = &stream_worker; + result_pool_ = &result_pool; +} + +int FetchLogARpc::prepare_request(const obrpc::ObStreamSeq &seq, + const int64_t part_cnt, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + void *fetch_stream = &host_; + LOG_INFO("[STAT] [FETCH_LOG_ARPC] prepare rpc request", K(fetch_stream), + "stream_seq", seq, + "ready_result", res_queue_.count(), + "flying_rpc", flying_req_list_.count_, + K(part_cnt), K(fetch_log_cnt_per_part_per_round), K(need_feed_back), K(rpc_timeout)); + + // Requires IDLE status + if (OB_UNLIKELY(IDLE != state_)) { + LOG_ERROR("state not match which is not IDLE", K(state_)); + ret = OB_STATE_NOT_MATCH; + } + // Requirement to discard previous requests first + else if (OB_UNLIKELY(NULL != cur_req_)) { + LOG_ERROR("request has not been discarded", K(cur_req_), KPC(cur_req_)); + ret = OB_INVALID_ERROR; + } + // Allocate a new RPC request to carry the new stream + else if (OB_FAIL(alloc_rpc_request_(seq, part_cnt, + fetch_log_cnt_per_part_per_round, + need_feed_back, + rpc_timeout, + cur_req_))) { + LOG_ERROR("alloc rpc request fail", KR(ret), K(seq), K(part_cnt), + K(fetch_log_cnt_per_part_per_round), K(need_feed_back)); + } else if (OB_ISNULL(cur_req_)) { + LOG_ERROR("alloc rpc request fail", K(cur_req_)); + ret = OB_ERR_UNEXPECTED; + } else { + // success + } + return ret; +} + +// Requires repeatable calls +void 
FetchLogARpc::discard_request(const char *discard_reason) +{ + ObSpinLockGuard guard(lock_); + + // Reset current request + if (NULL != cur_req_) { + void *fetch_stream = &host_; + LOG_INFO("[STAT] [FETCH_LOG_ARPC] discard rpc request", K(discard_reason), + K(fetch_stream), K_(cur_req), KPC_(cur_req), + "ready_result", res_queue_.count(), + "flying_rpc", flying_req_list_.count_); + + // If the RPC has finished, the request structure is cleaned up directly + if (! cur_req_->rpc_is_flying()) { + free_rpc_request_(cur_req_); + } else { + // Add to the run list if the RPC has not finished + flying_req_list_.add(cur_req_); + } + + cur_req_ = NULL; + + // Clear all requests and then set the status to IDLE + clear_result_(); + } +} + +int FetchLogARpc::async_fetch_log(const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + bool &rpc_send_succeed) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + rpc_send_succeed = false; + + // Requires IDLE status + if (OB_UNLIKELY(IDLE != state_)) { + LOG_ERROR("state not match which is not IDLE", K(state_)); + ret = OB_STATE_NOT_MATCH; + } + // Requires the request structure to be prepared + else if (OB_ISNULL(cur_req_)) { + LOG_ERROR("current request is invalid", K(cur_req_)); + ret = OB_INVALID_ERROR; + } + // Request matching stream + else if (OB_UNLIKELY(seq != cur_req_->get_stream_seq())) { + LOG_ERROR("stream not match", "rpc_stream_seq", cur_req_->get_stream_seq(), + "request_stream_seq", seq, KPC(cur_req_)); + ret = OB_INVALID_ERROR; + } + // Requirement that no RPC requests are being executed + else if (OB_UNLIKELY(cur_req_->rpc_is_flying())) { + LOG_ERROR("RPC is flying, can not launch async fetch log request", + K(cur_req_->rpc_is_flying()), KPC(cur_req_)); + ret = OB_INVALID_ERROR; + } + // Initiating asynchronous requests + else if (OB_FAIL(launch_async_rpc_(*cur_req_, upper_limit, false, rpc_send_succeed))) { + LOG_ERROR("launch async rpc fail", KR(ret), K(upper_limit), KPC(cur_req_)); + } else { + // success + } + + return ret; +} + +int64_t FetchLogARpc::get_flying_request_count() +{ + ObSpinLockGuard guard(lock_); + return flying_req_list_.count_; +} + +void FetchLogARpc::print_flying_request_list() +{ + ObSpinLockGuard guard(lock_); + RpcRequest *req = flying_req_list_.head_; + int64_t index = 0; + while (NULL != req) { + LOG_INFO("[FLYING_RPC_REQUEST]", "fetch_stream", &host_, K_(svr), K(index), K(req), KPC(req)); + req = req->next_; + index++; + } +} + +void FetchLogARpc::stop() +{ + int64_t wait_count = 0; + + // Note: this function does not lock itself, it locks in the following sub-functions + + // Dispose the current request + discard_request("stop"); + + // Wait for the request list to be empty, i.e. 
all asynchronous RPCs have been completed + if (get_flying_request_count() > 0) { + LOG_INFO("wait for flying async fetch log rpc done", + "fetch_stream", &host_, K_(svr), + "flying_request_count", get_flying_request_count()); + print_flying_request_list(); + + int64_t start_time = get_timestamp(); + while (get_flying_request_count() > 0) { + wait_count++; + if (0 == (wait_count % WARN_COUNT_ON_STOP)) { + LOG_WARN("wait for flying async fetch log rpc done", + "fetch_stream", &host_, K_(svr), + "flying_request_count", get_flying_request_count(), + "wait_time", get_timestamp() - start_time); + print_flying_request_list(); + } + + usec_sleep(WAIT_TIME_ON_STOP); + } + + LOG_INFO("all flying async fetch log rpc is done", + "fetch_stream", &host_, K_(svr), + "wait_time", get_timestamp() - start_time, + "flying_request_count", get_flying_request_count()); + } +} + +int FetchLogARpc::next_result(FetchLogARpcResult *&result, bool &rpc_is_flying) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + // Requires current RPC request to be valid + if (OB_ISNULL(cur_req_)) { + LOG_ERROR("current request is invalid", K(cur_req_)); + ret = OB_INVALID_ERROR; + } + // Requires status is READY + else if (OB_UNLIKELY(READY != state_)) { + LOG_ERROR("state not match which is not READY", K(state_)); + ret = OB_STATE_NOT_MATCH; + } else if (OB_ISNULL(result_pool_)) { + LOG_ERROR("invalid rpc result pool", K(result_pool_)); + ret = OB_INVALID_ERROR; + } else { + rpc_is_flying = cur_req_->rpc_is_flying(); + + result = NULL; + while (OB_SUCCESS == ret && NULL == result) { + if (OB_FAIL(pop_result_(result))) { + if (OB_EAGAIN == ret) { + // Iteration complete + ret = OB_ITER_END; + } else { + LOG_ERROR("pop result fail", KR(ret)); + } + } else if (OB_ISNULL(result)) { + LOG_ERROR("invalid result", K(result)); + ret = OB_ERR_UNEXPECTED; + } else if (result->seq_ != cur_req_->get_stream_seq()) { + // Filtering data that is not the current stream + result_pool_->free(result); + result = NULL; + } else { + // success + } + } + } + + return ret; +} + +void FetchLogARpc::revert_result(FetchLogARpcResult *result) +{ + ObSpinLockGuard guard(lock_); + + if (OB_ISNULL(result_pool_)) { + LOG_ERROR("invalid rpc result pool", K(result_pool_)); + } else if (OB_NOT_NULL(result)) { + result_pool_->free(result); + result = NULL; + } +} + +int FetchLogARpc::update_request(const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + // Requires current RPC request to be valid + if (OB_ISNULL(cur_req_)) { + LOG_ERROR("current request is invalid", K(cur_req_)); + ret = OB_INVALID_ERROR; + } else if (OB_UNLIKELY(seq != cur_req_->get_stream_seq())) { + LOG_ERROR("stream does not match", K(seq), K(cur_req_->get_stream_seq())); + ret = OB_INVALID_ARGUMENT; + } else { + // Update request parameters + cur_req_->update_request(upper_limit, + fetch_log_cnt_per_part_per_round, + need_feed_back, + rpc_timeout); + } + + return ret; +} + +int FetchLogARpc::mark_request_stop(const obrpc::ObStreamSeq &seq) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + // Requires current RPC request to be valid + if (OB_ISNULL(cur_req_)) { + LOG_ERROR("current request is invalid", K(cur_req_)); + ret = OB_INVALID_ERROR; + } else if (OB_UNLIKELY(seq != cur_req_->get_stream_seq())) { + LOG_ERROR("stream does not match", K(seq), K(cur_req_->get_stream_seq())); + ret = 
OB_INVALID_ARGUMENT; + } else if (cur_req_->rpc_is_flying()) { + // If the RPC is running, mark the next round of RPCs to stop + cur_req_->mark_stop_flag(); + } else { + // RPC stop case, no need to mark stop + } + return ret; +} + +int FetchLogARpc::handle_rpc_response(RpcRequest &rpc_req, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp) +{ + int ret = OB_SUCCESS; + int64_t start_proc_time = get_timestamp(); + + // Locked mutually exclusive access + ObSpinLockGuard lock_guard(lock_); + + // Use the trace id associated with the request + ObLogTraceIdGuard guard(rpc_req.get_trace_id()); + + if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid stream worker", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } + // Verify that the request is the same as the current request: that is, whether it is the same object + else if (NULL == cur_req_ || cur_req_ != &rpc_req) { + // RPC requests have been deprecated + LOG_INFO("rpc request has been discarded", K_(svr), + "fetch_stream", &host_, K(rpc_req), KPC(cur_req_)); + + // Try to find the corresponding RPC request structure, then destroy the + if (OB_FAIL(destroy_flying_request_(&rpc_req))) { + LOG_ERROR("destroy_flying_request_ fail", KR(ret), K(rpc_req)); + } + } else { + bool need_dispatch_stream_task = false; + bool need_stop_rpc = false; + RpcStopReason rpc_stop_reason = INVALID_REASON; + int64_t next_upper_limit = OB_INVALID_TIMESTAMP; + + // Analyze the results and make decisions accordingly + // 1. Decide if the next RPC needs to be launched + // 2. Decide the upper limit of the next RPC + // + // TODO: Make more decisions based on request results + // 1. Decide whether to dynamically modify the upper limit interval based on whether the upper limit is reached or not + if (OB_FAIL(analyze_result_(rpc_req, rcode, resp, need_stop_rpc, rpc_stop_reason, + next_upper_limit))) { + LOG_ERROR("analyze_result_ fail", KR(ret), K(rpc_req), K(rcode), K(resp)); + } + // Generate RPC results and determine if a log fetching task needs to be assigned + else if (OB_FAIL(generate_rpc_result_(rpc_req, rcode, resp, start_proc_time, + need_stop_rpc, + rpc_stop_reason, + need_dispatch_stream_task))) { + LOG_ERROR("generate_rpc_result_ fail", KR(ret), K(rpc_req), K(rcode), K(resp), + K(start_proc_time), K(need_stop_rpc), K(rpc_stop_reason)); + } else { + // Print monitoring logs + print_handle_info_(rpc_req, resp, next_upper_limit, need_stop_rpc, rpc_stop_reason, + need_dispatch_stream_task); + + if (need_stop_rpc) { + // If you do not need to continue fetching logs, mark the RPC as not running + rpc_req.mark_flying_state(false); + } else { + bool rpc_send_succeed = false; + // Launch the next RPC + if (OB_FAIL(launch_async_rpc_(rpc_req, next_upper_limit, true, rpc_send_succeed))) { + LOG_ERROR("launch_async_rpc_ fail", KR(ret), K(rpc_req), K(next_upper_limit)); + } + } + + // Assign log stream fetching tasks as needed + if (OB_SUCCESS == ret && need_dispatch_stream_task) { + if (OB_FAIL(stream_worker_->dispatch_stream_task(host_, "RpcCallback"))) { + LOG_ERROR("dispatch stream task fail", KR(ret)); + } + } + } + } + + return ret; +} + +const char *FetchLogARpc::print_state(State state) +{ + const char *str = "UNKNOWN"; + switch (state) { + case IDLE: + str = "IDLE"; + break; + case READY: + str = "READY"; + break; + default: + str = "UNKNOWN"; + break; + } + return str; +} + +int FetchLogARpc::alloc_rpc_request_(const obrpc::ObStreamSeq &seq, + const int64_t part_cnt, + const int64_t fetch_log_cnt_per_part_per_round, + const 
bool need_feed_back, + const int64_t rpc_timeout, + RpcRequest *&req) +{ + int ret = OB_SUCCESS; + int64_t size = sizeof(RpcRequest); + void *buf = NULL; + + if (OB_UNLIKELY(part_cnt <= 0)) { + LOG_ERROR("invalid rpc partition count", K(part_cnt)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(buf = ob_malloc(size, ObModIds::OB_LOG_FETCH_LOG_ARPC_REQUEST))) { + LOG_ERROR("allocate memory for RpcRequest fail", K(size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(req = new(buf) RpcRequest(*this, seq, part_cnt, + fetch_log_cnt_per_part_per_round, + need_feed_back, + rpc_timeout))) { + LOG_ERROR("construct RpcRequest fail", K(buf), K(req), K(part_cnt), + K(fetch_log_cnt_per_part_per_round), K(need_feed_back)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // success + } + + return ret; +} + +void FetchLogARpc::free_rpc_request_(RpcRequest *request) +{ + if (OB_NOT_NULL(request)) { + request->~RpcRequest(); + ob_free(request); + request = NULL; + } +} + +int FetchLogARpc::generate_rpc_result_(RpcRequest &rpc_req, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp, + const int64_t rpc_callback_start_time, + const bool need_stop_rpc, + const RpcStopReason rpc_stop_reason, + bool &need_dispatch_stream_task) +{ + int ret = OB_SUCCESS; + FetchLogARpcResult *result = NULL; + bool is_state_idle = false; + int64_t rpc_start_time = rpc_req.get_rpc_start_time(); + const obrpc::ObStreamSeq &seq = rpc_req.get_stream_seq(); + const common::ObCurTraceId::TraceId &trace_id = rpc_req.get_trace_id(); + + if (OB_ISNULL(result_pool_)) { + LOG_ERROR("invalid result pool", K(result_pool_)); + ret = OB_INVALID_ERROR; + } + // Assign an RPC result + else if (OB_FAIL(result_pool_->alloc(result))) { + LOG_ERROR("alloc rpc result fail", KR(ret)); + } else if (OB_ISNULL(result)) { + LOG_ERROR("invalid result", K(result)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(result->set(seq, rcode, resp, trace_id, + rpc_start_time, + rpc_callback_start_time, + need_stop_rpc, + rpc_stop_reason))) { + LOG_ERROR("set rpc result fail", KR(ret), K(seq), K(rcode), KPC(resp), K(rpc_start_time), + K(rpc_callback_start_time), K(need_stop_rpc), K(rpc_stop_reason)); + result_pool_->free(result); + result = NULL; + } + // Push the results to the queue and modify the status at the same time + else if (OB_FAIL(push_result_and_be_ready_(result, is_state_idle))) { + LOG_ERROR("push result and be ready fail", KR(ret), KPC(result)); + } else { + // If the status is IDLE before inserting the result, the task needs to be assigned + need_dispatch_stream_task = is_state_idle; + } + + // Reclaiming memory in case of failure + if (OB_SUCCESS != ret && NULL != result_pool_ && NULL != result) { + result_pool_->free(result); + result = NULL; + } + + return ret; +} + +void FetchLogARpc::print_handle_info_(RpcRequest &rpc_req, + const obrpc::ObLogStreamFetchLogResp *resp, + const int64_t next_upper_limit, + const bool need_stop_rpc, + const RpcStopReason rpc_stop_reason, + const bool need_dispatch_stream_task) +{ + const bool print_rpc_handle_info = ATOMIC_LOAD(&g_print_rpc_handle_info); + int64_t part_count = rpc_req.get_part_count(); + int64_t req_upper_limit = rpc_req.get_upper_limit(); + int64_t rpc_time = get_timestamp() - rpc_req.get_rpc_start_time(); + const ObStreamSeq &stream_seq = rpc_req.get_stream_seq(); + void *fetch_stream = &host_; + + if (print_rpc_handle_info) { + LOG_INFO("handle rpc result by rpc callback", + K(fetch_stream), K(stream_seq), K(part_count), + K(need_stop_rpc), + 
"stop_reason", print_rpc_stop_reason(rpc_stop_reason), + "ready_result", res_queue_.count(), + "max_result", ATOMIC_LOAD(&g_rpc_result_count_per_rpc_upper_limit), + K(need_dispatch_stream_task), + "flying_rpc", flying_req_list_.count_, + "upper_limit", TS_TO_STR(req_upper_limit), + "delta", next_upper_limit - req_upper_limit, + K(rpc_time), KPC(resp)); + } else { + LOG_DEBUG("handle rpc result by rpc callback", + K(fetch_stream), K(stream_seq), K(part_count), + K(need_stop_rpc), + "stop_reason", print_rpc_stop_reason(rpc_stop_reason), + "ready_result", res_queue_.count(), + "max_result", ATOMIC_LOAD(&g_rpc_result_count_per_rpc_upper_limit), + K(need_dispatch_stream_task), + "flying_rpc", flying_req_list_.count_, + "upper_limit", TS_TO_STR(req_upper_limit), + "delta", next_upper_limit - req_upper_limit, + K(rpc_time), KPC(resp)); + } +} + +int FetchLogARpc::launch_async_rpc_(RpcRequest &rpc_req, + const int64_t upper_limit, + const bool launch_by_cb, + bool &rpc_send_succeed) +{ + int ret = OB_SUCCESS; + + rpc_send_succeed = false; + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid handlers", K(rpc_)); + ret = OB_INVALID_ERROR; + } else { + // First prepare the RPC request and update the upper limit + rpc_req.prepare(upper_limit); + + // Use the trace id of the request + ObLogTraceIdGuard guard(rpc_req.get_trace_id()); + + _LOG_DEBUG("launch async fetch log rpc by %s, request=%s", + launch_by_cb ? "callback" : "fetch stream", to_cstring(rpc_req)); + + // The default setting is flyin before sending an asynchronous request + // The reason for not setting it up after sending is that there is a concurrency problem after successful sending, + // and the RPC request may have come back before it is set up + rpc_req.mark_flying_state(true); + + // Sending the asynchronous RPC + ret = rpc_->async_stream_fetch_log(svr_, rpc_req.req_, rpc_req.cb_, rpc_req.rpc_timeout_); + + if (OB_SUCC(ret)) { + // RPC sent successfully + // You can't continue to manipulate the related structures afterwards + rpc_send_succeed = true; + } else { + int64_t start_proc_time = get_timestamp(); + + // First reset the ret return value + int err_code = ret; + ret = OB_SUCCESS; + + // RPC send failure + rpc_send_succeed = false; + + // Mark RPC is not running + rpc_req.mark_flying_state(false); + + // Set error code + ObRpcResultCode rcode; + rcode.rcode_ = err_code; + (void)snprintf(rcode.msg_, sizeof(rcode.msg_), "send async stream fetch log rpc fail"); + + LOG_ERROR("send async stream fetch log rpc fail", KR(ret), K(svr_), K(rpc_req), K(launch_by_cb)); + + // RPC send failure, uniformly considered to be a problem of the observer, directly generated RPC results + bool rpc_stopped = true; + RpcStopReason reason = FETCH_LOG_FAIL; + // Note: No need to process the return value here: need_dispatch_stream_task + // This function initiates the RPC request, and only determines whether the dispatch task is needed + // when the RPC result is processed by the asynchronous callback + bool need_dispatch_stream_task = false; + if (OB_FAIL(generate_rpc_result_(rpc_req, rcode, NULL, start_proc_time, + rpc_stopped, reason, need_dispatch_stream_task))) { + LOG_ERROR("generate rpc result fail", KR(ret), K(rpc_req), K(rcode), K(start_proc_time), + K(rpc_stopped), K(reason)); + } + } + } + return ret; +} + +int FetchLogARpc::push_result_and_be_ready_(FetchLogARpcResult *result, bool &is_state_idle) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(result)) { + LOG_ERROR("invalid argument", K(result)); + ret = OB_INVALID_ARGUMENT; + } else if 
(OB_FAIL(res_queue_.push(result))) { + LOG_ERROR("push result into queue fail", KR(ret), K(result)); + } else { + is_state_idle = (IDLE == state_); + + // After generating the data, modify the status and set it to READY unconditionally + if (READY != state_) { + (void)ATOMIC_SET(&state_, READY); + } + } + return ret; +} + +int FetchLogARpc::pop_result_(FetchLogARpcResult *&result) +{ + int ret = OB_SUCCESS; + void *data = NULL; + + ret = res_queue_.pop(data); + result = static_cast(data); + + if (OB_SUCCESS == ret) { + // success + } else if (OB_EAGAIN == ret) { + // No data + // Change the status to IDLE + (void)ATOMIC_SET(&state_, IDLE); + } else { + LOG_ERROR("pop result from queue fail", KR(ret)); + } + return ret; +} + +void FetchLogARpc::clear_result_() +{ + int ret = OB_SUCCESS; + void *data = NULL; + + // Empty the data when the queue is not empty + if (res_queue_.count() > 0) { + // Require result_pool_ be valid + if (OB_ISNULL(result_pool_)) { + LOG_ERROR("invalid rpc result pool, can not clear results", K(result_pool_)); + } else { + while (OB_SUCC(res_queue_.pop(data))) { + FetchLogARpcResult *result = static_cast(data); + if (OB_NOT_NULL(result)) { + result_pool_->free(result); + result = NULL; + } + data = NULL; + } + } + } + + // Status forced to IDLE + (void)ATOMIC_SET(&state_, IDLE); +} + +int FetchLogARpc::destroy_flying_request_(RpcRequest *target_request) +{ + int ret = OB_SUCCESS; + + // Remove the request from the run list + // If the request is not in the list, an exception must have occurred + if (OB_ISNULL(target_request)) { + LOG_ERROR("invalid argument", K(target_request)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(flying_req_list_.remove(target_request))) { + LOG_ERROR("remove rpc request from flying request list fail", KR(ret), K(target_request)); + } else { + free_rpc_request_(target_request); + target_request = NULL; + } + + return ret; +} + +int FetchLogARpc::analyze_result_(RpcRequest &rpc_req, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp, + bool &need_stop_rpc, + RpcStopReason &rpc_stop_reason, + int64_t &next_upper_limit) +{ + int ret = OB_SUCCESS; + const int64_t rpc_result_count_per_rpc_upper_limit = + ATOMIC_LOAD(&g_rpc_result_count_per_rpc_upper_limit); + const int64_t cur_rpc_result_count = res_queue_.count(); + bool reach_max_rpc_result = (cur_rpc_result_count >= rpc_result_count_per_rpc_upper_limit); + bool force_stop_rpc = rpc_req.get_stop_flag(); + + need_stop_rpc = false; + rpc_stop_reason = INVALID_REASON; + next_upper_limit = rpc_req.get_upper_limit(); + + // If the RPC send fails, or the server returns a failure, there is no need to continue fetching logs + if (OB_SUCCESS != rcode.rcode_ || NULL == resp || OB_SUCCESS != resp->get_err()) { + need_stop_rpc = true; + rpc_stop_reason = FETCH_LOG_FAIL; + } else if (reach_max_rpc_result) { + // If the number of RPC results reaches the threshold, stop sending RPCs + need_stop_rpc = true; + rpc_stop_reason = REACH_MAX_RPC_RESULT; + } else if (force_stop_rpc) { + // External forced stop RPC + need_stop_rpc = true; + rpc_stop_reason = FORCE_STOP_RPC; + } else { + const ObFetchStatus &fetch_status = resp->get_fetch_status(); + + int64_t last_upper_limit = rpc_req.get_upper_limit(); + int64_t total_part_count = rpc_req.get_part_count(); + int64_t reach_upper_limit_count = fetch_status.reach_upper_limit_ts_pkey_count_; + int64_t reach_max_log_id_count = fetch_status.reach_max_log_id_pkey_count_; + + // All partitions reach maximum log + bool all_reach_max_log 
= (reach_max_log_id_count >= total_part_count); + + // All partitions reach one of two states: either the maximum log is reached, or the progress limit is reached + bool reach_max_log_or_upper_limit = + ((reach_upper_limit_count + reach_max_log_id_count) >= total_part_count); + + bool fetch_no_log = (resp->get_log_num() <= 0); + + // If all partitions have reached the maximum log, there is no need to continue fetching logs + if (all_reach_max_log) { + need_stop_rpc = true; + rpc_stop_reason = REACH_MAX_LOG; + } else if (reach_max_log_or_upper_limit) { + // For partitions that have reached their upper limit, get the latest upper limit + // Decide whether to continue to send RPC by whether the upper limit has changed + if (OB_FAIL(host_.get_upper_limit(next_upper_limit))) { + LOG_ERROR("get upper limit fail", KR(ret)); + } else { + need_stop_rpc = (next_upper_limit <= last_upper_limit); + rpc_stop_reason = REACH_UPPER_LIMIT; + } + } else if (fetch_no_log) { + // If no logs were fetched this round, there is no need to fetch logs next time + need_stop_rpc = true; + rpc_stop_reason = FETCH_NO_LOG; + } else { + // In all other cases, continue fetching logs + need_stop_rpc = false; + rpc_stop_reason = INVALID_REASON; + } + } + return ret; +} + +///////////////////////////// FetchLogARpc::RpcCB ///////////////////////// + +rpc::frame::ObReqTransport::AsyncCB *FetchLogARpc::RpcCB::clone(const rpc::frame::SPAlloc &alloc) const +{ + void *buf = NULL; + RpcCB *cb = NULL; + + if (OB_ISNULL(buf = alloc(sizeof(RpcCB)))) { + LOG_ERROR("clone rpc callback fail", K(buf), K(sizeof(RpcCB))); + } else if (OB_ISNULL(cb = new(buf) RpcCB(host_))) { + LOG_ERROR("construct RpcCB fail", K(buf)); + } else { + // success + } + + return cb; +} + +int FetchLogARpc::RpcCB::process() +{ + int ret = OB_SUCCESS; + ObLogStreamFetchLogResp &result = RpcCBBase::result_; + ObRpcResultCode &rcode = RpcCBBase::rcode_; + const common::ObAddr &svr = RpcCBBase::dst_; + + if (OB_FAIL(do_process_(rcode, &result))) { + LOG_ERROR("process fetch log callback fail", KR(ret), K(result), K(rcode), K(svr), K_(host)); + } + // Note: actively destruct the response after asynchronous RPC processing + result.reset(); + + return ret; +} + +void FetchLogARpc::RpcCB::on_timeout() +{ + int ret = OB_SUCCESS; + ObRpcResultCode rcode; + const common::ObAddr &svr = RpcCBBase::dst_; + + rcode.rcode_ = OB_TIMEOUT; + (void)snprintf(rcode.msg_, sizeof(rcode.msg_), "fetch log rpc timeout, svr=%s", + to_cstring(svr)); + + if (OB_FAIL(do_process_(rcode, NULL))) { + LOG_ERROR("process fetch log callback on timeout fail", KR(ret), K(rcode), K(svr), K_(host)); + } +} + +void FetchLogARpc::RpcCB::on_invalid() +{ + int ret = OB_SUCCESS; + ObRpcResultCode rcode; + const common::ObAddr &svr = RpcCBBase::dst_; + + // Invalid packet encountered, decode failed + rcode.rcode_ = OB_RPC_PACKET_INVALID; + (void)snprintf(rcode.msg_, sizeof(rcode.msg_), + "fetch log rpc response packet is invalid, svr=%s", + to_cstring(svr)); + + if (OB_FAIL(do_process_(rcode, NULL))) { + LOG_ERROR("process fetch log callback on invalid fail", KR(ret), K(rcode), K(svr), K_(host)); + } +} + +int FetchLogARpc::RpcCB::do_process_(const ObRpcResultCode &rcode, const ObLogStreamFetchLogResp *resp) +{ + int ret = OB_SUCCESS; + RpcRequest &rpc_req = host_; + FetchLogARpc &rpc_host = rpc_req.host_; + + if (OB_UNLIKELY(OB_SUCCESS == rcode.rcode_ && NULL == resp)) { + LOG_ERROR("invalid response packet", K(rcode), K(resp)); + ret = OB_INVALID_ERROR; + } + // Processing RPC response results + else if
(OB_FAIL(rpc_host.handle_rpc_response(rpc_req, rcode, resp))) { + LOG_ERROR("set fetch log response fail", KR(ret), K(resp), K(rcode), K(rpc_req)); + } else { + // success + } + return ret; +} + +///////////////////////////// FetchLogARpc::RpcRequest ///////////////////////// + +FetchLogARpc::RpcRequest::RpcRequest(FetchLogARpc &host, + const obrpc::ObStreamSeq &seq, + const int64_t part_cnt, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout) : + host_(host), + part_cnt_(part_cnt), + cb_(*this), + need_feed_back_(false), + rpc_timeout_(rpc_timeout), + req_(), + trace_id_(), + next_(NULL), + force_stop_flag_(false), + rpc_start_time_(OB_INVALID_TIMESTAMP), + rpc_is_flying_(false) +{ + // Initialize the request + req_.set_stream_seq(seq); + req_.set_log_cnt_per_part_per_round(fetch_log_cnt_per_part_per_round); + req_.set_feedback(need_feed_back); +} + +void FetchLogARpc::RpcRequest::update_request(const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout) +{ + req_.set_upper_limit_ts(upper_limit); + req_.set_log_cnt_per_part_per_round(fetch_log_cnt_per_part_per_round); + need_feed_back_ = need_feed_back; + rpc_timeout_ = rpc_timeout; +} + +void FetchLogARpc::RpcRequest::prepare(const int64_t upper_limit) +{ + // set request parameter: upper limit + // upper limit may need reset before rpc send, thus value of upper limit should be provided dynamically + // + // Set request parameter: upper limit + req_.set_upper_limit_ts(upper_limit); + + // Update the next round of RPC trace id + trace_id_.init(get_self_addr()); + + // Update request time + rpc_start_time_ = get_timestamp(); + + // reset stop flag + force_stop_flag_ = false; + + // Set whether feedback is required + req_.set_feedback(need_feed_back_); + if (need_feed_back_) { + // Set the feedback only once + need_feed_back_ = false; + } +} + +void FetchLogARpc::RpcRequest::mark_flying_state(const bool is_flying) +{ + ATOMIC_SET(&rpc_is_flying_, is_flying); +} + +////////////////////////////// RpcRequestList ////////////////////////////// + +void FetchLogARpc::RpcRequestList::add(RpcRequest *req) +{ + if (OB_NOT_NULL(req)) { + req->next_ = NULL; + + if (NULL == head_) { + head_ = req; + tail_ = req; + } else { + tail_->next_ = req; + tail_ = req; + } + + count_++; + } +} + +int FetchLogARpc::RpcRequestList::remove(RpcRequest *target) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(target)) { + ret = OB_INVALID_ARGUMENT; + } else { + bool found = false; + RpcRequest *pre_request = NULL; + RpcRequest *request = head_; + + // Find the matching request structure + while (NULL != request && !
found) { + if (target == request) { + found = true; + } else { + pre_request = request; + request = request->next_; + } + } + + if (found) { + // Delete the corresponding node + if (NULL == pre_request) { + head_ = target->next_; + if (target == tail_) { + tail_ = head_; + } + } else { + pre_request->next_ = target->next_; + if (target == tail_) { + tail_ = pre_request; + } + } + + count_--; + } else { + ret = OB_ENTRY_NOT_EXIST; + } + } + + return ret; +} + + +////////////////////////////// FetchLogARpcResult ////////////////////////////// + +int FetchLogARpcResult::set(const obrpc::ObStreamSeq &seq, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp, + const common::ObCurTraceId::TraceId &trace_id, + const int64_t rpc_start_time, + const int64_t rpc_callback_start_time, + const bool rpc_stop_upon_result, + const FetchLogARpc::RpcStopReason rpc_stop_reason) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_SUCCESS == rcode.rcode_ && NULL == resp)) { + LOG_ERROR("invalid fetch log response", K(rcode), K(resp)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(rpc_start_time <= 0) || OB_UNLIKELY(rpc_callback_start_time <= 0)) { + LOG_ERROR("invalid argument", K(rpc_start_time), K(rpc_callback_start_time)); + ret = OB_INVALID_ARGUMENT; + } else { + seq_ = seq; + rcode_ = rcode; + trace_id_ = trace_id; + + // The result is valid when no error occurs + if (OB_SUCCESS == rcode.rcode_) { + if (OB_FAIL(resp_.assign(*resp))) { + LOG_ERROR("assign new fetch log resp fail", KR(ret), KPC(resp), K(resp_)); + } + } else { + resp_.reset(); + } + } + + // After setting all the result items, only then start setting the statistics items, + // because the results need a memory copy and this time must be considered + if (OB_SUCCESS == ret) { + int64_t rpc_end_time = get_timestamp(); + + rpc_time_ = rpc_end_time - rpc_start_time; + rpc_callback_time_ = rpc_end_time - rpc_callback_start_time; + rpc_stop_upon_result_ = rpc_stop_upon_result; + rpc_stop_reason_ = rpc_stop_reason; + } + return ret; +} + +////////////////////////////// FetchLogARpcResult Object Pool ////////////////////////////// + +int FetchLogARpcResultPool::init(const int64_t cached_obj_count) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_FAIL(pool_.init(cached_obj_count, + ObModIds::OB_LOG_FETCH_LOG_ARPC_RESULT, + OB_SERVER_TENANT_ID, + DEFAULT_RESULT_POOL_BLOCK_SIZE))) { + LOG_ERROR("init result obj pool fail", KR(ret), K(cached_obj_count)); + } else { + inited_ = true; + } + return ret; +} + +void FetchLogARpcResultPool::destroy() +{ + inited_ = false; + pool_.destroy(); +} + +void FetchLogARpcResultPool::print_stat() +{ + int64_t alloc_count = pool_.get_alloc_count(); + int64_t free_count = pool_.get_free_count(); + int64_t fixed_count = pool_.get_fixed_count(); + int64_t used_count = alloc_count - free_count; + int64_t dynamic_count = (alloc_count > fixed_count) ? alloc_count - fixed_count : 0; + + _LOG_INFO("[STAT] [RPC_RESULT_POOL] USED=%ld FREE=%ld FIXED=%ld DYNAMIC=%ld", + used_count, free_count, fixed_count, dynamic_count); +} + +int FetchLogARpcResultPool::alloc(FetchLogARpcResult *&result) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
inited_)) { + ret = OB_NOT_INIT; + } else { + ret = pool_.alloc(result); + } + return ret; +} + +void FetchLogARpcResultPool::free(FetchLogARpcResult *result) +{ + int ret = OB_SUCCESS; + if (OB_LIKELY(inited_) && OB_NOT_NULL(result)) { + result->reset(); + if (OB_FAIL(pool_.free(result))) { + LOG_ERROR("free result into pool fail", KR(ret), K(result)); + } else { + result = NULL; + } + } +} + + +} +} diff --git a/src/liboblog/src/ob_log_fetch_log_rpc.h b/src/liboblog/src/ob_log_fetch_log_rpc.h new file mode 100644 index 0000000000000000000000000000000000000000..80fdf6adfb2f5a21f31e76fd7620d85925107671 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_log_rpc.h @@ -0,0 +1,600 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCH_LOG_RPC_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCH_LOG_RPC_H__ + +#include "lib/lock/ob_spin_lock.h" // ObSpinLock +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/profile/ob_trace_id.h" // TraceId +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool +#include "common/ob_partition_key.h" // ObPartitionKey +#include "common/ob_queue_thread.h" // ObCond +#include "rpc/frame/ob_req_transport.h" // ObReqTranslator::AsyncCB +#include "rpc/obrpc/ob_rpc_proxy.h" // ObRpcProxy +#include "rpc/obrpc/ob_rpc_packet.h" // OB_LOG_OPEN_STREAM +#include "rpc/obrpc/ob_rpc_result_code.h" // ObRpcResultCode +#include "clog/ob_log_external_rpc.h" // obrpc + +#include "ob_log_part_fetch_ctx.h" // FetchTaskList + +namespace oceanbase +{ +namespace liboblog +{ + +class FetchStream; + +////////////////////////////// OpenStreamSRpc ////////////////////////////// +// Open stream synchronous RPC wrapper class +class OpenStreamSRpc +{ +public: + explicit OpenStreamSRpc(); + virtual ~OpenStreamSRpc(); + + void reset(); + + /// Execute synchronous RPC request + /// The ret return value only indicates whether the function succeeds, not whether the RPC succeeds + /// When the ret returns OB_SUCCESS, you should check result code to determine whether the RPC was successful + int open_stream(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout, + FetchTaskList &part_list, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time); + + /// Open stream for a single partition + /// The ret return value only indicates whether the function succeeds, not whether the RPC succeeds + /// When the ret returns OB_SUCCESS, you should check result code to determine whether the RPC was successful + int open_stream(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout, + const common::ObPartitionKey &pkey, + const uint64_t next_log_id, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time); + + const obrpc::ObRpcResultCode &get_result_code() const { return rcode_; } + const obrpc::ObLogOpenStreamResp &get_resp() const { return resp_; } + const obrpc::ObLogOpenStreamReq &get_req() const { return req_; } + + TO_STRING_KV(K_(req), K_(resp), K_(rcode)); + +private: + int launch_open_stream_rpc_(IObLogRpc 
&rpc, + const common::ObAddr &svr, + const int64_t timeout); + int build_request_(FetchTaskList &part_list, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time); + int build_request_(const common::ObPartitionKey &pkey, + const uint64_t next_log_id, + const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time); + int set_request_basic_param_(const obrpc::ObStreamSeq &stale_stream_seq, + const int64_t stream_life_time); + int set_request_part_list_(FetchTaskList &part_list); + int set_request_part_list_(const common::ObPartitionKey &pkey, + const uint64_t next_log_id); + +private: + obrpc::ObLogOpenStreamReq req_; // Open stream request + obrpc::ObLogOpenStreamResp resp_; // Open stream response + obrpc::ObRpcResultCode rcode_; // Open stream RPC result code + +private: + DISALLOW_COPY_AND_ASSIGN(OpenStreamSRpc); +}; + +////////////////////////////// FetchLogSRpc ////////////////////////////// +// Fetch log synchronous RPC wrapper class +// Wrapping synchronous RPC with asynchronous interface +class FetchLogSRpc +{ + typedef obrpc::ObLogExternalProxy::AsyncCB RpcCBBase; + +public: + FetchLogSRpc(); + virtual ~FetchLogSRpc(); + +public: + // Perform synchronous RPC requests + // The ret return value only indicates whether the function was successful, not whether the RPC was successful + // RPC-related error code is set to result code + int fetch_log(IObLogRpc &rpc, + const common::ObAddr &svr, + const int64_t timeout, + const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back); + + int set_resp(const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp); + + void reset(); + + const obrpc::ObRpcResultCode &get_result_code() const { return rcode_; } + const obrpc::ObLogStreamFetchLogResp &get_resp() const { return resp_; } + const obrpc::ObLogStreamFetchLogReq &get_req() const { return req_; } + +private: + int build_request_(const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back); + +private: + ////////////////////////////// RpcCB ////////////////////////////// + // Callback of Fetch log synchronization RPC + class RpcCB : public RpcCBBase + { + public: + explicit RpcCB(FetchLogSRpc &host); + virtual ~RpcCB(); + + public: + rpc::frame::ObReqTransport::AsyncCB *clone(const rpc::frame::SPAlloc &alloc) const; + int process(); + void on_timeout(); + void on_invalid(); + typedef typename obrpc::ObLogExternalProxy::ObRpc ProxyRpc; + void set_args(const typename ProxyRpc::Request &args) { UNUSED(args); } + + private: + int do_process_(const obrpc::ObRpcResultCode &rcode, const obrpc::ObLogStreamFetchLogResp *resp); + + private: + FetchLogSRpc &host_; + + private: + DISALLOW_COPY_AND_ASSIGN(RpcCB); + }; + +private: + obrpc::ObLogStreamFetchLogReq req_; // Fetch log request + obrpc::ObLogStreamFetchLogResp resp_; // Fetch log response + obrpc::ObRpcResultCode rcode_; // Fetch log RPC result code + RpcCB cb_; + common::ObCond cond_; + + // Marking the completion of RPC + volatile bool rpc_done_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(FetchLogSRpc); +}; + +struct FetchLogARpcResult; +class IFetchLogARpcResultPool; +class IObLogRpc; +class IObLogStreamWorker; +class ObLogConfig; +////////////////////////////// FetchLogARpc ////////////////////////////// +// Fetch log asynchronous RPC wrapper class +// 1. 
To achieve the ultimate performance, the fetch log RPC is streamed with the log processing logic:
+//    after one RPC completes, the next RPC is issued immediately without waiting for log processing to complete
+// 2. "Issue the next RPC immediately" has a certain condition: the next RPC is guaranteed to fetch the log
+//    The following cases do not require the next RPC to be issued immediately:
+//    1). The upper limit is reached
+//    2). All partition logs are up to date
+//    3). RPC failed or observer failed to return
+//    4). ...
+// 3. Streaming mode is a significant performance improvement for fetching historical log scenarios. To maximize performance, the ideal case is:
+//    1). Each RPC packet is full, carrying as much data as possible at a time, which can reduce the loss caused by network overhead
+//    2). Since the upper limit is updated on a delayed basis (the log is updated only after processing), the upper limit should be set as large as possible.
+//        The optimal case should be the time range covered by at least two RPCs.
+//        Due to the different data distribution, the upper limit should support dynamic updates.
+//    3). The partition list inside the stream should be stable for a long time to avoid the loss caused by open streams and thread scheduling
+//    4). The number of streams should be as small as possible, preferably less than or equal to the number of worker threads
+//
+class FetchLogARpc
+{
+private:
+  static const int64_t WAIT_TIME_ON_STOP = 10 * _MSEC_;
+  static const int64_t WARN_COUNT_ON_STOP = 500;
+  struct RpcRequest;
+
+public:
+  // The maximum number of results each RPC can have, and stop sending RPCs if this number is exceeded
+  static int64_t g_rpc_result_count_per_rpc_upper_limit;
+  static bool g_print_rpc_handle_info;
+
+  static void configure(const ObLogConfig &config);
+
+public:
+  ////////////////////////// RPC state //////////////////////////
+  // IDLE: No RPC task processing
+  // READY: Have RPC tasks to be processed
+  enum State
+  {
+    IDLE = 0,
+    READY = 1,
+  };
+
+  // RPC stop reason
+  enum RpcStopReason
+  {
+    INVALID_REASON = -1,
+    REACH_MAX_LOG = 0,        // Reach maximum log
+    REACH_UPPER_LIMIT = 1,    // Reach progress limit
+    FETCH_NO_LOG = 2,         // Fetched 0 logs
+    FETCH_LOG_FAIL = 3,       // Fetch log failure
+    REACH_MAX_RPC_RESULT = 4, // The number of RPC results reaches the upper limit
+    FORCE_STOP_RPC = 5,       // External forced stop of RPC
+  };
+  static const char *print_rpc_stop_reason(const RpcStopReason reason);
+
+public:
+  explicit FetchLogARpc(FetchStream &host);
+  virtual ~FetchLogARpc();
+
+  void reset();
+
+  void reset(const common::ObAddr &svr,
+      IObLogRpc &rpc,
+      IObLogStreamWorker &stream_worker,
+      IFetchLogARpcResultPool &result_pool);
+
+  // Prepare an RPC request
+  // 1. Require discard_request to be called first if an RPC request has been prepared before, to discard the existing request
+  // 2. Require the status to be IDLE
+  int prepare_request(const obrpc::ObStreamSeq &seq,
+      const int64_t part_cnt,
+      const int64_t fetch_log_cnt_per_part_per_round,
+      const bool need_feed_back,
+      const int64_t rpc_timeout);
+
+  // Discard the current request
+  // Set the status to IDLE
+  void discard_request(const char *discard_reason);
+
+  // Launch an asynchronous RPC request
+  // 1. Requires that the request structure is ready
+  // 2. Requires a stream match
+  // 3. Requires that no asynchronous RPC is currently executing
+  // 4. Requires the status to be IDLE
+  //
+  // Note:
+  // 1. 
The ret return value only indicates whether the function was executed successfully, not whether the RPC was successful + // 2. The success of the RPC is returned using the rpc_send_succeed parameter + // 3. if the RPC fails, the result will be generated immediately, you can use next_result() to iterate through the results + // 4. If the RPC succeeds, you need to wait for the asynchronous callback to set the result + int async_fetch_log(const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + bool &rpc_send_succeed); + + /// Discard the current request and wait for the end of the asynchronous RPC + void stop(); + + // Iterate over the RPC results + // 1. requires the current RPC request to be valid + // 2. require the status to be READY, i.e., there is data to iterate over; this avoids concurrent access by multiple threads + // 3. return the error code OB_ITER_END if the RPC result iteration is complete, and return whether the RPC is running, then mark the status as IDLE + // 4. only iterate over results that match the current request + int next_result(FetchLogARpcResult *&result, bool &rpc_is_flying); + + // Recycling results + void revert_result(FetchLogARpcResult *result); + + // Update the request parameters + // Require the current request to be valid + int update_request(const obrpc::ObStreamSeq &seq, + const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout); + + // Mark the end of the request + // Require the current request to be valid + int mark_request_stop(const obrpc::ObStreamSeq &seq); + + // Process the RPC request result, called by the RPC callback thread + // 1. If it matches the current RPC request, push the result to the request queue + // 2. If it doesn't match the current RPC request, it is a deprecated RPC request, so the RPC result is discarded and the deprecated RPC request is recycled + // 3. 
Based on the request result, decide whether to launch the next RPC request immediately + int handle_rpc_response(RpcRequest &rpc_request, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp); + + static const char *print_state(State state); + + int64_t get_flying_request_count(); + void print_flying_request_list(); + +private: + int alloc_rpc_request_(const obrpc::ObStreamSeq &seq, + const int64_t part_cnt, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout, + RpcRequest *&req); + void free_rpc_request_(RpcRequest *request); + int generate_rpc_result_(RpcRequest &rpc_req, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp, + const int64_t rpc_callback_start_time, + const bool need_stop_rpc, + const RpcStopReason rpc_stop_reason, + bool &need_dispatch_stream_task); + int launch_async_rpc_(RpcRequest &request, + const int64_t upper_limit, + const bool launch_by_cb, + bool &rpc_send_succeed); + int push_result_and_be_ready_(FetchLogARpcResult *result, bool &is_state_idle); + int pop_result_(FetchLogARpcResult *&result); + void clear_result_(); + int destroy_flying_request_(RpcRequest *target_request); + int analyze_result_(RpcRequest &rpc_req, + const obrpc::ObRpcResultCode &rcode, + const obrpc::ObLogStreamFetchLogResp *resp, + bool &need_stop_rpc, + RpcStopReason &rpc_stop_reason, + int64_t &next_upper_limit); + void print_handle_info_(RpcRequest &rpc_req, + const obrpc::ObLogStreamFetchLogResp *resp, + const int64_t next_upper_limit, + const bool need_stop_rpc, + const RpcStopReason rpc_stop_reason, + const bool need_dispatch_stream_task); + +private: + ////////////////////////////// RpcCB ////////////////////////////// + // Callback of Fetch log RPC + typedef obrpc::ObLogExternalProxy::AsyncCB RpcCBBase; + class RpcCB : public RpcCBBase + { + public: + explicit RpcCB(RpcRequest &host) : host_(host) {} + virtual ~RpcCB() {} + + public: + rpc::frame::ObReqTransport::AsyncCB *clone(const rpc::frame::SPAlloc &alloc) const; + int process(); + void on_timeout(); + void on_invalid(); + typedef typename obrpc::ObLogExternalProxy::ObRpc ProxyRpc; + void set_args(const typename ProxyRpc::Request &args) { UNUSED(args); } + + TO_STRING_KV("host", reinterpret_cast(&host_)); + + private: + int do_process_(const obrpc::ObRpcResultCode &rcode, const obrpc::ObLogStreamFetchLogResp *resp); + + private: + RpcRequest &host_; + + private: + DISALLOW_COPY_AND_ASSIGN(RpcCB); + }; + + ////////////////////////////// RpcRequest ////////////////////////////// + // RPC request structure + // Each RPC request corresponds to a stream identifier uniquely + struct RpcRequest + { + // Invariant member variables within the request + FetchLogARpc &host_; + const int64_t part_cnt_; // partition count + RpcCB cb_; // RPC callback + + // Variables that change with the request + bool need_feed_back_; // need feedback + int64_t rpc_timeout_; // RPC timeout + obrpc::ObLogStreamFetchLogReq req_; // Fetch log request + common::ObCurTraceId::TraceId trace_id_; + + RpcRequest *next_; // Link list Structure + + // Forced stop flag + // The life of this flag is consistent with the RPC, and the flag is to be reset before each round of RPC starts + bool force_stop_flag_ CACHE_ALIGNED; + + // Start time of this RPC + int64_t rpc_start_time_ CACHE_ALIGNED; + + // Whether the RPC is being executed and no callback has been executed yet + volatile bool rpc_is_flying_ CACHE_ALIGNED; + + RpcRequest(FetchLogARpc &host, + 
const obrpc::ObStreamSeq &seq, + const int64_t part_cnt, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout); + virtual ~RpcRequest() {} + + // Update request parameters + void update_request(const int64_t upper_limit, + const int64_t fetch_log_cnt_per_part_per_round, + const bool need_feed_back, + const int64_t rpc_timeout); + + // Prepare the RPC request structure, set the final parameters, and immediately launch the RPC request next + void prepare(const int64_t upper_limit); + + // Marking RPC run status + void mark_flying_state(const bool rpc_is_flying); + + // Mark the stop of RPC + // Request the next round of RPC to stop + void mark_stop_flag() { ATOMIC_STORE(&force_stop_flag_, true); } + + const obrpc::ObStreamSeq &get_stream_seq() const { return req_.get_stream_seq(); } + const common::ObCurTraceId::TraceId &get_trace_id() const { return trace_id_; } + int64_t get_rpc_start_time() const { return rpc_start_time_; } + bool rpc_is_flying() const { return ATOMIC_LOAD(&rpc_is_flying_); } + int64_t get_upper_limit() const { return req_.get_upper_limit_ts(); } + int64_t get_part_count() const { return part_cnt_; } + bool get_stop_flag() const { return ATOMIC_LOAD(&force_stop_flag_); } + + TO_STRING_KV(K_(rpc_is_flying), + K_(part_cnt), + "rpc_start_time", TS_TO_STR(rpc_start_time_), + K_(force_stop_flag), + K_(need_feed_back), + K_(rpc_timeout), + K_(req), + K_(trace_id), + KP_(next)); + + private: + DISALLOW_COPY_AND_ASSIGN(RpcRequest); + }; + + ////////////////////////////// RpcRequestList ////////////////////////////// + // RPC request list + struct RpcRequestList + { + RpcRequest *head_; + RpcRequest *tail_; + int64_t count_; + + RpcRequestList() { reset(); } + + void reset() + { + head_ = NULL; + tail_ = NULL; + count_ = 0; + } + + void add(RpcRequest *req); + + // Use the stream identifier to delete the request structure + int remove(RpcRequest *target); + + TO_STRING_KV(K_(count), K_(head), K_(tail)); + }; + +public: + TO_STRING_KV( + "host", reinterpret_cast(&host_), + "state", print_state(state_), + "rpc_result_cnt", res_queue_.count(), + KPC_(cur_req), + K_(flying_req_list)); + +private: + typedef common::ObMapQueue ResQueue; + + FetchStream &host_; + + common::ObAddr svr_; + IObLogRpc *rpc_; + IObLogStreamWorker *stream_worker_; + IFetchLogARpcResultPool *result_pool_; + + State state_ CACHE_ALIGNED; + RpcRequest *cur_req_ CACHE_ALIGNED; + + // List of running RPC requests that have been deprecated + // These RPC requests are executing, no callbacks yet, but have been deprecated + // [ObStreamSeq <--> RpcRequest] One by one correspondence + RpcRequestList flying_req_list_; + + // Request Results Queue + ResQueue res_queue_; + + common::ObSpinLock lock_; + +private: + DISALLOW_COPY_AND_ASSIGN(FetchLogARpc); +}; + +////////////////////////////// FetchLogARpc Request Result ////////////////////////////// +struct FetchLogARpcResult +{ + obrpc::ObStreamSeq seq_; // Stream Identifier + obrpc::ObLogStreamFetchLogResp resp_; // Fetch log response + obrpc::ObRpcResultCode rcode_; // Fetch log result + common::ObCurTraceId::TraceId trace_id_; + + // Statistical items + // The time spent on the server side is in the fetch log result + int64_t rpc_time_; // Total RPC time: network + server + asynchronous processing + int64_t rpc_callback_time_; // RPC asynchronous processing time + bool rpc_stop_upon_result_; // Whether the RPC stops after the result is processed, i.e. 
whether it stops because of that result
+  FetchLogARpc::RpcStopReason rpc_stop_reason_;   // RPC stop reason
+
+  FetchLogARpcResult() { reset(); }
+  virtual ~FetchLogARpcResult() {}
+
+  int set(const obrpc::ObStreamSeq &seq,
+      const obrpc::ObRpcResultCode &rcode,
+      const obrpc::ObLogStreamFetchLogResp *resp,
+      const common::ObCurTraceId::TraceId &trace_id,
+      const int64_t rpc_start_time,
+      const int64_t rpc_callback_start_time,
+      const bool rpc_stop_upon_result,
+      const FetchLogARpc::RpcStopReason rpc_stop_reason);
+
+  void reset()
+  {
+    seq_.reset();
+    resp_.reset();
+    rcode_.reset();
+    trace_id_.reset();
+    rpc_time_ = 0;
+    rpc_callback_time_ = 0;
+    rpc_stop_upon_result_ = false;
+    rpc_stop_reason_ = FetchLogARpc::INVALID_REASON;
+  }
+
+  TO_STRING_KV(K_(seq), K_(rcode), K_(resp), K_(trace_id), K_(rpc_time),
+      K_(rpc_callback_time), K_(rpc_stop_upon_result),
+      "rpc_stop_reason", FetchLogARpc::print_rpc_stop_reason(rpc_stop_reason_));
+};
+
+////////////////////////////// FetchLogARpcResult Object Pool //////////////////////////////
+class IFetchLogARpcResultPool
+{
+public:
+  virtual ~IFetchLogARpcResultPool() {}
+
+public:
+  virtual int alloc(FetchLogARpcResult *&result) = 0;
+  virtual void free(FetchLogARpcResult *result) = 0;
+};
+
+class FetchLogARpcResultPool : public IFetchLogARpcResultPool
+{
+  typedef common::ObSmallObjPool<FetchLogARpcResult> ResultPool;
+  static const int64_t DEFAULT_RESULT_POOL_BLOCK_SIZE = 1L << 24;
+
+public:
+  FetchLogARpcResultPool() : inited_(false), pool_() {}
+  virtual ~FetchLogARpcResultPool() { destroy(); }
+
+public:
+  int init(const int64_t cached_obj_count);
+  void destroy();
+  void print_stat();
+
+public:
+  virtual int alloc(FetchLogARpcResult *&result);
+  virtual void free(FetchLogARpcResult *result);
+
+private:
+  bool inited_;
+  ResultPool pool_;
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(FetchLogARpcResultPool);
+};
+
+}
+}
+
+#endif
diff --git a/src/liboblog/src/ob_log_fetch_stat_info.cpp b/src/liboblog/src/ob_log_fetch_stat_info.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7fe38be6249b38dd8a82111491a452b3de6005ca
--- /dev/null
+++ b/src/liboblog/src/ob_log_fetch_stat_info.cpp
@@ -0,0 +1,472 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details. 
+ */ + +#include "ob_log_fetch_stat_info.h" + +#include "lib/utility/ob_print_utils.h" // databuff_printf +#include "ob_log_utils.h" // SIZE_TO_STR + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +///////////////////////////// TransStatInfo /////////////////////////// +void TransStatInfo::reset() +{ + decode_header_time_ = 0; + + redo_cnt_ = 0; + redo_size_ = 0; + read_redo_time_ = 0; + read_redo_decode_time_ = 0; + read_redo_parse_time_ = 0; + + prepare_cnt_ = 0; + prepare_size_ = 0; + prepare_with_redo_cnt_ = 0; + read_prepare_time_ = 0; + read_prepare_decode_time_ = 0; + read_prepare_parse_time_ = 0; + + commit_cnt_ = 0; + commit_size_ = 0; + commit_with_prepare_cnt_ = 0; + participant_cnt_ = 0; + read_commit_time_ = 0; + read_commit_decode_time_ = 0; + read_commit_parse_time_ = 0; + + sp_redo_cnt_ = 0; + sp_redo_size_ = 0; + read_sp_redo_time_ = 0; + read_sp_redo_decode_time_ = 0; + read_sp_redo_parse_time_ = 0; + + sp_commit_cnt_ = 0; + sp_commit_size_ = 0; + sp_commit_with_redo_cnt_ = 0; + read_sp_commit_time_ = 0; + read_sp_commit_decode_time_ = 0; + read_sp_commit_parse_time_ = 0; + + clear_cnt_ = 0; + clear_size_ = 0; +} + +void TransStatInfo::update(const TransStatInfo &tsi) +{ + decode_header_time_ += tsi.decode_header_time_; + + redo_cnt_ += tsi.redo_cnt_; + redo_size_ += tsi.redo_size_; + read_redo_time_ += tsi.read_redo_time_; + read_redo_decode_time_ += tsi.read_redo_decode_time_; + read_redo_parse_time_ += tsi.read_redo_parse_time_; + + prepare_cnt_ += tsi.prepare_cnt_; + prepare_size_ += tsi.prepare_size_; + prepare_with_redo_cnt_ += tsi.prepare_with_redo_cnt_; + read_prepare_time_ += tsi.read_prepare_time_; + read_prepare_decode_time_ += tsi.read_prepare_decode_time_; + read_prepare_parse_time_ += tsi.read_prepare_parse_time_; + + commit_cnt_ += tsi.commit_cnt_; + commit_size_ += tsi.commit_size_; + commit_with_prepare_cnt_ += tsi.commit_with_prepare_cnt_; + read_commit_time_ += tsi.read_commit_time_; + read_commit_decode_time_ += tsi.read_commit_decode_time_; + read_commit_parse_time_ += tsi.read_commit_parse_time_; + + sp_redo_cnt_ += tsi.sp_redo_cnt_; + sp_redo_size_ += tsi.sp_redo_size_; + read_sp_redo_time_ += tsi.read_sp_redo_time_; + read_sp_redo_decode_time_ += tsi.read_sp_redo_decode_time_; + read_sp_redo_parse_time_ += tsi.read_sp_redo_parse_time_; + + sp_commit_cnt_ += tsi.sp_commit_cnt_; + sp_commit_size_ += tsi.sp_commit_size_; + sp_commit_with_redo_cnt_ += tsi.sp_commit_with_redo_cnt_; + read_sp_commit_time_ += tsi.read_sp_commit_time_; + read_sp_commit_decode_time_ += tsi.read_sp_commit_decode_time_; + read_sp_commit_parse_time_ += tsi.read_sp_commit_parse_time_; + + clear_cnt_ += tsi.clear_cnt_; + clear_size_ += tsi.clear_size_; +} + +TransStatInfo TransStatInfo::operator - (const TransStatInfo &tsi) const +{ + TransStatInfo ret_tsi; + + ret_tsi.decode_header_time_ = decode_header_time_ - tsi.decode_header_time_; + + ret_tsi.redo_cnt_ = redo_cnt_ - tsi.redo_cnt_; + ret_tsi.redo_size_ = redo_size_ - tsi.redo_size_; + ret_tsi.read_redo_time_ = read_redo_time_ - tsi.read_redo_time_; + ret_tsi.read_redo_decode_time_ = read_redo_decode_time_ - tsi.read_redo_decode_time_; + ret_tsi.read_redo_parse_time_ = read_redo_parse_time_ - tsi.read_redo_parse_time_; + + ret_tsi.prepare_cnt_ = prepare_cnt_ - tsi.prepare_cnt_; + ret_tsi.prepare_size_ = prepare_size_ - tsi.prepare_size_; + ret_tsi.prepare_with_redo_cnt_ = prepare_with_redo_cnt_ - tsi.prepare_with_redo_cnt_; + ret_tsi.read_prepare_time_ = read_prepare_time_ - 
tsi.read_prepare_time_; + ret_tsi.read_prepare_decode_time_ = read_prepare_decode_time_ - tsi.read_prepare_decode_time_; + ret_tsi.read_prepare_parse_time_ = read_prepare_parse_time_ - tsi.read_prepare_parse_time_; + + ret_tsi.commit_cnt_ = commit_cnt_ - tsi.commit_cnt_; + ret_tsi.commit_size_ = commit_size_ - tsi.commit_size_; + ret_tsi.commit_with_prepare_cnt_ = commit_with_prepare_cnt_ - tsi.commit_with_prepare_cnt_; + ret_tsi.read_commit_time_ = read_commit_time_ - tsi.read_commit_time_; + ret_tsi.read_commit_decode_time_ = read_commit_decode_time_ - tsi.read_commit_decode_time_; + ret_tsi.read_commit_parse_time_ = read_commit_parse_time_ - tsi.read_commit_parse_time_; + + ret_tsi.sp_redo_cnt_ = sp_redo_cnt_ - tsi.sp_redo_cnt_; + ret_tsi.sp_redo_size_ = sp_redo_size_ - tsi.sp_redo_size_; + ret_tsi.read_sp_redo_time_ = read_sp_redo_time_ - tsi.read_sp_redo_time_; + ret_tsi.read_sp_redo_decode_time_ = read_sp_redo_decode_time_ - tsi.read_sp_redo_decode_time_; + ret_tsi.read_sp_redo_parse_time_ = read_sp_redo_parse_time_ - tsi.read_sp_redo_parse_time_; + + ret_tsi.sp_commit_cnt_ = sp_commit_cnt_ - tsi.sp_commit_cnt_; + ret_tsi.sp_commit_size_ = sp_commit_size_ - tsi.sp_commit_size_; + ret_tsi.sp_commit_with_redo_cnt_ = sp_commit_with_redo_cnt_ - tsi.sp_commit_with_redo_cnt_; + ret_tsi.read_sp_commit_time_ = read_sp_commit_time_ - tsi.read_sp_commit_time_; + ret_tsi.read_sp_commit_decode_time_ = read_sp_commit_decode_time_ - tsi.read_sp_commit_decode_time_; + ret_tsi.read_sp_commit_parse_time_ = read_sp_commit_parse_time_ - tsi.read_sp_commit_parse_time_; + + ret_tsi.clear_cnt_ = clear_cnt_ - tsi.clear_cnt_; + ret_tsi.clear_size_ = clear_size_ - tsi.clear_size_; + + return ret_tsi; +} + +void TransStatInfo::do_stat(const int64_t rpc_cnt) +{ + if (rpc_cnt <= 0) { + reset(); + } else { + decode_header_time_ /= rpc_cnt; + + redo_cnt_ /= rpc_cnt; + redo_size_ /= rpc_cnt; + read_redo_time_ /= rpc_cnt; + read_redo_decode_time_ /= rpc_cnt; + read_redo_parse_time_ /= rpc_cnt; + + prepare_cnt_ /= rpc_cnt; + prepare_size_ /= rpc_cnt; + prepare_with_redo_cnt_ /= rpc_cnt; + read_prepare_time_ /= rpc_cnt; + read_prepare_decode_time_ /= rpc_cnt; + read_prepare_parse_time_ /= rpc_cnt; + + commit_cnt_ /= rpc_cnt; + commit_size_ /= rpc_cnt; + commit_with_prepare_cnt_ /= rpc_cnt; + read_commit_time_ /= rpc_cnt; + read_commit_decode_time_ /= rpc_cnt; + read_commit_parse_time_ /= rpc_cnt; + + sp_redo_cnt_ /= rpc_cnt; + sp_redo_size_ /= rpc_cnt; + read_sp_redo_time_ /= rpc_cnt; + read_sp_redo_decode_time_ /= rpc_cnt; + read_sp_redo_parse_time_ /= rpc_cnt; + + sp_commit_cnt_ /= rpc_cnt; + sp_commit_size_ /= rpc_cnt; + sp_commit_with_redo_cnt_ /= rpc_cnt; + read_sp_commit_time_ /= rpc_cnt; + read_sp_commit_decode_time_ /= rpc_cnt; + read_sp_commit_parse_time_ /= rpc_cnt; + + clear_cnt_ /= rpc_cnt; + clear_size_ /= rpc_cnt; + } +} + +int64_t TransStatInfo::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + + if (NULL != buf && buf_len > 0) { + int64_t total_log_cnt = get_log_cnt(); + int64_t total_size = get_log_size(); + + (void)common::databuff_printf(buf, buf_len, pos, "trans_count=%ld", total_log_cnt); + + if (total_log_cnt <= 0) { + (void)common::databuff_printf(buf, buf_len, pos, " "); + } else { + (void)common::databuff_printf(buf, buf_len, pos, "("); + + // redo / prepare / commit + if (commit_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, + "redo=%ld,prepare=%ld(with_redo=%ld),commit=%ld(with_prepare=%ld),", + redo_cnt_, prepare_cnt_, prepare_with_redo_cnt_, + 
commit_cnt_, commit_with_prepare_cnt_); + } + + // sp redo / sp commit + if (sp_commit_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "sp_redo=%ld,sp_commit=%ld,", + sp_redo_cnt_, sp_commit_cnt_); + } + + // clear + if (clear_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "clear=%ld,", clear_cnt_); + } + + pos--; + (void)common::databuff_printf(buf, buf_len, pos, ") "); + } + + (void)common::databuff_printf(buf, buf_len, pos, "trans_size=%s", SIZE_TO_STR(total_size)); + + if (total_log_cnt <= 0) { + (void)common::databuff_printf(buf, buf_len, pos, " "); + } else { + (void)common::databuff_printf(buf, buf_len, pos, "("); + + // redo / prepare / commit + if (commit_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "redo=%s,prepare=%s,commit=%s,", + SIZE_TO_STR(redo_size_), SIZE_TO_STR(prepare_size_), SIZE_TO_STR(commit_size_)); + } + + // sp redo / sp commit + if (sp_commit_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "sp_redo=%s,sp_commit=%s,", + SIZE_TO_STR(sp_redo_size_), SIZE_TO_STR(sp_commit_size_)); + } + + // clear + if (clear_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "clear=%s,", SIZE_TO_STR(clear_size_)); + } + + pos--; + (void)common::databuff_printf(buf, buf_len, pos, ") "); + } + + if (commit_cnt_ > 0) { + if (read_redo_time_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, + "redo_time=%ld(decode=%ld,parse=%ld) ", + read_redo_time_, read_redo_decode_time_, read_redo_parse_time_); + } + + if (read_prepare_time_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, + "prepare_time=%ld(decode=%ld,parse=%ld) ", + read_prepare_time_, read_prepare_decode_time_, read_prepare_parse_time_); + } + + if (read_commit_time_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, + "commit_time=%ld(decode=%ld,parse=%ld) ", + read_commit_time_, read_commit_decode_time_, read_commit_parse_time_); + } + } + + if (sp_commit_cnt_ > 0) { + (void)common::databuff_printf(buf, buf_len, pos, + "sp_redo_time=%ld(decode=%ld,parse=%ld) ", + read_sp_redo_time_, read_sp_redo_decode_time_, read_sp_redo_parse_time_); + + (void)common::databuff_printf(buf, buf_len, pos, + "sp_commit_time=%ld(decode=%ld,parse=%ld)", + read_sp_commit_time_, read_sp_commit_decode_time_, read_sp_commit_parse_time_); + } + } + + return pos; +} + +///////////////////////////////////// FetchStatInfo ///////////////////////////////////// + +void FetchStatInfo::reset() +{ + fetch_log_cnt_ = 0; + fetch_log_size_ = 0; + fetch_log_rpc_cnt_ = 0; + single_rpc_cnt_ = 0; + reach_upper_limit_rpc_cnt_ = 0; + reach_max_log_id_rpc_cnt_ = 0; + no_log_rpc_cnt_ = 0; + reach_max_result_rpc_cnt_ = 0; + fetch_log_rpc_time_ = 0; + fetch_log_rpc_to_svr_net_time_ = 0; + fetch_log_rpc_svr_queue_time_ = 0; + fetch_log_rpc_svr_process_time_ = 0; + fetch_log_rpc_callback_time_ = 0; + handle_rpc_time_ = 0; + handle_rpc_read_log_time_ = 0; + handle_rpc_flush_time_ = 0; + read_log_decode_log_entry_time_ = 0; + + tsi_.reset(); +} + +void FetchStatInfo::update(const FetchStatInfo &fsi) +{ + fetch_log_cnt_ += fsi.fetch_log_cnt_; + fetch_log_size_ += fsi.fetch_log_size_; + fetch_log_rpc_cnt_ += fsi.fetch_log_rpc_cnt_; + single_rpc_cnt_ += fsi.single_rpc_cnt_; + reach_upper_limit_rpc_cnt_ += fsi.reach_upper_limit_rpc_cnt_; + reach_max_log_id_rpc_cnt_ += fsi.reach_max_log_id_rpc_cnt_; + no_log_rpc_cnt_ += fsi.no_log_rpc_cnt_; + reach_max_result_rpc_cnt_ += fsi.reach_max_result_rpc_cnt_; + fetch_log_rpc_time_ += fsi.fetch_log_rpc_time_; + fetch_log_rpc_to_svr_net_time_ += 
fsi.fetch_log_rpc_to_svr_net_time_; + fetch_log_rpc_svr_queue_time_ += fsi.fetch_log_rpc_svr_queue_time_; + fetch_log_rpc_svr_process_time_ += fsi.fetch_log_rpc_svr_process_time_; + fetch_log_rpc_callback_time_ += fsi.fetch_log_rpc_callback_time_; + handle_rpc_time_ += fsi.handle_rpc_time_; + handle_rpc_read_log_time_ += fsi.handle_rpc_read_log_time_; + handle_rpc_flush_time_ += fsi.handle_rpc_flush_time_; + read_log_decode_log_entry_time_ += fsi.read_log_decode_log_entry_time_; + + tsi_.update(fsi.tsi_); +} + +FetchStatInfo FetchStatInfo::operator - (const FetchStatInfo &fsi) const +{ + FetchStatInfo ret_fsi; + + ret_fsi.fetch_log_cnt_ = fetch_log_cnt_ - fsi.fetch_log_cnt_; + ret_fsi.fetch_log_size_ = fetch_log_size_ - fsi.fetch_log_size_; + ret_fsi.fetch_log_rpc_cnt_ = fetch_log_rpc_cnt_ - fsi.fetch_log_rpc_cnt_; + ret_fsi.single_rpc_cnt_ = single_rpc_cnt_ - fsi.single_rpc_cnt_; + ret_fsi.reach_upper_limit_rpc_cnt_ = reach_upper_limit_rpc_cnt_ - fsi.reach_upper_limit_rpc_cnt_; + ret_fsi.reach_max_log_id_rpc_cnt_ = reach_max_log_id_rpc_cnt_ - fsi.reach_max_log_id_rpc_cnt_; + ret_fsi.no_log_rpc_cnt_ = no_log_rpc_cnt_ - fsi.no_log_rpc_cnt_; + ret_fsi.reach_max_result_rpc_cnt_ = reach_max_result_rpc_cnt_ - fsi.reach_max_result_rpc_cnt_; + ret_fsi.fetch_log_rpc_time_ = fetch_log_rpc_time_ - fsi.fetch_log_rpc_time_; + ret_fsi.fetch_log_rpc_to_svr_net_time_ = fetch_log_rpc_to_svr_net_time_ - fsi.fetch_log_rpc_to_svr_net_time_; + ret_fsi.fetch_log_rpc_svr_queue_time_ = fetch_log_rpc_svr_queue_time_ - fsi.fetch_log_rpc_svr_queue_time_; + ret_fsi.fetch_log_rpc_svr_process_time_ = fetch_log_rpc_svr_process_time_ - fsi.fetch_log_rpc_svr_process_time_; + ret_fsi.fetch_log_rpc_callback_time_ = fetch_log_rpc_callback_time_ - fsi.fetch_log_rpc_callback_time_; + ret_fsi.handle_rpc_time_ = handle_rpc_time_ - fsi.handle_rpc_time_; + ret_fsi.handle_rpc_read_log_time_ = handle_rpc_read_log_time_ - fsi.handle_rpc_read_log_time_; + ret_fsi.handle_rpc_flush_time_ = handle_rpc_flush_time_ - fsi.handle_rpc_flush_time_; + ret_fsi.read_log_decode_log_entry_time_ = read_log_decode_log_entry_time_ - fsi.read_log_decode_log_entry_time_; + + ret_fsi.tsi_ = tsi_ - fsi.tsi_; + + return ret_fsi; +} + +///////////////////////////////// FetchStatInfoPrinter ///////////////////////////////// + +FetchStatInfoPrinter::FetchStatInfoPrinter(const FetchStatInfo &cur_stat_info, + const FetchStatInfo &last_stat_info, + const double delta_second) : + delta_fsi_(cur_stat_info - last_stat_info), + delta_second_(delta_second) +{ +} + +int64_t FetchStatInfoPrinter::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + + if (delta_second_ > 0) { + int64_t log_cnt = delta_fsi_.fetch_log_cnt_; + int64_t log_size = delta_fsi_.fetch_log_size_; + int64_t rpc_cnt = delta_fsi_.fetch_log_rpc_cnt_; + int64_t single_rpc_cnt = delta_fsi_.single_rpc_cnt_; + int64_t reach_upper_limit_rpc_cnt = delta_fsi_.reach_upper_limit_rpc_cnt_; + int64_t reach_max_log_id_rpc_cnt = delta_fsi_.reach_max_log_id_rpc_cnt_; + int64_t no_log_rpc_cnt = delta_fsi_.no_log_rpc_cnt_; + int64_t reach_max_result_rpc_cnt = delta_fsi_.reach_max_result_rpc_cnt_; + int64_t rpc_time = delta_fsi_.fetch_log_rpc_time_; + int64_t svr_queue_time = delta_fsi_.fetch_log_rpc_svr_queue_time_; + int64_t svr_process_time = delta_fsi_.fetch_log_rpc_svr_process_time_; + int64_t callback_time = delta_fsi_.fetch_log_rpc_callback_time_; + + // Network time from liboblog to server + int64_t l2s_net_time = delta_fsi_.fetch_log_rpc_to_svr_net_time_; + + // The network time from 
server to liboblog is calculated and is inaccurate
+    // including: observer's outgoing packet queue, liboblog's incoming packet queue, outgoing packet encoding, incoming packet decoding, and network time
+    int64_t s2l_net_time = rpc_time - svr_queue_time - svr_process_time - callback_time -
+        l2s_net_time;
+
+    // Total asynchronous processing RPC time
+    int64_t handle_rpc_time = delta_fsi_.handle_rpc_time_;
+
+    // Parsing log time
+    int64_t read_log_time = delta_fsi_.handle_rpc_read_log_time_;
+
+    // Deserialization log entry time
+    int64_t decode_log_entry_time = delta_fsi_.read_log_decode_log_entry_time_;
+
+    // Output Transaction Task Time
+    int64_t flush_time = delta_fsi_.handle_rpc_flush_time_;
+
+    // Calculate transaction statistics difference
+    TransStatInfo tsi = delta_fsi_.tsi_;
+
+    // Each statistic item is divided by the number of RPC to obtain statistics per RPC
+    tsi.do_stat(rpc_cnt);
+
+    int64_t traffic = static_cast<int64_t>(static_cast<double>(log_size) / delta_second_);
+    int64_t rpc_cnt_per_sec = static_cast<int64_t>(static_cast<double>(rpc_cnt) / delta_second_);
+    int64_t single_rpc_cnt_per_sec =
+        static_cast<int64_t>(static_cast<double>(single_rpc_cnt) / delta_second_);
+    int64_t reach_upper_limit_rpc_cnt_per_sec =
+        static_cast<int64_t>(static_cast<double>(reach_upper_limit_rpc_cnt) / delta_second_);
+    int64_t reach_max_log_id_rpc_cnt_per_sec =
+        static_cast<int64_t>(static_cast<double>(reach_max_log_id_rpc_cnt) / delta_second_);
+    int64_t no_log_rpc_cnt_per_sec =
+        static_cast<int64_t>(static_cast<double>(no_log_rpc_cnt) / delta_second_);
+    int64_t reach_max_result_rpc_cnt_per_sec =
+        static_cast<int64_t>(static_cast<double>(reach_max_result_rpc_cnt) / delta_second_);
+    int64_t log_size_per_rpc = rpc_cnt <= 0 ? 0 : log_size / rpc_cnt;
+    int64_t log_cnt_per_rpc = rpc_cnt <= 0 ? 0 : log_cnt / rpc_cnt;
+    int64_t rpc_time_per_rpc = rpc_cnt <= 0 ? 0 : rpc_time / rpc_cnt;
+    int64_t svr_process_time_per_rpc = rpc_cnt <= 0 ? 0 : svr_process_time / rpc_cnt;
+    int64_t svr_queue_time_per_rpc = rpc_cnt <= 0 ? 0 : svr_queue_time / rpc_cnt;
+    int64_t callback_time_per_rpc = rpc_cnt <= 0 ? 0 : callback_time / rpc_cnt;
+    int64_t l2s_net_time_per_rpc = rpc_cnt <= 0 ? 0 : l2s_net_time / rpc_cnt;
+    int64_t s2l_net_time_per_rpc = rpc_cnt <= 0 ? 0 : s2l_net_time / rpc_cnt;
+    int64_t handle_rpc_time_per_rpc = rpc_cnt <= 0 ? 0 : handle_rpc_time / rpc_cnt;
+    int64_t read_log_time_per_rpc = rpc_cnt <= 0 ? 0 : read_log_time / rpc_cnt;
+    int64_t decode_log_entry_time_per_rpc = (rpc_cnt <= 0 ? 0 : decode_log_entry_time / rpc_cnt);
+    int64_t flush_time_per_rpc = (rpc_cnt <= 0 ? 0 : flush_time / rpc_cnt);
+
+
+    (void)databuff_printf(buf, buf_len, pos,
+        "traffic=%s/sec size/rpc=%s log_cnt/rpc=%ld rpc_cnt/sec=%ld "
+        "single_rpc/sec=%ld(upper_limit=%ld,max_log=%ld,no_log=%ld,max_result=%ld) "
+        "rpc_time=%ld svr_time=(queue=%ld,process=%ld) net_time=(l2s=%ld,s2l=%ld) cb_time=%ld "
+        "handle_rpc_time=%ld flush_time=%ld read_log_time=%ld(log_entry=%ld,trans=%ld) %s",
+        SIZE_TO_STR(traffic), SIZE_TO_STR(log_size_per_rpc), log_cnt_per_rpc, rpc_cnt_per_sec,
+        single_rpc_cnt_per_sec, reach_upper_limit_rpc_cnt_per_sec,
+        reach_max_log_id_rpc_cnt_per_sec, no_log_rpc_cnt_per_sec, reach_max_result_rpc_cnt_per_sec,
+        rpc_time_per_rpc, svr_queue_time_per_rpc, svr_process_time_per_rpc,
+        l2s_net_time_per_rpc, s2l_net_time_per_rpc, callback_time_per_rpc,
+        handle_rpc_time_per_rpc, flush_time_per_rpc, read_log_time_per_rpc,
+        decode_log_entry_time_per_rpc, tsi.get_total_time(), to_cstring(tsi));
+  }
+
+  return pos;
+}
+
+}
+}
diff --git a/src/liboblog/src/ob_log_fetch_stat_info.h b/src/liboblog/src/ob_log_fetch_stat_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..ccd1dbd3402bbf7680a6e2b096b7bd1748d34951
--- /dev/null
+++ b/src/liboblog/src/ob_log_fetch_stat_info.h
@@ -0,0 +1,218 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STAT_INFO_H__
+#define OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STAT_INFO_H__
+
+#include "lib/utility/ob_print_utils.h"   // TO_STRING_KV
+
+namespace oceanbase
+{
+namespace liboblog
+{
+
+// Read transaction log statistics
+struct TransStatInfo
+{
+  // Decoding transaction log header time
+  int64_t decode_header_time_;
+
+  // ********** REDO **********
+  int64_t redo_cnt_;
+  int64_t redo_size_;
+  int64_t read_redo_time_;
+  // Read redo subprocess time: decode and parse
+  int64_t read_redo_decode_time_;
+  int64_t read_redo_parse_time_;
+
+  // ********** PREPARE **********
+  int64_t prepare_cnt_;
+  int64_t prepare_size_;
+  int64_t prepare_with_redo_cnt_;
+  int64_t read_prepare_time_;
+  // Read prepare log subprocess time: decode and parse
+  int64_t read_prepare_decode_time_;  // Decode prepare log time
+  int64_t read_prepare_parse_time_;   // Parse prepare log time
+
+  // ********** COMMIT **********
+  int64_t commit_cnt_;
+  int64_t commit_size_;
+  int64_t commit_with_prepare_cnt_;
+  int64_t participant_cnt_;           // Number of participants in each transaction
+  int64_t read_commit_time_;
+  // Read commit log subprocess time: decode and parse
+  int64_t read_commit_decode_time_;
+  int64_t read_commit_parse_time_;
+
+  // ********** SP TRANS REDO **********
+  int64_t sp_redo_cnt_;
+  int64_t sp_redo_size_;
+  int64_t read_sp_redo_time_;
+  // Read sp redo subprocess time
+  int64_t read_sp_redo_decode_time_;
+  int64_t read_sp_redo_parse_time_;
+
+  // ********** SP TRANS COMMIT **********
+  int64_t sp_commit_cnt_;
+  int64_t sp_commit_size_;
+  int64_t sp_commit_with_redo_cnt_;
+  int64_t read_sp_commit_time_;
+  // Read sp commit subprocess time
+  int64_t read_sp_commit_decode_time_;
+  int64_t read_sp_commit_parse_time_;
+
+  // ********** CLEAR **********
+  int64_t clear_cnt_;
+  int64_t clear_size_;
+
+  TransStatInfo() { reset(); }
+  void reset();
+  void update(const TransStatInfo &tsi);
+  TransStatInfo operator - (const TransStatInfo &tsi) const;
+  void do_stat(const int64_t rpc_cnt);
+
+  int64_t get_total_time() const
+  {
+    return decode_header_time_ + get_decode_time() + get_parse_time();
+  }
+
+  int64_t get_decode_time() const
+  {
+    return read_redo_decode_time_ + read_prepare_decode_time_ + read_commit_decode_time_ +
+        read_sp_redo_decode_time_ + read_sp_commit_decode_time_;
+  }
+
+  int64_t get_parse_time() const
+  {
+    return read_redo_parse_time_ + read_prepare_parse_time_ + read_commit_parse_time_ +
+        read_sp_redo_parse_time_ + read_sp_commit_parse_time_;
+  }
+
+  int64_t get_log_cnt() const
+  {
+    return redo_cnt_ + prepare_cnt_ - prepare_with_redo_cnt_ + commit_cnt_ -
+        commit_with_prepare_cnt_ + sp_redo_cnt_ + sp_commit_cnt_ + clear_cnt_;
+  }
+
+  int64_t get_log_size() const
+  {
+    return redo_size_ + prepare_size_ + commit_size_ + sp_redo_size_ + sp_commit_size_ +
+        clear_size_;
+  }
+
+  int64_t to_string(char* buf, const int64_t buf_len) const;
+};
+
+///////////////////////////////// FetchStatInfo /////////////////////////////////
+
+// Fetch log overall process statistics
+struct FetchStatInfo
+{
+  int64_t fetch_log_cnt_;                 // Number of log entries
+  int64_t fetch_log_size_;                // Fetch log size
+
+  ///////////////// RPC-related statistics ////////////////////
+  int64_t fetch_log_rpc_cnt_;             // Number of fetch log rpc
+
+  int64_t single_rpc_cnt_;                // Number of rpc that stop immediately after execution
+  int64_t reach_upper_limit_rpc_cnt_;     // Number of rpc that reach upper limit
+  int64_t reach_max_log_id_rpc_cnt_;      // Number of rpc that reach max log id
+
+  int64_t no_log_rpc_cnt_;                // Number of rpc without log
+
+  int64_t reach_max_result_rpc_cnt_;      // Number of rpc that reach max result
+
+  // Total time of fetch log RPC: including network, observer processing, asynchronous callback processing
+  int64_t fetch_log_rpc_time_;
+
+  // Network time from liboblog to observer
+  int64_t fetch_log_rpc_to_svr_net_time_;
+
+  // observer queuing time
+  int64_t fetch_log_rpc_svr_queue_time_;
+
+  // observer processing time
+  int64_t fetch_log_rpc_svr_process_time_;
+
+  // RPC local callback processing time
+  int64_t fetch_log_rpc_callback_time_;
+
+  // Total log processing time
+  int64_t handle_rpc_time_;
+
+  // Processing log flow: time to read logs
+  int64_t handle_rpc_read_log_time_;
+
+  // Processing log flow: flush partition transaction operation time
+  int64_t handle_rpc_flush_time_;
+
+  // Deserialize log entry time in the read log process
+  int64_t read_log_decode_log_entry_time_;
+
+  // Transaction resolution statistics
+  TransStatInfo tsi_;
+
+  FetchStatInfo() { reset(); }
+  void reset();
+
+  // Whether any logs were fetched or any RPC was executed
+  bool is_valid()
+  {
+    return fetch_log_cnt_ > 0 || fetch_log_rpc_cnt_ > 0;
+  }
+
+  // Update statistical information
+  void update(const FetchStatInfo &fsi);
+  FetchStatInfo operator - (const FetchStatInfo &fsi) const;
+
+  TO_STRING_KV(K_(fetch_log_cnt),
+      K_(fetch_log_size),
+      K_(fetch_log_rpc_cnt),
+      K_(single_rpc_cnt),
+      K_(reach_upper_limit_rpc_cnt),
+      K_(reach_max_log_id_rpc_cnt),
+      K_(no_log_rpc_cnt),
+      K_(reach_max_result_rpc_cnt),
+      K_(fetch_log_rpc_time),
+      K_(fetch_log_rpc_to_svr_net_time),
+      K_(fetch_log_rpc_svr_queue_time),
+      K_(fetch_log_rpc_svr_process_time),
+      K_(fetch_log_rpc_callback_time),
+      K_(handle_rpc_time),
+      K_(handle_rpc_read_log_time),
+      K_(handle_rpc_flush_time),
+      K_(read_log_decode_log_entry_time),
+      
K_(tsi)); +}; + +///////////////////////////////// FetchStatInfoPrinter ///////////////////////////////// + +// FetchStatInfo Printers +struct FetchStatInfoPrinter +{ + FetchStatInfoPrinter(const FetchStatInfo &cur_stat_info, + const FetchStatInfo &last_stat_info, + const double delta_second); + + int64_t to_string(char* buf, const int64_t buf_len) const; + + FetchStatInfo delta_fsi_; + const double delta_second_; + +private: + DISALLOW_COPY_AND_ASSIGN(FetchStatInfoPrinter); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetch_stream.cpp b/src/liboblog/src/ob_log_fetch_stream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e478d18db753b84a44dbb0d3fad44b0d05348e3f --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream.cpp @@ -0,0 +1,2128 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetch_stream.h" + +#include "lib/container/ob_se_array_iterator.h" // begin +#include "lib/allocator/page_arena.h" // ObArenaAllocator + +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_rpc.h" // IObLogRpc +#include "ob_log_svr_finder.h" // IObLogSvrFinder +#include "ob_log_fetcher_heartbeat_worker.h" // IObLogFetcherHeartbeatWorker +#include "ob_log_stream_worker.h" // IObLogStreamWorker +#include "ob_log_part_progress_controller.h" // PartProgressController +#include "ob_log_trace_id.h" // ObLogTraceIdGuard + +using namespace oceanbase::common; +using namespace oceanbase::obrpc; + +namespace oceanbase +{ +namespace liboblog +{ + +int64_t FetchStream::g_stream_max_part_count = ObLogConfig::default_stream_max_partition_count; +int64_t FetchStream::g_stream_life_time = ObLogConfig::default_stream_life_time_sec * _SEC_; +int64_t FetchStream::g_rpc_timeout = ObLogConfig::default_fetch_log_rpc_timeout_sec * _SEC_; +int64_t FetchStream::g_fetch_log_cnt_per_part_per_round = ObLogConfig::default_fetch_log_cnt_per_part_per_round; +int64_t FetchStream::g_dml_progress_limit = ObLogConfig::default_progress_limit_sec_for_dml * _SEC_; +int64_t FetchStream::g_blacklist_survival_time = ObLogConfig::default_blacklist_survival_time_sec * _SEC_; +int64_t FetchStream::g_check_switch_server_interval = ObLogConfig::default_check_switch_server_interval_min * _MIN_; +bool FetchStream::g_print_rpc_handle_info = ObLogConfig::default_print_rpc_handle_info; +bool FetchStream::g_print_stream_dispatch_info = ObLogConfig::default_print_stream_dispatch_info; + +const char *FetchStream::print_state(State state) +{ + const char *str = "UNKNOWN"; + switch (state) { + case IDLE: + str = "IDLE"; + break; + case FETCH_LOG: + str = "FETCH_LOG"; + break; + default: + str = "UNKNOWN"; + break; + } + return str; +} + +FetchStream::FetchStream() : fetch_log_arpc_(*this) +{ + reset(); +} + +FetchStream::~FetchStream() +{ + reset(); +} + +void FetchStream::reset() +{ + // Wait for asynchronous RPC to end before clearing data + fetch_log_arpc_.stop(); + + state_ = State::IDLE; + stype_ = FETCH_STREAM_TYPE_HOT; + svr_.reset(); + rpc_ = NULL; + svr_finder_ = NULL; + 
heartbeater_ = NULL; + stream_worker_ = NULL; + rpc_result_pool_ = NULL; + progress_controller_ = NULL; + + upper_limit_ = OB_INVALID_TIMESTAMP; + need_open_stream_ = false; + stream_seq_.reset(); + last_feedback_tstamp_ = OB_INVALID_TIMESTAMP; + last_switch_server_tstamp_ = 0; + fetch_task_pool_.reset(); + fetch_log_arpc_.reset(); + + last_stat_time_ = OB_INVALID_TIMESTAMP; + cur_stat_info_.reset(); + last_stat_info_.reset(); + + FSListNode::reset(); +} + +void FetchStream::reset(const common::ObAddr &svr, + const FetchStreamType stream_type, + IObLogRpc &rpc, + IObLogSvrFinder &svr_finder, + IObLogFetcherHeartbeatWorker &heartbeater, + IObLogStreamWorker &stream_worker, + IFetchLogARpcResultPool &rpc_result_pool, + PartProgressController &progress_controller) +{ + reset(); + + svr_ = svr; + stype_ = stream_type; + rpc_ = &rpc; + svr_finder_ = &svr_finder; + heartbeater_ = &heartbeater; + stream_worker_ = &stream_worker; + rpc_result_pool_ = &rpc_result_pool; + progress_controller_ = &progress_controller; + + fetch_log_arpc_.reset(svr, rpc, stream_worker, rpc_result_pool); +} + +int FetchStream::add_fetch_task(PartFetchCtx &task) +{ + int ret = OB_SUCCESS; + bool is_pool_idle = false; + + if (OB_UNLIKELY(task.get_fetch_stream_type() != stype_)) { + LOG_ERROR("invalid part task, stream type does not match", K(stype_), + K(task.get_fetch_stream_type()), K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid stream worker", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } else { + // Mark to start fetching logs + task.dispatch_in_fetch_stream(svr_, *this); + + if (OB_FAIL(fetch_task_pool_.push(task, is_pool_idle))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_ERROR("push task into pool fail", KR(ret), K(task)); + } + } else { + LOG_DEBUG("[STAT] [FETCH_STREAM] [ADD_FETCH_TASK]", "fetch_task", &task, + "fetch_stream", this, K(is_pool_idle), "fetch_task", task, + "fetch_stream", *this); + + if (is_pool_idle) { + // If the task pool is in IDLE state before the task is inserted, then no thread is processing the current fetch log stream, + // and the fetch log stream task is marked as new. + // For a new fetch log stream task, it should be immediately assigned to a worker thread for processing + if (OB_FAIL(stream_worker_->dispatch_stream_task(*this, "EmptyStream"))) { + LOG_ERROR("dispatch stream task fail", KR(ret)); + } else { + // Note: You cannot continue to manipulate this data structure afterwards !!!!! + } + } + } + } + + return ret; +} + +int FetchStream::handle(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + bool print_stream_dispatch_info = ATOMIC_LOAD(&g_print_stream_dispatch_info); + + if (print_stream_dispatch_info) { + LOG_INFO("[STAT] [FETCH_STREAM] begin handle", "fetch_stream", this, + "fetch_stream", *this); + } else { + LOG_DEBUG("[STAT] [FETCH_STREAM] begin handle", "fetch_stream", this, + "fetch_stream", *this); + } + + if (IDLE == state_) { + if (OB_FAIL(handle_idle_task_(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle IDLE task fail", KR(ret)); + } + } + } else if (FETCH_LOG == state_) { + if (OB_FAIL(handle_fetch_log_task_(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle FETCH_LOG task fail", KR(ret)); + } + } + } else { + LOG_ERROR("invalid state", K(state_)); + ret = OB_INVALID_ERROR; + } + + // Note: The following can no longer continue the operation, there may be concurrency issues !!!! 
+ + return ret; +} + +// The purpose of a timed task is to assign itself to a worker thread +void FetchStream::process_timer_task() +{ + int ret = OB_SUCCESS; + static int64_t max_dispatch_time = 0; + int64_t start_time = get_timestamp(); + int64_t end_time = 0; + + LOG_DEBUG("[STAT] [WAKE_UP_STREAM_TASK]", "task", this, "task", *this); + + if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid stream worker", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(stream_worker_->dispatch_stream_task(*this, "TimerWakeUp"))) { + LOG_ERROR("dispatch stream task fail", KR(ret), K(this)); + } else { + ATOMIC_STORE(&end_time, get_timestamp()); + max_dispatch_time = std::max(max_dispatch_time, ATOMIC_LOAD(&end_time) - start_time); + + if (REACH_TIME_INTERVAL(STAT_INTERVAL)) { + LOG_INFO("[STAT] [FETCH_STREAM_TIMER_TASK]", K(max_dispatch_time)); + } + } +} + +void FetchStream::configure(const ObLogConfig & config) +{ + int64_t stream_max_partition_count = config.stream_max_partition_count; + int64_t stream_life_time_sec = config.stream_life_time_sec; + int64_t fetch_log_rpc_timeout_sec = config.fetch_log_rpc_timeout_sec; + int64_t fetch_log_cnt_per_part_per_round = config.fetch_log_cnt_per_part_per_round; + int64_t dml_progress_limit_sec = config.progress_limit_sec_for_dml; + int64_t blacklist_survival_time_sec = config.blacklist_survival_time_sec; + int64_t check_switch_server_interval_min = config.check_switch_server_interval_min; + bool print_rpc_handle_info = config.print_rpc_handle_info; + bool print_stream_dispatch_info = config.print_stream_dispatch_info; + + ATOMIC_STORE(&g_stream_max_part_count, stream_max_partition_count); + LOG_INFO("[CONFIG]", K(stream_max_partition_count)); + ATOMIC_STORE(&g_stream_life_time, stream_life_time_sec * _SEC_); + LOG_INFO("[CONFIG]", K(stream_life_time_sec)); + ATOMIC_STORE(&g_rpc_timeout, fetch_log_rpc_timeout_sec * _SEC_); + LOG_INFO("[CONFIG]", K(fetch_log_rpc_timeout_sec)); + ATOMIC_STORE(&g_fetch_log_cnt_per_part_per_round, fetch_log_cnt_per_part_per_round); + LOG_INFO("[CONFIG]", K(fetch_log_cnt_per_part_per_round)); + ATOMIC_STORE(&g_dml_progress_limit, dml_progress_limit_sec * _SEC_); + LOG_INFO("[CONFIG]", K(dml_progress_limit_sec)); + ATOMIC_STORE(&g_blacklist_survival_time, blacklist_survival_time_sec * _SEC_); + LOG_INFO("[CONFIG]", K(blacklist_survival_time_sec)); + ATOMIC_STORE(&g_check_switch_server_interval, check_switch_server_interval_min * _MIN_); + LOG_INFO("[CONFIG]", K(check_switch_server_interval_min)); + ATOMIC_STORE(&g_print_rpc_handle_info, print_rpc_handle_info); + LOG_INFO("[CONFIG]", K(print_rpc_handle_info)); + ATOMIC_STORE(&g_print_stream_dispatch_info, print_stream_dispatch_info); + LOG_INFO("[CONFIG]", K(print_stream_dispatch_info)); +} + +void FetchStream::do_stat() +{ + ObByteLockGuard lock_guard(stat_lock_); + + int64_t cur_time = get_timestamp(); + int64_t delta_time = cur_time - last_stat_time_; + double delta_second = static_cast(delta_time) / static_cast(_SEC_); + + if (last_stat_time_ <= 0) { + last_stat_time_ = cur_time; + last_stat_info_ = cur_stat_info_; + } else if (delta_second <= 0) { + // Statistics are too frequent, ignore the statistics here, otherwise the following will lead to divide by zero error + LOG_DEBUG("fetch stream stat too frequently", K(delta_time), K(delta_second), + K(last_stat_time_), K(this)); + } else { + FetchStatInfoPrinter fsi_printer(cur_stat_info_, last_stat_info_, delta_second); + + _LOG_INFO("[STAT] [FETCH_STREAM] stream=%s(%p:%s) part=%ld(queue=%ld) %s", 
to_cstring(svr_), this, + print_fetch_stream_type(stype_), fetch_task_pool_.total_count(), + fetch_task_pool_.queued_count(), to_cstring(fsi_printer)); + + last_stat_time_ = cur_time; + last_stat_info_ = cur_stat_info_; + } +} + +void FetchStream::handle_when_leave_(const char *leave_reason) const +{ + // Note: This function only prints logs and cannot access any data members, except global members + // Because of the multi-threaded problem + bool print_stream_dispatch_info = ATOMIC_LOAD(&g_print_stream_dispatch_info); + if (print_stream_dispatch_info) { + // No data members can be accessed in when print log, only the address is printed + LOG_INFO("[STAT] [FETCH_STREAM] leave stream", "fetch_stream", this, K(leave_reason)); + } else { + LOG_DEBUG("[STAT] [FETCH_STREAM] leave stream", "fetch_stream", this, K(leave_reason)); + } +} + +int FetchStream::handle_idle_task_(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + bool pool_become_idle = false; + bool task_list_changed = false; + + if (OB_UNLIKELY(IDLE != state_)) { + LOG_ERROR("state does not match IDLE", K(state_)); + ret = OB_STATE_NOT_MATCH; + } + // Update task pool status and prepare task list + else if (OB_FAIL(fetch_task_pool_.update(pool_become_idle, task_list_changed))) { + LOG_ERROR("task pool prepare task fail", KR(ret)); + } + // If the task pool changes to IDLE state, no further operations will be performed + else if (pool_become_idle) { + // Note: You must not continue to manipulate any data structures here, as another thread may have taken over the fetch log stream. + // See the add_fetch_task() implementation for details + handle_when_leave_("StreamBecomeEmpty"); + } else { + bool open_stream_succeed = false; + + // Only when the task list has changed, or the active setting needs to open the stream, then the open stream operation needs to be performed + if (! task_list_changed && ! need_open_stream_) { + open_stream_succeed = true; + } else { + int64_t part_count = 0; + const char *discard_reason = task_list_changed ? "TaskListChangedOnIdle" : "ForceOpenStream"; + + open_stream_succeed = false; + + // First discard the old request + fetch_log_arpc_.discard_request(discard_reason); + + if (OB_FAIL(open_stream_(open_stream_succeed, part_count))) { + LOG_ERROR("open stream fail", KR(ret)); + } else if (open_stream_succeed) { + // Open stream successfully, ready to fetch log by asynchronous RPC request + if (OB_FAIL(prepare_rpc_request_(part_count))) { + LOG_ERROR("prepare rpc request fail", KR(ret), K(part_count)); + } + } + } + + if (OB_SUCCESS == ret) { + // Failed to open stream, kick out all partitions and start over + if (! open_stream_succeed) { + if (OB_FAIL(kick_out_all_(OPEN_STREAM_FAIL))) { + LOG_ERROR("kick out all partition fail after open stream error", KR(ret)); + } else { + // Recursively call the handle function to re-prepare the task + // Note: You cannot continue to manipulate any data structures after handle, there are concurrency scenarios + ret = handle(stop_flag); + } + } else { + bool need_fetch_log = false; + + // Update upper limit, prepare for fetching logs + if (OB_FAIL(get_upper_limit(upper_limit_))) { + LOG_ERROR("update upper limit fail", KR(ret)); + } + // Check need to fetch logs + else if (OB_FAIL(check_need_fetch_log_(upper_limit_, need_fetch_log))) { + LOG_ERROR("check need fetch log fail", KR(ret), K(upper_limit_)); + } else if (! need_fetch_log) { + // If you don't need to fetch the log, you will go into hibernation + // No further manipulation of the data structure !!!! 
+ if (OB_FAIL(hibernate_())) { + LOG_ERROR("hibernate fail", KR(ret)); + } + } else { + // Go to fetch log status + switch_state(FETCH_LOG); + + // launch an asynchronous fetch log RPC + bool rpc_send_succeed = false; + if (OB_FAIL(async_fetch_log_(rpc_send_succeed))) { + LOG_ERROR("async fetch log fail", KR(ret)); + } else if (rpc_send_succeed) { + // Asynchronous fetch log RPC success, wait for RPC callback, after that can not continue to manipulate any data structure + // Note: You cannot continue to manipulate any data structures afterwards !!!!! + handle_when_leave_("AsyncRpcSendSucc"); + } else { + // RPC failure, directly into the FETCH_LOG processing process + // Note: You cannot continue to manipulate any data structures afterwards !!!!! + ret = handle(stop_flag); + } + } + } + } + } + return ret; +} + +int FetchStream::open_stream_(bool &rpc_succeed, int64_t &part_count) +{ + int ret = OB_SUCCESS; + OpenStreamSRpc open_stream_rpc; + FetchTaskList &task_list = fetch_task_pool_.get_task_list(); + int64_t stream_life_time = ATOMIC_LOAD(&g_stream_life_time); + int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout); + + ObLogTraceIdGuard trace_guard; + + rpc_succeed = false; + need_open_stream_ = true; // Default requires open stream + part_count = 0; + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid rpc handler", K(rpc_)); + ret = OB_INVALID_ERROR; + } + // Execute synchronous RPC + else if (OB_FAIL(open_stream_rpc.open_stream(*rpc_, svr_, rpc_timeout, + task_list, stream_seq_, stream_life_time))) { + LOG_ERROR("launch open stream rpc fail", KR(ret), K(svr_), K(rpc_timeout), + K(task_list), K(stream_seq_), K(stream_life_time)); + } else { + // Checking RPC return values + const ObRpcResultCode &rcode = open_stream_rpc.get_result_code(); + const ObLogOpenStreamResp &resp = open_stream_rpc.get_resp(); + const ObLogOpenStreamReq &req = open_stream_rpc.get_req(); + + // RPC failure + if (OB_SUCCESS != rcode.rcode_) { + LOG_ERROR("open stream fail on rpc", K_(svr), K(rcode), K(req), K(resp)); + } else if (OB_SUCCESS != resp.get_err()) { + // server return error + LOG_ERROR("open stream fail on server", K_(svr), "svr_err", resp.get_err(), + "svr_debug_err", resp.get_debug_err(), K(rcode), K(req), K(resp)); + } else { + // Ending the old stream + LOG_DEBUG("[STAT] [FETCH_STREAM] [CLOSE_STREAM]", "fetch_stream", this, K_(stream_seq)); + + // Open new stream + LOG_DEBUG("[STAT] [FETCH_STREAM] [OPEN_STREAM]", "fetch_stream", this, + "stream_seq", resp.get_stream_seq(), + "part_count", task_list.count()); + + // Open stream successfully + need_open_stream_ = false; + rpc_succeed = true; + stream_seq_ = resp.get_stream_seq(); + last_feedback_tstamp_ = OB_INVALID_TIMESTAMP; + part_count = task_list.count(); + } + } + + return ret; +} + +int FetchStream::kick_out_all_(KickOutReason kick_out_reason) +{ + int ret = OB_SUCCESS; + FetchTaskList list; + + // Kick all partitions in the list from the task pool and return the task list + if (OB_FAIL(fetch_task_pool_.kick_out_task_list(list))) { + LOG_ERROR("kick out task list from task pool fail", KR(ret)); + } else { + PartFetchCtx *task = list.head(); + + while (OB_SUCCESS == ret && NULL != task) { + PartFetchCtx *next = task->get_next(); + + // Take down from the linklist and reset the linklist node information + task->reset_list_node(); + + // Distribute the log fetching task to the next server's log fetching stream + if (OB_FAIL(dispatch_fetch_task_(*task, kick_out_reason))) { + LOG_ERROR("dispatch fetch task fail", KR(ret), KPC(task)); + } else { + task = 
next; + } + } + + if (OB_SUCCESS == ret) { + list.reset(); + } + } + + return ret; +} + +int FetchStream::dispatch_fetch_task_(PartFetchCtx &task, + KickOutReason dispatch_reason) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid stream worker", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } else { + // The server is not blacklisted when the stream is actively switch and when the partition is discarded, but is blacklisted in all other cases. + if (need_add_into_blacklist_(dispatch_reason)) { + // Get the total time of the current partition of the server service at this time + int64_t svr_start_fetch_tstamp = OB_INVALID_TIMESTAMP; + + if (OB_FAIL(task.get_cur_svr_start_fetch_tstamp(svr_, svr_start_fetch_tstamp))) { + LOG_ERROR("get_cur_svr_start_fetch_tstamp fail", KR(ret), "pkey", task.get_pkey(), + K(svr_), K(svr_start_fetch_tstamp)); + } else { + int64_t svr_service_time = get_timestamp() - svr_start_fetch_tstamp; + int64_t cur_survival_time = ATOMIC_LOAD(&g_blacklist_survival_time); + int64_t survival_time = cur_survival_time; + // Server add into blacklist + if (OB_FAIL(task.add_into_blacklist(svr_, svr_service_time, survival_time))) { + LOG_ERROR("task add into blacklist fail", KR(ret), K(task), K(svr_), + "svr_service_time", TVAL_TO_STR(svr_service_time), + "survival_time", TVAL_TO_STR(survival_time)); + } + } + } + + if (OB_SUCCESS == ret) { + const char *dispatch_reason_str = print_kick_out_reason_(dispatch_reason); + if (OB_FAIL(stream_worker_->dispatch_fetch_task(task, dispatch_reason_str))) { + // Assignment of fetch log tasks + LOG_ERROR("dispatch fetch task fail", KR(ret), K(task), + "dispatch_reason", dispatch_reason_str); + } else { + // You cannot continue with the task afterwards + } + } + } + + return ret; +} + +int FetchStream::get_upper_limit(int64_t &upper_limit_us) +{ + int ret = OB_SUCCESS; + int64_t min_progress = OB_INVALID_TIMESTAMP; + + if (OB_ISNULL(progress_controller_)) { + LOG_ERROR("invalid progress controller", K(progress_controller_)); + ret = OB_INVALID_ERROR; + } + // Get global minimum progress + else if (OB_FAIL(progress_controller_->get_min_progress(min_progress))) { + LOG_ERROR("get_min_progress fail", KR(ret), KPC(progress_controller_)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == min_progress)) { + LOG_ERROR("current min progress is invalid", K(min_progress), KPC(progress_controller_)); + ret = OB_INVALID_ERROR; + } else { + // DDL partition is not limited by progress limit, here upper limit is set to a future value + if (FETCH_STREAM_TYPE_DDL == stype_) { + upper_limit_us = min_progress + _YEAR_; + } else { + // Other partition are limited by progress limit + upper_limit_us = min_progress + ATOMIC_LOAD(&g_dml_progress_limit); + } + } + + return ret; +} + +int FetchStream::check_need_fetch_log_(const int64_t limit, bool &need_fetch_log) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(limit <= 0)) { + LOG_ERROR("invalid upper limit", K(limit)); + ret = OB_INVALID_ARGUMENT; + } else { + PartFetchCtx *task = fetch_task_pool_.get_task_list().head(); + + need_fetch_log = false; + + // Iterate through all tasks, as long as there is a task less than upper limit, then you need to continue to fetch logs + while (! 
need_fetch_log && OB_SUCCESS == ret && NULL != task) { + int64_t part_progress = task->get_progress(); + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == part_progress)) { + LOG_ERROR("fetch task progress is invalid", K(part_progress), KPC(task)); + ret = OB_ERR_UNEXPECTED; + } else { + need_fetch_log = (part_progress < limit); + } + + if (OB_SUCCESS == ret) { + task = task->get_next(); + } + } + } + return ret; +} + +int FetchStream::hibernate_() +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid stream worker", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(stream_worker_->hibernate_stream_task(*this, "FetchStream"))) { + LOG_ERROR("hibernate_stream_task fail", KR(ret)); + } else { + // Note: You can't continue to manipulate the structure after that, there are concurrency issues!!! + handle_when_leave_("Hibernate"); + } + + return ret; +} + +int FetchStream::prepare_rpc_request_(const int64_t part_count) +{ + int ret = OB_SUCCESS; + + // TODO: Currently, every time a RPC request is prepared, the default value is used, find a way to optimize + bool need_feed_back = false; + int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout); + int64_t fetch_log_cnt_per_part_per_round = ATOMIC_LOAD(&g_fetch_log_cnt_per_part_per_round); + + if (OB_FAIL(fetch_log_arpc_.prepare_request(stream_seq_, part_count, + fetch_log_cnt_per_part_per_round, + need_feed_back, + rpc_timeout))) { + LOG_ERROR("prepare request for rpc fail", KR(ret), K(stream_seq_), K(part_count), + K(fetch_log_cnt_per_part_per_round), K(need_feed_back), K(rpc_timeout)); + } + + return ret; +} + +int FetchStream::async_fetch_log_(bool &rpc_send_succeed) +{ + int ret = OB_SUCCESS; + + rpc_send_succeed = false; + + // Launch an asynchronous RPC + if (OB_FAIL(fetch_log_arpc_.async_fetch_log(stream_seq_, + upper_limit_, + rpc_send_succeed))) { + LOG_ERROR("async_fetch_log fail", KR(ret), K(stream_seq_), K(upper_limit_), + K(fetch_log_arpc_)); + } else { + // Asynchronous RPC execution succeeded + // Note: You cannot continue to manipulate any data structures afterwards !!!! 
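+    // Once the asynchronous fetch-log RPC is in flight, its callback thread may
+    // already be processing the response and can take over this FetchStream
+    // concurrently, which is why no member may be touched again on this path.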
+ } + return ret; +} + +void FetchStream::print_handle_info_(FetchLogARpcResult &result, + const int64_t handle_rpc_time, + const int64_t read_log_time, + const int64_t decode_log_entry_time, + const bool rpc_is_flying, + const bool is_stream_valid, + const char *stream_invalid_reason, + PartFetchCtx *min_progress_task, + const TransStatInfo &tsi, + const bool need_stop_request) +{ + bool print_rpc_handle_info = ATOMIC_LOAD(&g_print_rpc_handle_info); + PartFetchCtx::PartProgress min_progress; + ObPartitionKey min_pkey; + if (NULL != min_progress_task) { + min_progress_task->get_progress_struct(min_progress); + min_pkey = min_progress_task->get_pkey(); + } + + if (print_rpc_handle_info) { + LOG_INFO("handle rpc result by fetch stream", + "fetch_stream", this, K_(stream_seq), + K_(fetch_task_pool), + "upper_limit", TS_TO_STR(upper_limit_), + K(need_stop_request), + "rpc_stop_upon_result", result.rpc_stop_upon_result_, + "rpc_stop_reason", FetchLogARpc::print_rpc_stop_reason(result.rpc_stop_reason_), + K(rpc_is_flying), K(is_stream_valid), K(stream_invalid_reason), + "resp", result.resp_, K(handle_rpc_time), K(read_log_time), K(decode_log_entry_time), + K(tsi), K(min_progress), K(min_pkey)); + } else { + LOG_DEBUG("handle rpc result by fetch stream", + "fetch_stream", this, K_(stream_seq), + K_(fetch_task_pool), + "upper_limit", TS_TO_STR(upper_limit_), + K(need_stop_request), + "rpc_stop_upon_result", result.rpc_stop_upon_result_, + "rpc_stop_reason", FetchLogARpc::print_rpc_stop_reason(result.rpc_stop_reason_), + K(rpc_is_flying), K(is_stream_valid), K(stream_invalid_reason), + "resp", result.resp_, K(handle_rpc_time), K(read_log_time), K(decode_log_entry_time), + K(tsi), K(min_progress), K(min_pkey)); + } +} + +bool FetchStream::has_new_fetch_task_() const +{ + // If the queue of the fetch log task pool is not empty, it marks that there is a new task to be processed + return fetch_task_pool_.queued_count() > 0; +} + +int FetchStream::process_result_(FetchLogARpcResult &result, + volatile bool &stop_flag, + const bool rpc_is_flying, + bool &need_hibernate, + bool &is_stream_valid) +{ + int ret = OB_SUCCESS; + int64_t start_handle_time = get_timestamp(); + int64_t handle_rpc_time = 0; + int64_t read_log_time = 0; + int64_t decode_log_entry_time = 0; + int64_t flush_time = 0; + TransStatInfo tsi; + PartFetchCtx *min_progress_task = NULL; + bool need_stop_request = false; + const char *stream_invalid_reason = NULL; + + // Process each result, set the corresponding trace id + ObLogTraceIdGuard trace_guard(result.trace_id_); + + // Process the log results and make appropriate decisions based on the results + if (OB_FAIL(handle_fetch_log_result_(result, + stop_flag, + is_stream_valid, + stream_invalid_reason, + need_hibernate, + read_log_time, + decode_log_entry_time, + tsi, + flush_time, + min_progress_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle fetch log result fail", KR(ret), K(result), K(fetch_log_arpc_)); + } + } + // 如果当前取日志流无效,需要重新开流 + else if (! 
is_stream_valid) { + // If the current fetch log stream is invalid, you need to reopen the stream + fetch_log_arpc_.discard_request(stream_invalid_reason); + need_open_stream_ = true; + } + // The log stream is valid and ready to continue processing the next RPC packet + else { + // If a new partition task comes in, notify RPC to stop continuing to fetch logs + // Avoid starvation of new partitions + need_stop_request = (rpc_is_flying && has_new_fetch_task_()); + + // Mark the request as finished + // After you stop the request, you still need to continue iterating through the results until all the results are iterated through + if (need_stop_request && (OB_FAIL(fetch_log_arpc_.mark_request_stop(stream_seq_)))) { + LOG_ERROR("fetch log rpc mar request stop fail", KR(ret), K(this), K(stream_seq_), + K(fetch_log_arpc_), K(fetch_task_pool_)); + } + // Update RPC request parameters + else if (OB_FAIL(update_rpc_request_params_())) { + LOG_ERROR("update rpc request params fail", KR(ret)); + } else { + // success + } + } + + if (OB_SUCCESS == ret) { + handle_rpc_time = get_timestamp() - start_handle_time; + + // Update statistical information + update_fetch_stat_info_(result, handle_rpc_time, read_log_time, + decode_log_entry_time, flush_time, tsi); + + // Print processing information + print_handle_info_(result, handle_rpc_time, read_log_time, decode_log_entry_time, + rpc_is_flying, is_stream_valid, stream_invalid_reason, min_progress_task, tsi, + need_stop_request); + } + + return ret; +} + +int FetchStream::handle_fetch_log_task_(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(FETCH_LOG != state_)) { + LOG_ERROR("state does not match which is not FETCH_LOG", K(state_)); + ret = OB_STATE_NOT_MATCH; + } else { + bool need_hibernate = false; + bool rpc_is_flying = false; + bool is_stream_valid = true; + FetchLogARpcResult *result = NULL; + + // Whether the log stream is taken over by RPC, default is false + bool stream_been_taken_over_by_rpc = false; + + // Continuously iterate through the fetch log results while the current fetch log stream is continuously active, and then process + while (! 
stop_flag + && OB_SUCCESS == ret + && is_stream_valid + && OB_SUCC(fetch_log_arpc_.next_result(result, rpc_is_flying))) { + need_hibernate = false; + + if (OB_ISNULL(result)) { + LOG_ERROR("invalid result", K(result)); + ret = OB_INVALID_ERROR; + } + // Processing results + else if (OB_FAIL(process_result_(*result, stop_flag, rpc_is_flying, need_hibernate, + is_stream_valid))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process result fail", KR(ret), K(result), KPC(result), K(this), KPC(this)); + } + } else { + // Processing success + } + + // Recycling result + if (NULL != result) { + fetch_log_arpc_.revert_result(result); + result = NULL; + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + if (OB_ITER_END == ret) { + // Iterate through all results + ret = OB_SUCCESS; + + if (rpc_is_flying) { + // The RPC is still running, the fetch log stream is taken over by the RPC callback thread + // Note: No further manipulation of any data structures can be performed subsequently + stream_been_taken_over_by_rpc = true; + } else { + // The RPC is not running, it is still the current thread that is responsible for that fetch log stream + stream_been_taken_over_by_rpc = false; + } + } + + // Final unified processing results + if (OB_SUCCESS == ret) { + if (stream_been_taken_over_by_rpc) { + // The fetch log stream is taken over by the RPC callback, maintains the FETCH_LOG state, and exits unconditionally + // Note: You cannot continue to manipulate any data structures afterwards !!!!! + handle_when_leave_("RpcTaskOver"); + } else { + // The current thread is still responsible for this fetch log stream + // Entering IDLE state + switch_state(IDLE); + + // Hibernate the task if it needs to be hibernated + // Note: No more data structures can be accessed afterwards, there is a concurrency scenario !!!! + if (need_hibernate) { + if (OB_FAIL(hibernate_())) { + LOG_ERROR("hibernate fail", KR(ret)); + } + } else { + // No hibernation required, then recursive processing of IDLE tasks + // Note: no more data structures can be accessed afterwards, there is a concurrency scenario !!!! 
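+        // handle() re-drives this stream from the IDLE state set above; once it
+        // returns, the stream may have been hibernated or taken over by an RPC
+        // callback, so no member may be accessed afterwards.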
+ ret = handle(stop_flag); + } + } + } + } + + return ret; +} + +void FetchStream::update_fetch_stat_info_(FetchLogARpcResult &result, + const int64_t handle_rpc_time, + const int64_t read_log_time, + const int64_t decode_log_entry_time, + const int64_t flush_time, + const TransStatInfo &tsi) +{ + ObByteLockGuard lock_guard(stat_lock_); + + FetchStatInfo &fsi = cur_stat_info_; + const ObRpcResultCode &rcode = result.rcode_; + const ObLogStreamFetchLogResp &resp = result.resp_; + const ObFetchStatus &fetch_status = resp.get_fetch_status(); + + // No statistics on failed RPCs + if (OB_SUCCESS == rcode.rcode_ && OB_SUCCESS == resp.get_err()) { + fsi.fetch_log_cnt_ += resp.get_log_num(); + fsi.fetch_log_size_ += resp.get_pos(); + + fsi.fetch_log_rpc_cnt_++; + fsi.fetch_log_rpc_time_ += result.rpc_time_; + fsi.fetch_log_rpc_to_svr_net_time_ += fetch_status.l2s_net_time_; + fsi.fetch_log_rpc_svr_queue_time_ += fetch_status.svr_queue_time_; + fsi.fetch_log_rpc_svr_process_time_ += fetch_status.ext_process_time_; + fsi.fetch_log_rpc_callback_time_ += result.rpc_callback_time_; + fsi.handle_rpc_time_ += handle_rpc_time; + fsi.handle_rpc_read_log_time_ += read_log_time; + fsi.handle_rpc_flush_time_ += flush_time; + fsi.read_log_decode_log_entry_time_ += decode_log_entry_time; + fsi.tsi_.update(tsi); + + // RPC stops immediately and is a single round of RPC + if (result.rpc_stop_upon_result_) { + fsi.single_rpc_cnt_++; + + switch (result.rpc_stop_reason_) { + case FetchLogARpc::REACH_UPPER_LIMIT: + fsi.reach_upper_limit_rpc_cnt_++; + break; + + case FetchLogARpc::REACH_MAX_LOG: + fsi.reach_max_log_id_rpc_cnt_++; + break; + + case FetchLogARpc::FETCH_NO_LOG: + fsi.no_log_rpc_cnt_++; + break; + + case FetchLogARpc::REACH_MAX_RPC_RESULT: + fsi.reach_max_result_rpc_cnt_++; + break; + + default: + break; + } + } + } +} + +int FetchStream::handle_fetch_log_result_(FetchLogARpcResult &result, + volatile bool &stop_flag, + bool &is_stream_valid, + const char *&stream_invalid_reason, + bool &need_hibernate, + int64_t &read_log_time, + int64_t &decode_log_entry_time, + TransStatInfo &tsi, + int64_t &flush_time, + PartFetchCtx *&min_progress_task) +{ + int ret = OB_SUCCESS; + const ObStreamSeq &seq = result.seq_; + const ObRpcResultCode &rcode = result.rcode_; + const ObLogStreamFetchLogResp &resp = result.resp_; + + is_stream_valid = true; + stream_invalid_reason = NULL; + need_hibernate = false; + + read_log_time = 0; + decode_log_entry_time = 0; + + if (OB_SUCCESS != rcode.rcode_ || OB_SUCCESS != resp.get_err()) { + is_stream_valid = false; + stream_invalid_reason = "FetchLogFail"; + if (OB_FAIL(handle_fetch_log_error_(seq, rcode, resp))) { + LOG_ERROR("handle fetch log error fail", KR(ret), K(seq), K(rcode), K(resp)); + } + } else { + // A collection of log taking tasks that need to be kicked out + ObArenaAllocator allocator; + KickOutTaskSet kick_out_set(allocator); + + // Read all log entries + if (OB_FAIL(read_log_(resp, stop_flag, kick_out_set, read_log_time, decode_log_entry_time, + tsi))) { + if (OB_LOG_NOT_SYNC == ret) { + // The stream is out of sync and needs to be reopened + // Note: This error code is handled uniformly below, and the following logic must be handled to + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("read log fail", KR(ret), K(resp)); + } + } + // Check the feedback array + else if (OB_FAIL(check_feedback_(resp, kick_out_set))) { + LOG_ERROR("check feed back fail", KR(ret), K(resp)); + } + // Check to fetch the log heartbeat array + else if 
(OB_FAIL(check_fetch_log_heartbeat_(resp, kick_out_set))) { + if (OB_LOG_NOT_SYNC == ret) { + // Stream out of sync, need to reopen stream + } else { + LOG_ERROR("check fetch log heartbeat fail", KR(ret), K(resp), K(kick_out_set)); + } + } + // Update the status of the fetch log task + else if (OB_FAIL(update_fetch_task_state_(kick_out_set, stop_flag, min_progress_task, + flush_time))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("update fetch task state fail", KR(ret), K(kick_out_set)); + } + } else { + // success + } + + // The error code is handled uniformly here + if (OB_LOG_NOT_SYNC == ret) { + // Stream out of sync, need to reopen stream + is_stream_valid = false; + stream_invalid_reason = "LogNotSync"; + ret = OB_SUCCESS; + } else if (OB_SUCCESS == ret) { + // Kick out the partitions that need to be kicked out and reopen the stream next time + if (kick_out_set.count() > 0) { + is_stream_valid = false; + stream_invalid_reason = "KickOutPartition"; + if (OB_FAIL(kick_out_task_(kick_out_set))) { + LOG_ERROR("kick out task fail", KR(ret), K(kick_out_set)); + } + } else { + // All partitions read logs normally + is_stream_valid = true; + + // When the fetched log is empty, it needs to sleep for a while + if (resp.get_log_num() <= 0) { + need_hibernate = true; + } + + // TODO: Here we check the upper limit to achieve dynamic adjustment of the upper limit interval + } + } + } + return ret; +} + +bool FetchStream::check_need_feedback_() +{ + bool bool_ret = false; + const int64_t feedback_interval = TCONF.stream_feedback_interval_sec * _SEC_; + const int64_t cur_time = get_timestamp(); + + if (OB_INVALID_TIMESTAMP == last_feedback_tstamp_ // First request must feedback + || feedback_interval <= 0 // Request a feedback every time + || (cur_time - last_feedback_tstamp_) >= feedback_interval) // Periodic feedback + { + bool_ret = true; + last_feedback_tstamp_ = cur_time; + } + + return bool_ret; +} + +bool FetchStream::check_need_switch_server_() +{ + bool bool_ret = false; + const int64_t check_switch_server_interval = ATOMIC_LOAD(&g_check_switch_server_interval); + const int64_t cur_time = get_timestamp(); + + if ((check_switch_server_interval <= 0) + || (cur_time - last_switch_server_tstamp_) >= check_switch_server_interval) { + bool_ret = true; + last_switch_server_tstamp_ = cur_time; + } + + return bool_ret; +} + +int FetchStream::update_rpc_request_params_() +{ + int ret = OB_SUCCESS; + int64_t fetch_log_cnt_per_part_per_round = ATOMIC_LOAD(&g_fetch_log_cnt_per_part_per_round); + int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout); + const bool need_feed_back = check_need_feedback_(); + + // Update local upper limit to keep in sync with RPC + if (OB_FAIL(get_upper_limit(upper_limit_))) { + LOG_ERROR("update upper limit fail", KR(ret)); + } + // Update fetch log request parameters + else if (OB_FAIL(fetch_log_arpc_.update_request(stream_seq_, + upper_limit_, + fetch_log_cnt_per_part_per_round, + need_feed_back, + rpc_timeout))) { + LOG_ERROR("update fetch log request fail", KR(ret), K(fetch_log_arpc_), K(stream_seq_), + K(upper_limit_), K(fetch_log_cnt_per_part_per_round), K(need_feed_back), K(rpc_timeout)); + } + + return ret; +} + +int FetchStream::handle_fetch_log_error_(const ObStreamSeq &seq, + const ObRpcResultCode &rcode, + const ObLogStreamFetchLogResp &resp) +{ + int ret = OB_SUCCESS; + bool need_kick_out_all = false; + KickOutReason kick_out_reason = NONE; + + // RPC failure, need switch server + if (OB_SUCCESS != rcode.rcode_) { + need_kick_out_all = true; + kick_out_reason = 
FETCH_LOG_FAIL_ON_RPC; + LOG_ERROR("fetch log fail on rpc", K_(svr), K(rcode), "fetch_stream", this, K(seq)); + } + // server return error + else if (OB_SUCCESS != resp.get_err()) { + // If the stream does not exist, the stream is reopened without switching servers + if (OB_STREAM_NOT_EXIST == resp.get_err()) { + need_kick_out_all = false; + LOG_WARN("fetch log fail on server, stream not exist", K_(svr), "svr_err", resp.get_err(), + "svr_debug_err", resp.get_debug_err(), K(seq), K(rcode), K(resp)); + } else { + // Other errors, switch server directly + need_kick_out_all = true; + kick_out_reason = FETCH_LOG_FAIL_ON_SERVER; + LOG_ERROR("fetch log fail on server", "fetch_stream", this, K_(svr), + "svr_err", resp.get_err(), "svr_debug_err", resp.get_debug_err(), + K(seq), K(rcode), K(resp)); + } + } else { + need_kick_out_all = false; + } + + if (OB_SUCCESS == ret && need_kick_out_all) { + if (OB_FAIL(kick_out_all_(kick_out_reason))) { + LOG_ERROR("kick out all fail", KR(ret)); + } + } + + return ret; +} + +bool FetchStream::need_add_into_blacklist_(const KickOutReason reason) +{ + bool bool_ret = false; + + if ((NEED_SWITCH_SERVER == reason) || (DISCARDED == reason)) { + bool_ret = false; + } else { + bool_ret = true; + } + + return bool_ret; +} + +const char *FetchStream::print_kick_out_reason_(const KickOutReason reason) +{ + const char *str = "NONE"; + switch (reason) { + case OPEN_STREAM_FAIL: + str = "OpenStreamFail"; + break; + + case FETCH_LOG_FAIL_ON_RPC: + str = "FetchLogFailOnRpc"; + break; + + case FETCH_LOG_FAIL_ON_SERVER: + str = "FetchLogFailOnServer"; + break; + + case MISSING_LOG_OPEN_STREAM_FAIL: + str = "MissingLogOpenStreamFail"; + break; + + case MISSING_LOG_FETCH_FAIL: + str = "MissingLogFetchFail"; + break; + + case LAGGED_FOLLOWER: + str = "LAGGED_FOLLOWER"; + break; + + case LOG_NOT_IN_THIS_SERVER: + str = "LOG_NOT_IN_THIS_SERVER"; + break; + + case PARTITION_OFFLINED: + str = "PARTITION_OFFLINED"; + break; + + case PROGRESS_TIMEOUT: + str = "PROGRESS_TIMEOUT"; + break; + + case PROGRESS_TIMEOUT_ON_LAGGED_REPLICA: + str = "PROGRESS_TIMEOUT_ON_LAGGED_REPLICA"; + break; + + case NEED_SWITCH_SERVER: + str = "NeedSwitchServer"; + break; + + case DISCARDED: + str = "Discarded"; + break; + + default: + str = "NONE"; + break; + } + + return str; +} + +bool FetchStream::exist_(KickOutTaskSet &kick_out_set, const common::ObPartitionKey &pkey) +{ + KickOutTask task(pkey); + return OB_HASH_EXIST == kick_out_set.exist_refactored(task); +} + +int FetchStream::set_(KickOutTaskSet &kick_out_set, + const common::ObPartitionKey &pkey, + KickOutReason kick_out_reason) +{ + KickOutTask task(pkey, kick_out_reason); + return kick_out_set.set_refactored(task); +} + +int FetchStream::read_log_(const ObLogStreamFetchLogResp &resp, + volatile bool &stop_flag, + KickOutTaskSet &kick_out_set, + int64_t &read_log_time, + int64_t &decode_log_entry_time, + TransStatInfo &tsi) +{ + int ret = OB_SUCCESS; + const char *buf = resp.get_log_entry_buf(); + const int64_t len = resp.get_pos(); + const int64_t log_cnt = resp.get_log_num(); + int64_t pos = 0; + clog::ObLogEntry log_entry; + int64_t start_read_time = get_timestamp(); + + read_log_time = 0; + decode_log_entry_time = 0; + + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid response log buf", K(buf), K(resp)); + ret = OB_ERR_UNEXPECTED; + } else if (0 == log_cnt) { + // Ignore 0 logs + LOG_DEBUG("fetch 0 log", K_(svr), K_(stream_seq), "fetch_status", resp.get_fetch_status()); + } else { + // Iterate through all log entries + for (int64_t idx = 0; 
OB_SUCCESS == ret && (idx < log_cnt); ++idx) { + int64_t begin_time = get_timestamp(); + // liboblog ignores the batch commit flag when checking checksum + bool ignore_batch_commit_flag_when_check_integrity = true; + + log_entry.reset(); + + // Deserialize log_entry + if (OB_FAIL(log_entry.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize log entry fail", KR(ret), K(buf), K(len), K(pos), + K_(svr), K_(stream_seq)); + } + // Checking Integrity + else if (OB_UNLIKELY(!log_entry.check_integrity(ignore_batch_commit_flag_when_check_integrity))) { + LOG_ERROR("log entry check integrity fail", K(log_entry), K_(svr), K_(stream_seq), + K(ignore_batch_commit_flag_when_check_integrity)); + ret = OB_INVALID_DATA; + } else { + // ObLogEntry deserialize time + decode_log_entry_time += (get_timestamp() - begin_time); + + const clog::ObLogEntryHeader &header = log_entry.get_header(); + const ObPartitionKey &pkey = header.get_partition_key(); + uint64_t log_id = header.get_log_id(); + int64_t tstamp = header.get_submit_timestamp(); + PartFetchCtx *task = NULL; + IObLogPartTransResolver::ObLogMissingInfo missing_logs; + TransStatInfo local_tsi; + const bool need_filter_pg_no_missing_redo_trans = false; + IObLogPartTransResolver::ObAggreLogIndexArray log_indexs; + + // Filtering partition logs that need to be kicked out + if (exist_(kick_out_set, pkey)) { + LOG_INFO("ignore partition log entry which need kick out from current stream", + K(pkey), K(log_id), K(tstamp), K_(svr), K_(stream_seq)); + } + // Get the corresponding fetch log task + else if (OB_FAIL(fetch_task_pool_.get_task(pkey, task))) { + LOG_ERROR("get task from pool fail", KR(ret), K(pkey), K(log_entry), K_(stream_seq)); + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task), K(pkey), K(log_entry), K_(stream_seq)); + ret = OB_ERR_UNEXPECTED; + } + // The fetch log task is responsible for parsing the logs + else if (OB_FAIL(task->read_log(log_entry, missing_logs, local_tsi, need_filter_pg_no_missing_redo_trans, + log_indexs, stop_flag))) { + if (OB_LOG_NOT_SYNC != ret && OB_IN_STOP_STATE != ret && OB_ITEM_NOT_SETTED != ret) { + LOG_ERROR("fetch task read log fail", KR(ret), K(log_entry), K(missing_logs)); + } else if (OB_ITEM_NOT_SETTED == ret) { + ret = OB_SUCCESS; + + // Handling missing redo log scenarios + KickOutReason fail_reason = NONE; + if (OB_FAIL(handle_missing_log_(*task, log_entry, missing_logs, stop_flag, + fail_reason))) { + // Processing failure, need to kick out, add to kick out set + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + if (OB_FAIL(set_(kick_out_set, pkey, fail_reason))) { + LOG_ERROR("add task into kick out set fail", KR(ret), K(pkey), K(kick_out_set), + K(fail_reason)); + } + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle missing log fail", KR(ret), KPC(task), K(missing_logs), + K(log_entry)); + } + } + } + } else { + // Update transaction statistics + tsi.update(local_tsi); + } + } + } + } + + if (OB_SUCCESS == ret) { + read_log_time = get_timestamp() - start_read_time; + } + + return ret; +} + +int FetchStream::handle_missing_log_(PartFetchCtx &task, + const clog::ObLogEntry &prepare_log_entry, + const IObLogPartTransResolver::ObLogMissingInfo &org_missing_logs, + volatile bool &stop_flag, + KickOutReason &fail_reason) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(org_missing_logs.get_missing_log_count() <= 0)) { + LOG_ERROR("empty missing log", K(org_missing_logs)); + ret = OB_INVALID_ARGUMENT; + } else { + const ObLogIdArray &missing_logs = org_missing_logs.missing_log_ids_; + 
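+    // Overall flow: open a dedicated short-lived stream for this single partition,
+    // synchronously fetch the missing redo logs until the missing_logs array is
+    // fully consumed, then re-read the prepare log entry.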
int64_t stream_life_time = DEFAULT_MISSING_LOG_STREAM_LIFE_TIME; + const ObPartitionKey &pkey = task.get_pkey(); + + // The upper limit of the missing log stream is the prepare log timestamp + int64_t upper_limit = prepare_log_entry.get_header().get_submit_timestamp(); + + int64_t fetched_missing_log_cnt = 0; + // Keep reading logs and consuming missing log arrays + while (! stop_flag && OB_SUCCESS == ret && missing_logs.count() > fetched_missing_log_cnt) { + ObStreamSeq seq; + bool open_stream_succeed = false; + + // Start each time with the next missing log to be fetched + uint64_t start_log_id = missing_logs.at(fetched_missing_log_cnt); + + // Open streams for individual partitions based on minimum log ID + if (OB_FAIL(open_stream_(pkey, start_log_id, seq, stream_life_time, + open_stream_succeed))) { + LOG_ERROR("open stream fail", KR(ret), K(pkey), K(start_log_id), K(stream_life_time)); + } else if (! open_stream_succeed) { + // Open stream failed, need to switch server + ret = OB_NEED_RETRY; + fail_reason = MISSING_LOG_OPEN_STREAM_FAIL; + } + // Keep fetching the missing log on the current stream + else if (OB_FAIL(fetch_missing_log_(task, seq, missing_logs, org_missing_logs.missing_trans_ids_, + fetched_missing_log_cnt, upper_limit))) { + if (OB_STREAM_NOT_EXIST == ret) { + // Stream does not exist, need to reopen stream + ret = OB_SUCCESS; + } else if (OB_NEED_RETRY == ret) { + // Fetching logs failed, need to switch server + fail_reason = MISSING_LOG_FETCH_FAIL; + } else { + LOG_ERROR("fetch missing log fail", KR(ret), K(seq), K(task), K(missing_logs), + K(fetched_missing_log_cnt), K(upper_limit)); + } + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS == ret) { + if (OB_UNLIKELY(missing_logs.count() > fetched_missing_log_cnt)) { + LOG_ERROR("missing log not consumed all", K(missing_logs), K(fetched_missing_log_cnt)); + ret = OB_ERR_UNEXPECTED; + } else { + TransStatInfo tsi; + // Missing log processing is complete, prepare log needs to be parsed again + IObLogPartTransResolver::ObLogMissingInfo missing_info; + const bool need_filter_pg_no_missing_redo_trans = true; + + if (OB_FAIL(task.read_log(prepare_log_entry, missing_info, tsi, need_filter_pg_no_missing_redo_trans, + org_missing_logs.log_indexs_, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("fetch task read log fail", KR(ret), K(prepare_log_entry), K(task), + K(need_filter_pg_no_missing_redo_trans), "log_indexs", org_missing_logs.log_indexs_); + } + } + } + } + } + return ret; +} + +int FetchStream::open_stream_(const common::ObPartitionKey &pkey, + const uint64_t start_log_id, + obrpc::ObStreamSeq &seq, + const int64_t stream_life_time, + bool &rpc_succeed) +{ + int ret = OB_SUCCESS; + OpenStreamSRpc open_stream_rpc; + int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout); + + ObLogTraceIdGuard trace_guard; + + rpc_succeed = false; + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid rpc handler", K(rpc_)); + ret = OB_INVALID_ERROR; + } + // Execute synchronous RPC + else if (OB_FAIL(open_stream_rpc.open_stream(*rpc_, svr_, rpc_timeout, pkey, start_log_id, + seq, stream_life_time))) { + LOG_ERROR("launch open stream rpc fail", KR(ret), K(svr_), K(rpc_timeout), K(pkey), + K(start_log_id), K(stream_life_time)); + } else { + // Checking RPC return values + const ObRpcResultCode &rcode = open_stream_rpc.get_result_code(); + const ObLogOpenStreamResp &resp = open_stream_rpc.get_resp(); + const ObLogOpenStreamReq &req = open_stream_rpc.get_req(); + + // RPC failure + if (OB_SUCCESS != rcode.rcode_) 
{ + LOG_ERROR("open stream fail on rpc", K_(svr), K(rcode), K(req), K(resp)); + } else if (OB_SUCCESS != resp.get_err()) { + // server return error + LOG_ERROR("open stream fail on server", K_(svr), "svr_err", resp.get_err(), + "svr_debug_err", resp.get_debug_err(), K(rcode), K(req), K(resp)); + } else { + rpc_succeed = true; + seq = resp.get_stream_seq(); + } + } + return ret; +} + +int FetchStream::fetch_missing_log_(PartFetchCtx &task, + const obrpc::ObStreamSeq &seq, + const ObLogIdArray &missing_logs, + const IObLogPartTransResolver::ObTransIDArray &missing_log_trans_id_array, + int64_t &fetched_missing_log_cnt, + const int64_t upper_limit) +{ + int ret = OB_SUCCESS; + // Use synchronous RPC method to fetch MISSING LOG + FetchLogSRpc *fetch_log_srpc = NULL; + int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout); + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid rpc handler", K(rpc_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(alloc_fetch_log_srpc_(fetch_log_srpc))) { + LOG_ERROR("alloc fetch log srpc fail", KR(ret)); + } else if (OB_ISNULL(fetch_log_srpc)) { + LOG_ERROR("invalid fetch_log_srpc", K(fetch_log_srpc)); + ret = OB_ERR_UNEXPECTED; + } else { + while (OB_SUCCESS == ret && missing_logs.count() > fetched_missing_log_cnt) { + uint64_t max_log_id = missing_logs.at(missing_logs.count() - 1); + uint64_t min_log_id = missing_logs.at(fetched_missing_log_cnt); + + // Maximum number of logs fetched + int64_t fetch_log_cnt = max_log_id - min_log_id + 1; + + // Launch rpc + if (OB_FAIL(fetch_log_srpc->fetch_log(*rpc_, svr_, rpc_timeout, seq, upper_limit, + fetch_log_cnt, false))) { + LOG_ERROR("launch fetch log fail", KR(ret), K(seq), K(upper_limit), K(fetch_log_cnt)); + } else { + const obrpc::ObRpcResultCode &rcode = fetch_log_srpc->get_result_code(); + const obrpc::ObLogStreamFetchLogResp &resp = fetch_log_srpc->get_resp(); + + // RPC failure, need to switch server + if (OB_SUCCESS != rcode.rcode_) { + LOG_ERROR("fetch log fail on rpc", K_(svr), K(rcode), K(seq)); + ret = OB_NEED_RETRY; + } + // server return fail + else if (OB_SUCCESS != resp.get_err()) { + // Stream does not exist, reopen the stream + if (OB_STREAM_NOT_EXIST == resp.get_err()) { + LOG_WARN("fetch missing log fail on server, stream not exist", K_(svr), + "svr_err", resp.get_err(), "svr_debug_err", resp.get_debug_err(), + K(seq), K(rcode), K(resp)); + ret = OB_STREAM_NOT_EXIST; + } else { + // Other error, need to switch server + LOG_ERROR("fetch missing log fail on server", K_(svr), "svr_err", resp.get_err(), + "svr_debug_err", resp.get_debug_err(), K(seq), K(rcode), K(resp)); + ret = OB_NEED_RETRY; + } + } + // Fetch log successfully + else if (OB_FAIL(read_missing_log_(task, resp, missing_logs, missing_log_trans_id_array, fetched_missing_log_cnt))) { + LOG_ERROR("read missing log fail", KR(ret), K(resp), K(missing_logs), K(missing_log_trans_id_array), + K(fetched_missing_log_cnt), K(task)); + } + } + } + } + + if (NULL != fetch_log_srpc) { + free_fetch_log_srpc_(fetch_log_srpc); + fetch_log_srpc = NULL; + } + + return ret; +} + +int FetchStream::alloc_fetch_log_srpc_(FetchLogSRpc *&fetch_log_srpc) +{ + int ret = OB_SUCCESS; + void *buf = ob_malloc(sizeof(FetchLogSRpc), ObModIds::OB_LOG_FETCH_LOG_SRPC); + + if (OB_ISNULL(buf)) { + LOG_ERROR("alloc memory for FetchLogSRpc fail", K(sizeof(FetchLogSRpc))); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(fetch_log_srpc = new(buf) FetchLogSRpc())) { + LOG_ERROR("construct fetch log srpc fail", K(buf)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // success 
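+    // fetch_log_srpc now points to a FetchLogSRpc placement-new'ed into ob_malloc()
+    // memory; it must be released with free_fetch_log_srpc_(), which invokes the
+    // destructor and then ob_free().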
+ } + return ret; +} + +void FetchStream::free_fetch_log_srpc_(FetchLogSRpc *fetch_log_srpc) +{ + if (NULL != fetch_log_srpc) { + fetch_log_srpc->~FetchLogSRpc(); + ob_free(fetch_log_srpc); + fetch_log_srpc = NULL; + } +} + +int FetchStream::read_missing_log_(PartFetchCtx &task, + const ObLogStreamFetchLogResp &resp, + const ObLogIdArray &missing_logs, + const IObLogPartTransResolver::ObTransIDArray &missing_log_trans_id_array, + int64_t &fetched_missing_log_cnt) +{ + int ret = OB_SUCCESS; + const char *buf = resp.get_log_entry_buf(); + int64_t len = resp.get_pos(); + int64_t pos = 0; + const int64_t log_cnt = resp.get_log_num(); + clog::ObLogEntry log_entry; + // liboblog should ignore the batch commit flag when checking checksum + bool ignore_batch_commit_flag_when_check_integrity = true; + + if (OB_UNLIKELY(missing_logs.count() <= fetched_missing_log_cnt)) { + LOG_ERROR("invalid missing_logs", K(missing_logs), K(fetched_missing_log_cnt)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(buf)) { + LOG_ERROR("invalid response log buf", K(buf), K(resp)); + ret = OB_ERR_UNEXPECTED; + } else if (0 == log_cnt) { + // Ignore 0 logs + // TODO: Adopt a policy to handle 0 log scenarios, or switch server if there are always 0 logs + LOG_INFO("fetch 0 missing log", K_(svr), K_(stream_seq), + "fetch_status", resp.get_fetch_status(), K(missing_logs), K(fetched_missing_log_cnt)); + } else { + // Iterate through all log entries and select the required redo logs + // Since missing_logs are sorted from smallest to largest, just compare the logs in order + for (int64_t idx = 0; + OB_SUCCESS == ret && (idx < log_cnt) && missing_logs.count() > fetched_missing_log_cnt; + ++idx) { + uint64_t next_missing_log_id = missing_logs.at(fetched_missing_log_cnt); + log_entry.reset(); + + // Deserialize log_entry + if (OB_FAIL(log_entry.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize log entry fail", KR(ret), K(buf), K(len), K(pos), + K_(svr), K_(stream_seq)); + } + // Check intergrity + else if (OB_UNLIKELY(!log_entry.check_integrity(ignore_batch_commit_flag_when_check_integrity))) { + LOG_ERROR("log entry check integrity fail", K(log_entry), K_(svr), K_(stream_seq), + K(ignore_batch_commit_flag_when_check_integrity)); + ret = OB_INVALID_DATA; + } + // Check partition key + else if (OB_UNLIKELY(log_entry.get_header().get_partition_key() != task.get_pkey())) { + LOG_ERROR("invalid log, partition key does not match", + "log_pkey", log_entry.get_header().get_partition_key(), + "asked_pkey", task.get_pkey(), K(resp)); + ret = OB_INVALID_DATA; + } else if (OB_UNLIKELY(log_entry.get_header().get_log_id() > next_missing_log_id)) { + // The log ID should not be larger than the next missing log + LOG_ERROR("log id is greater than next missing log id", + "log_id", log_entry.get_header().get_log_id(), + K(next_missing_log_id), K(log_entry), K(missing_logs), K(fetched_missing_log_cnt)); + ret = OB_ERR_UNEXPECTED; + } else if (log_entry.get_header().get_log_id() < next_missing_log_id) { + // Filtering unwanted logs + } + // Read the next missing log + else if (OB_FAIL(task.read_missing_redo(log_entry, missing_log_trans_id_array))) { + LOG_ERROR("read missing redo fail", KR(ret), K(log_entry), K(missing_log_trans_id_array), K(next_missing_log_id)); + } else { + // success + fetched_missing_log_cnt++; + } + } + } + return ret; +} + +int FetchStream::kick_out_task_(const KickOutTaskSet &kick_out_set) +{ + int ret = OB_SUCCESS; + + KickOutTaskSet::const_iterator_t iter = kick_out_set.begin(); + + // Iterate through the 
collection, remove the partition fetch log task from the task pool, and assign it to another stream + for (; (OB_SUCCESS == ret && iter != kick_out_set.end()); ++iter) { + PartFetchCtx *task = NULL; + if (OB_FAIL(fetch_task_pool_.kick_out_task((*iter).pkey_, task))) { + LOG_ERROR("kick out task from pool fail", KR(ret), K(*iter), K(kick_out_set)); + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(dispatch_fetch_task_(*task, + (*iter).kick_out_reason_))) { + LOG_ERROR("dispatch fetch task fail", KR(ret), KPC(task), K((*iter).kick_out_reason_)); + } else { + task = NULL; + } + } + + return ret; +} + +FetchStream::KickOutReason FetchStream::get_feedback_reason_(const Feedback &feedback) const +{ + // Get KickOutReason based on feedback + KickOutReason reason = NONE; + switch (feedback.feedback_type_) { + case ObLogStreamFetchLogResp::LAGGED_FOLLOWER: + reason = LAGGED_FOLLOWER; + break; + + case ObLogStreamFetchLogResp::LOG_NOT_IN_THIS_SERVER: + reason = LOG_NOT_IN_THIS_SERVER; + break; + + case ObLogStreamFetchLogResp::PARTITION_OFFLINED: + reason = PARTITION_OFFLINED; + break; + + default: + reason = NONE; + break; + } + + return reason; +} + +int FetchStream::check_feedback_(const ObLogStreamFetchLogResp &resp, + KickOutTaskSet &kick_out_set) +{ + int ret = OB_SUCCESS; + int64_t feedback_cnt = resp.get_feedback_array().count(); + + for (int64_t idx = 0; OB_SUCCESS == ret && (idx < feedback_cnt); ++idx) { + const Feedback &feedback = resp.get_feedback_array().at(idx); + KickOutReason reason = get_feedback_reason_(feedback); + + // Kick out all the partitions in the feedback, but not the NONE + if (reason != NONE) { + if (OB_FAIL(set_(kick_out_set, feedback.pkey_, reason))) { + if (OB_HASH_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("set pkey into kick out set fail", KR(ret), K(feedback), K(kick_out_set)); + } + } + } + } + return ret; +} + +int FetchStream::check_fetch_log_heartbeat_(const ObLogStreamFetchLogResp &resp, + KickOutTaskSet &kick_out_set) +{ + int ret = OB_SUCCESS; + int64_t hb_cnt = resp.get_hb_array().count(); + typedef obrpc::ObLogStreamFetchLogResp::FetchLogHeartbeatItem Heartbeat; + + // Update the progress of each partition + for (int64_t idx = 0; OB_SUCCESS == ret && (idx < hb_cnt); ++idx) { + const Heartbeat &hb = resp.get_hb_array().at(idx); + PartFetchCtx *task = NULL; + + if (exist_(kick_out_set, hb.pkey_)) { + // Ignore the partitions that will be kicked out + } else if (OB_FAIL(fetch_task_pool_.get_task(hb.pkey_, task))) { + LOG_ERROR("get task from pool fail", KR(ret), K(hb.pkey_), K_(stream_seq)); + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task), K(hb.pkey_), K_(stream_seq)); + ret = OB_ERR_UNEXPECTED; + } + // Update progress based on log heartbeat + else if (OB_FAIL(task->update_log_heartbeat(hb.next_log_id_, hb.heartbeat_ts_))) { + if (OB_LOG_NOT_SYNC != ret) { + LOG_ERROR("update log heartbeat fail", KR(ret), K(hb), KPC(task)); + } + } else { + // success + } + } + return ret; +} + +int FetchStream::update_fetch_task_state_(KickOutTaskSet &kick_out_set, + volatile bool &stop_flag, + PartFetchCtx *&min_progress_task, + int64_t &flush_time) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(heartbeater_) || OB_ISNULL(svr_finder_)) { + LOG_ERROR("invalid handlers", K(heartbeater_), K(svr_finder_)); + ret = OB_INVALID_ERROR; + } else { + FetchTaskList &task_list = fetch_task_pool_.get_task_list(); + PartFetchCtx *task = task_list.head(); + int64_t min_progress = 
OB_INVALID_TIMESTAMP; + bool need_check_switch_server = check_need_switch_server_(); + + // Check each of the fetch log tasks and update their status + while (OB_SUCCESS == ret && NULL != task) { + // If the task is deleted, it is kicked out directly + if (OB_UNLIKELY(task->is_discarded())) { + LOG_INFO("[STAT] [FETCH_STREAM] [RECYCLE_FETCH_TASK]", "fetch_task", task, + "fetch_stream", this, KPC(task)); + if (OB_FAIL(set_(kick_out_set, task->get_pkey(), DISCARDED))) { + if (OB_HASH_EXIST == ret) { + // Already exists, ignore + ret = OB_SUCCESS; + } else { + LOG_ERROR("set into kick out set fail", KR(ret), K(task->get_pkey()), K(kick_out_set)); + } + } + } else { + // Periodic update of leader information + if (task->need_update_leader_info()) { + if (OB_FAIL(task->update_leader_info(*svr_finder_))) { + LOG_ERROR("update leader info fail", KR(ret), KPC(task)); + } + } + + // Update heartbeat progress for needed tasks + if (task->need_heartbeat(upper_limit_)) { + if (OB_FAIL(task->update_heartbeat_info(*heartbeater_, *svr_finder_))) { + LOG_ERROR("update heartbeat info fail", KR(ret), KPC(task)); + } + } + + // Check if the progress is greater than the upper limit, and update the touch timestamp if it is greater than the upper limit + // Avoid progress of partitions is not updated that progress larger than upper_limit, which will be misjudged as progress timeout in the future + if (OB_SUCCESS == ret) { + task->update_touch_tstamp_if_progress_beyond_upper_limit(upper_limit_); + } + + // Update each partition's progress to the global + if (OB_SUCCESS == ret && OB_FAIL(publish_progress_(*task))) { + LOG_ERROR("update progress fail", KR(ret), K(task), KPC(task)); + } + + // Check if the server list needs to be updated + if (OB_SUCCESS == ret && task->need_update_svr_list()) { + bool need_print_info = (TCONF.print_partition_server_list_update_info != 0); + if (OB_FAIL(task->update_svr_list(*svr_finder_, need_print_info))) { + LOG_ERROR("update svr list fail", KR(ret), K(svr_finder_), KPC(task)); + } + } + + // Check if the log fetch timeout on the current server, and add the timeout tasks to the kick out collection + if (OB_SUCCESS == ret && OB_FAIL(check_fetch_timeout_(*task, kick_out_set))) { + LOG_ERROR("check fetch timeout fail", KR(ret), K(task), KPC(task), K(kick_out_set)); + } + + // Periodically check if there is a server with a higher level of excellence at this time, and if so, add the task to the kick out set for active flow cutting + if (need_check_switch_server) { + if (OB_SUCCESS == ret && OB_FAIL(check_switch_server_(*task, kick_out_set))) { + LOG_ERROR("check switch server fail", KR(ret), K(task), KPC(task), K(kick_out_set)); + } + } + + // Synchronize data to parser + // 1. synchronize the data generated by the read log to downstream + // 2. 
Synchronize progress to downstream using heartbeat task + if (OB_SUCCESS == ret) { + int64_t begin_flush_time = get_timestamp(); + if (OB_FAIL(task->sync(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("sync data to parser fail", KR(ret), KPC(task)); + } + } else { + flush_time += get_timestamp() - begin_flush_time; + } + } + + + if (OB_SUCCESS == ret) { + int64_t progress = task->get_progress(); + if (OB_INVALID_TIMESTAMP == min_progress || progress < min_progress) { + min_progress_task = task; + min_progress = progress; + } + } + } + + if (OB_SUCCESS == ret) { + task = task->get_next(); + } + } + } + + return ret; +} + +int FetchStream::publish_progress_(PartFetchCtx &task) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(progress_controller_)) { + LOG_ERROR("invalid progress controller", K(progress_controller_)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t part_progress = task.get_progress(); + int64_t part_progress_id = task.get_progress_id(); + + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == part_progress)) { + LOG_ERROR("invalid part progress", K(part_progress), K(task)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(progress_controller_->update_progress(part_progress_id, + part_progress))) { + LOG_ERROR("update progress by progress controller fail", KR(ret), + K(part_progress_id), K(part_progress)); + } + } + return ret; +} + +int FetchStream::check_fetch_timeout_(PartFetchCtx &task, KickOutTaskSet &kick_out_set) +{ + int ret = OB_SUCCESS; + bool is_fetch_timeout = false; + bool is_fetch_timeout_on_lagged_replica = false; + // For lagging replica, the timeout of partition + int64_t fetcher_resume_tstamp = OB_INVALID_TIMESTAMP; + + if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid stream worker", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } else { + fetcher_resume_tstamp = stream_worker_->get_fetcher_resume_tstamp(); + + if (OB_FAIL(task.check_fetch_timeout(svr_, upper_limit_, fetcher_resume_tstamp, + is_fetch_timeout, is_fetch_timeout_on_lagged_replica))) { + LOG_ERROR("check fetch timeout fail", KR(ret), K(svr_), + K(upper_limit_), "fetcher_resume_tstamp", TS_TO_STR(fetcher_resume_tstamp), K(task)); + } else if (is_fetch_timeout) { + KickOutReason reason = is_fetch_timeout_on_lagged_replica ? 
PROGRESS_TIMEOUT_ON_LAGGED_REPLICA : PROGRESS_TIMEOUT; + // If the partition fetch log times out, add it to the kick out collection + if (OB_FAIL(set_(kick_out_set, task.get_pkey(), reason))) { + if (OB_HASH_EXIST == ret) { + // Already exists, ignore + ret = OB_SUCCESS; + } else { + LOG_ERROR("set into kick out set fail", KR(ret), K(task.get_pkey()), K(kick_out_set), + K(reason)); + } + } + } + } + + return ret; +} + +int FetchStream::check_switch_server_(PartFetchCtx &task, KickOutTaskSet &kick_out_set) +{ + int ret = OB_SUCCESS; + + if (exist_(kick_out_set, task.get_pkey())) { + // Do not check for partitions already located in kick_out_set + } else if (task.need_switch_server(svr_)) { + LOG_DEBUG("exist higher priority server, need switch server", KR(ret), "pkey", task.get_pkey(), + "cur_svr", svr_); + // If you need to switch the stream, add it to the kick out collection + if (OB_FAIL(set_(kick_out_set, task.get_pkey(), NEED_SWITCH_SERVER))) { + if (OB_HASH_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("set into kick out set fail", KR(ret), K(task.get_pkey()), K(kick_out_set)); + } + } + } else { + // do nothing + } + + return ret; +} + +/////////////////////////////// FetchTaskPool ///////////////////////////// + +const char *FetchStream::FetchTaskPool::print_state(State state) +{ + const char *str = "UNKNOWN"; + switch (state) { + case IDLE: + str = "IDLE"; + break; + case READY: + str = "READY"; + break; + case HANDLING: + str = "HANDLING"; + break; + default: + str = "UNKNOWN"; + break; + } + return str; +} + +FetchStream::FetchTaskPool::FetchTaskPool() : + state_(IDLE), + map_(), + queue_(), + total_cnt_(0), + list_() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(queue_.init(ObModIds::OB_LOG_FETCH_STREAM_PART_TASK_QUEUE))) { + LOG_ERROR("init task queue fail", KR(ret)); + } else if (OB_FAIL(map_.init(ObModIds::OB_LOG_FETCH_STREAM_PART_TASK_MAP))) { + LOG_ERROR("init task map fail", KR(ret)); + } +} + +FetchStream::FetchTaskPool::~FetchTaskPool() +{ + reset(); + + queue_.destroy(); + (void)map_.destroy(); +} + +void FetchStream::FetchTaskPool::reset() +{ + state_ = IDLE; + map_.reset(); + queue_.reset(); + total_cnt_ = 0; + list_.reset(); +} + +int FetchStream::FetchTaskPool::push(PartFetchCtx &task, bool &is_pool_idle) +{ + int ret = OB_SUCCESS; + const ObPartitionKey &pkey = task.get_pkey(); + + // First increase the total number of tasks, and if the number of tasks has reached the limit, return directly + if (OB_FAIL(inc_total_cnt_(1))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_ERROR("inc_total_cnt_ fail", KR(ret)); + } + } + // Insert into MAP first + else if (OB_FAIL(map_.insert(pkey, &task))) { + LOG_ERROR("insert task into map fail", KR(ret), K(pkey), K(task)); + } + // Push queue + else if (OB_FAIL(queue_.push(&task))) { + LOG_ERROR("push task into queue fail", KR(ret), K(&task)); + (void)map_.erase(pkey); + } else { + is_pool_idle = false; + + // Unconditionally set to READY state + if ((ATOMIC_LOAD(&state_) != READY)) { + // Verify that the previous state is IDLE + is_pool_idle = (ATOMIC_SET(&state_, READY) == IDLE); + } + } + return ret; +} + +int FetchStream::FetchTaskPool::kick_out_task_list(FetchTaskList &task_list) +{ + int ret = OB_SUCCESS; + PartFetchCtx *task = list_.head(); + + // Remove the corresponding Task from the Map + while (OB_SUCCESS == ret && NULL != task) { + if (OB_FAIL(map_.erase(task->get_pkey()))) { + LOG_ERROR("erase task from map fail", KR(ret), "pkey", task->get_pkey(), KPC(task)); + } else { + task = task->get_next(); + } + } + + if 
(OB_SUCCESS == ret) { + int64_t part_count = list_.count(); + + // Modify the total number of fetch log tasks + if (part_count > 0 && OB_FAIL(dec_total_cnt_(part_count))) { + LOG_ERROR("decrease total cnt fail", KR(ret), K(part_count)); + } + } + + if (OB_SUCCESS == ret) { + // Copy Fetch Log Task List + task_list = list_; + + // Reset the list of fetch log tasks + list_.reset(); + } + + return ret; +} + +int FetchStream::FetchTaskPool::kick_out_task(const common::ObPartitionKey &pkey, + PartFetchCtx *&task) +{ + int ret = OB_SUCCESS; + + task = NULL; + // Delete from Map + if (OB_FAIL(map_.erase(pkey, task))) { + LOG_ERROR("erase from map fail", KR(ret), K(pkey)); + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task), K(pkey)); + ret = OB_ERR_UNEXPECTED; + } else { + // Delete from list + list_.erase(*task); + + // Modify the total number of fetch log tasks + if (OB_FAIL(dec_total_cnt_(1))) { + LOG_ERROR("decrease total count fail", KR(ret)); + } else { + // success + } + } + + return ret; +} + +int FetchStream::FetchTaskPool::update(bool &pool_become_idle, bool &task_list_changed) +{ + int ret = OB_SUCCESS; + task_list_changed = false; + pool_become_idle = false; + + do { + // First set the status to HANDLING + (void)ATOMIC_SET(&state_, HANDLING); + + // Then the data in the queue is moved into the linklist + while (OB_SUCCESS == ret) { + PartFetchCtx *task = NULL; + + if (OB_FAIL(queue_.pop(task))) { + if (OB_EAGAIN != ret) { + LOG_ERROR("pop task from queue fail", KR(ret)); + } + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + list_.add_head(*task); + task_list_changed = true; + } + } + + if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCCESS == ret) { + // The queue has been emptied above, if the linklist is also empty, there is no task; try to set the status to IDLE + // Only when the status is successfully changed from HANDLING to IDLE, it really means that both the queue and the linklist are empty, otherwise a new task may be inserted + // Note: When the state is changed from HANDLING to IDLE, no further operations can be performed on any data structure, as a thread may have already taken over the data structure. + if (list_.count() <= 0) { + if (HANDLING == ATOMIC_CAS(&state_, HANDLING, IDLE)) { + pool_become_idle = true; + } + } + } + } while (OB_SUCCESS == ret && list_.count() <= 0 && ! 
pool_become_idle); + + return ret; +} + +int FetchStream::FetchTaskPool::get_task(const common::ObPartitionKey &pkey, + PartFetchCtx *&task) const +{ + return map_.get(pkey, task); +} + +int FetchStream::FetchTaskPool::inc_total_cnt_(const int64_t cnt) +{ + int ret = OB_SUCCESS; + int64_t max_part_count = ATOMIC_LOAD(&FetchStream::g_stream_max_part_count); + int64_t cur_cnt = ATOMIC_LOAD(&total_cnt_); + int64_t old_cnt = 0; + int64_t new_cnt = 0; + + do { + old_cnt = cur_cnt; + new_cnt = (old_cnt + cnt); + + // When increasing the number of partitions, check if the upper limit is reached + if (cnt > 0 && old_cnt >= max_part_count) { + ret = OB_SIZE_OVERFLOW; + } + } while (OB_SUCCESS == ret + && (old_cnt != (cur_cnt = ATOMIC_CAS(&total_cnt_, old_cnt, new_cnt)))); + + return ret; +} + +int FetchStream::FetchTaskPool::dec_total_cnt_(const int64_t cnt) +{ + return inc_total_cnt_(-cnt); +} + +} +} diff --git a/src/liboblog/src/ob_log_fetch_stream.h b/src/liboblog/src/ob_log_fetch_stream.h new file mode 100644 index 0000000000000000000000000000000000000000..34f89e12bf4dd1dcd0956a6f4ba8c9a7e1961457 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream.h @@ -0,0 +1,461 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_H__ + +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/hash/ob_ext_iter_hashset.h" // ObExtIterHashSet +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock +#include "common/ob_partition_key.h" // ObPartitionKey +#include "clog/ob_log_external_rpc.h" // ObStreamSeq, ObLogStreamFetchLogResp +#include "clog/ob_log_entry.h" // ObLogEntry + +#include "ob_log_part_fetch_ctx.h" // FetchTaskList +#include "ob_map_queue.h" // ObMapQueue +#include "ob_log_fetch_log_rpc.h" // FetchLogARpc, FetchLogARpcResult, FetchLogSRpc, IFetchLogARpcResultPool +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_timer.h" // ObLogTimerTask +#include "ob_log_dlist.h" // ObLogDList +#include "ob_log_fetch_stream_type.h" // FetchStreamType +#include "ob_log_fetch_stat_info.h" // FetchStatInfo + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogStreamWorker; +class IObLogRpc; +class IObLogSvrFinder; +class IObLogFetcherHeartbeatWorker; +class PartFetchCtx; +class PartProgressController; +class ObLogConfig; + +class FetchStream; +typedef ObLogDListNode FSListNode; + +// Fetch log stream bi-directional linked list +typedef ObLogDList FSList; + +// Fetch log stream +class FetchStream : public FSListNode, public ObLogTimerTask +{ +private: + static const int64_t DEFAULT_MISSING_LOG_STREAM_LIFE_TIME = 10 * _SEC_; + static const int64_t STAT_INTERVAL = 30 * _SEC_; + static const int64_t DEFAULT_TASK_SET_SIZE = 16; + + typedef ObMapQueue TaskQueue; + typedef common::ObLinearHashMap TaskMap; + struct KickOutTask; + typedef common::hash::ObExtIterHashSet KickOutTaskSet; + typedef obrpc::ObLogStreamFetchLogResp::FeedbackPartition Feedback; + + // Class global 
variables
+public:
+  static int64_t g_stream_max_part_count;
+  static int64_t g_stream_life_time;
+  static int64_t g_rpc_timeout;
+  static int64_t g_fetch_log_cnt_per_part_per_round;
+  static int64_t g_dml_progress_limit;
+  // Survival time of a server added to the blacklist
+  static int64_t g_blacklist_survival_time;
+  static int64_t g_check_switch_server_interval;
+  static bool g_print_rpc_handle_info;
+  static bool g_print_stream_dispatch_info;
+
+  /////////// Fetch log stream status //////////
+  // IDLE: idle state, not waiting for any asynchronous RPC
+  // FETCH_LOG: an asynchronous fetch-log RPC has been launched, waiting for the request to return
+  enum State
+  {
+    IDLE = 0,
+    FETCH_LOG = 1,
+  };
+
+  static const char *print_state(State state);
+
+public:
+  FetchStream();
+  virtual ~FetchStream();
+
+public:
+  void reset();
+
+  void reset(const common::ObAddr &svr,
+      const FetchStreamType stream_type,
+      IObLogRpc &rpc,
+      IObLogSvrFinder &svr_finder,
+      IObLogFetcherHeartbeatWorker &heartbeater,
+      IObLogStreamWorker &stream_worker,
+      IFetchLogARpcResultPool &rpc_result_pool,
+      PartProgressController &progress_controller);
+
+  bool is_ddl_stream() const { return FETCH_STREAM_TYPE_DDL == stype_; }
+
+  /// Add a fetch log task, multi-threaded operation.
+  /// If this fetch log stream task is new, assign it to a worker thread for processing.
+  ///
+  /// "New FetchStream task" covers two cases:
+  /// 1. A new FetchStream is created and the partition task is its first partition object
+  /// 2. A new partition task is added after an existing FetchStream has kicked out all of its
+  ///    partitions, in which case the FetchStream is also considered new
+  ///
+  /// @retval OB_SUCCESS          added successfully
+  /// @retval OB_SIZE_OVERFLOW    the task pool is full and no more tasks can be added
+  /// @retval other error codes   adding failed
+  int add_fetch_task(PartFetchCtx &task);
+
+  /// Process the fetch log stream and advance the progress of each partition.
+  /// Requirement: only one thread may call this function at a time; multi-threaded calls are not supported.
+  ///
+  /// @retval OB_SUCCESS          handled successfully
+  /// @retval OB_IN_STOP_STATE    in stop state, exit
+  /// @retval other error codes   handling failed
+  int handle(volatile bool &stop_flag);
+
+  /// Implement the timer task
+  /// Assign it to a worker thread
+  void process_timer_task();
+
+  void switch_state(State state) { ATOMIC_STORE(&state_, state); }
+
+  int get_upper_limit(int64_t &upper_limit_us);
+
+  // Execution statistics
+  void do_stat();
+
+  int64_t get_fetch_task_count() const { return fetch_task_pool_.total_count(); }
+
+public:
+  static void configure(const ObLogConfig &config);
+
+private:
+  // Reasons for switching the fetch log stream (kicking partitions out of the current stream)
+  enum KickOutReason
+  {
+    NONE = -1,
+
+    OPEN_STREAM_FAIL = 0,              // Open stream failure
+    FETCH_LOG_FAIL_ON_RPC = 1,         // RPC failure
+    FETCH_LOG_FAIL_ON_SERVER = 2,      // Server failure
+
+    // Read missing redo log
+    MISSING_LOG_OPEN_STREAM_FAIL = 3,  // Open stream failure for missing redo log
+    MISSING_LOG_FETCH_FAIL = 4,        // Fetch missing redo log failure
+
+    // Feedback
+    LAGGED_FOLLOWER = 5,               // Follower lags behind
+    LOG_NOT_IN_THIS_SERVER = 6,        // The log is not served on this server
+    PARTITION_OFFLINED = 7,            // Partition is offline
+
+    // Progress timeout, i.e. logs have not been fetched for a long time
+    PROGRESS_TIMEOUT = 8,                    // Partition fetch log timeout
+    // Progress timeout and a lagging replica is detected
+    PROGRESS_TIMEOUT_ON_LAGGED_REPLICA = 9,  // Partition fetch log timeout on a lagging replica
+
+    NEED_SWITCH_SERVER = 10,           // A higher priority server exists, actively switch to it
+    DISCARDED = 11,                    // Partition is discarded
+  };
+  static const char *print_kick_out_reason_(const KickOutReason reason);
+  // Determine whether the server needs to be blacklisted:
+  // NEED_SWITCH_SERVER and DISCARDED do not need to be added, all other reasons do
+  bool need_add_into_blacklist_(const KickOutReason reason);
+
+private:
+  void handle_when_leave_(const char *leave_reason) const;
+  int handle_idle_task_(volatile bool &stop_flag);
+  int open_stream_(bool &rpc_succeed, int64_t &part_count);
+  int kick_out_all_(KickOutReason kick_out_reason);
+  int dispatch_fetch_task_(PartFetchCtx &task,
+      KickOutReason dispatch_reason);
+  int check_need_fetch_log_(const int64_t limit, bool &need_fetch_log);
+  int hibernate_();
+  int async_fetch_log_(bool &rpc_send_succeed);
+  void print_handle_info_(FetchLogARpcResult &result,
+      const int64_t handle_rpc_time,
+      const int64_t read_log_time,
+      const int64_t decode_log_entry_time,
+      const bool rpc_is_flying,
+      const bool is_stream_valid,
+      const char *stream_invalid_reason,
+      PartFetchCtx *min_progress_task,
+      const TransStatInfo &tsi,
+      const bool need_stop_request);
+  bool has_new_fetch_task_() const;
+  int process_result_(FetchLogARpcResult &result,
+      volatile bool &stop_flag,
+      const bool rpc_is_flying,
+      bool &need_hibernate,
+      bool &is_stream_valid);
+  int handle_fetch_log_task_(volatile bool &stop_flag);
+  void update_fetch_stat_info_(FetchLogARpcResult &result,
+      const int64_t handle_rpc_time,
+      const int64_t read_log_time,
+      const int64_t decode_log_entry_time,
+      const int64_t flush_time,
+      const TransStatInfo &tsi);
+  int handle_fetch_log_result_(FetchLogARpcResult &result,
+      volatile bool &stop_flag,
+      bool &is_stream_valid,
+      const char *&stream_invalid_reason,
+      bool &need_hibernate,
+      int64_t &read_log_time,
+      int64_t &decode_log_entry_time,
+      TransStatInfo &tsi,
+      int64_t &flush_time,
+      PartFetchCtx *&task_with_min_progress);
+  int update_rpc_request_params_();
+  int handle_fetch_log_error_(const obrpc::ObStreamSeq &seq,
+      const obrpc::ObRpcResultCode &rcode,
+      const obrpc::ObLogStreamFetchLogResp &resp);
+  bool exist_(KickOutTaskSet &kick_out_set, const common::ObPartitionKey &pkey);
+  int set_(KickOutTaskSet &kick_out_set,
+      const common::ObPartitionKey &pkey,
+      KickOutReason kick_out_reason);
+  int read_log_(const obrpc::ObLogStreamFetchLogResp &resp,
+      volatile bool &stop_flag,
+      KickOutTaskSet &kick_out_set,
+      int64_t &read_log_time,
+      int64_t &decode_log_entry_time,
+      TransStatInfo &tsi);
+  int handle_missing_log_(PartFetchCtx &task,
+      const clog::ObLogEntry &prepare_log_entry,
+      const IObLogPartTransResolver::ObLogMissingInfo &org_missing_logs,
+      volatile bool &stop_flag,
+      KickOutReason &fail_reason);
+  int open_stream_(const common::ObPartitionKey &pkey,
+      const uint64_t start_log_id,
+      obrpc::ObStreamSeq &seq,
+      const int64_t stream_life_time,
+      bool &rpc_succeed);
+  int fetch_missing_log_(PartFetchCtx &task,
+      const obrpc::ObStreamSeq &seq,
+      const ObLogIdArray &missing_logs,
+      const IObLogPartTransResolver::ObTransIDArray &missing_log_trans_id_array,
+      int64_t &fetched_missing_log_cnt,
+      const int64_t upper_limit);
+  int alloc_fetch_log_srpc_(FetchLogSRpc *&fetch_log_srpc);
+  int read_missing_log_(PartFetchCtx &task,
+      const obrpc::ObLogStreamFetchLogResp &resp,
+      const ObLogIdArray &missing_logs,
+      const IObLogPartTransResolver::ObTransIDArray &missing_log_trans_id_array,
+      int64_t &fetched_missing_log_cnt);
+  void free_fetch_log_srpc_(FetchLogSRpc *fetch_log_srpc);
+  KickOutReason get_feedback_reason_(const Feedback &feedback) const;
+  int check_feedback_(const obrpc::ObLogStreamFetchLogResp &resp,
+      KickOutTaskSet &kick_out_set);
+  int check_fetch_log_heartbeat_(const obrpc::ObLogStreamFetchLogResp &resp,
+      KickOutTaskSet &kick_out_set);
+  int kick_out_task_(const KickOutTaskSet &kick_out_set);
+  int update_fetch_task_state_(KickOutTaskSet &kick_out_set,
+      volatile bool &stop_flag,
+      PartFetchCtx *&task_with_min_progress,
+      int64_t &flush_time);
+  int publish_progress_(PartFetchCtx &task);
+  int check_fetch_timeout_(PartFetchCtx &task, KickOutTaskSet &kick_out_set);
+  int check_switch_server_(PartFetchCtx &task, KickOutTaskSet &kick_out_set);
+  int prepare_rpc_request_(const int64_t part_count);
+  bool check_need_feedback_();
+  bool check_need_switch_server_();
+
+private:
+  struct KickOutTask
+  {
+    common::ObPartitionKey pkey_;
+    KickOutReason kick_out_reason_;
+
+    KickOutTask() : pkey_(), kick_out_reason_(NONE) {}
+    explicit KickOutTask(const common::ObPartitionKey &pkey) :
+        pkey_(pkey),
+        kick_out_reason_(NONE)
+    {}
+    KickOutTask(const common::ObPartitionKey &pkey, KickOutReason kick_out_reason) :
+        pkey_(pkey),
+        kick_out_reason_(kick_out_reason)
+    {}
+
+    uint64_t hash() const
+    {
+      return pkey_.hash();
+    }
+
+    bool operator == (const KickOutTask &task) const
+    {
+      return pkey_ == task.pkey_;
+    }
+
+    TO_STRING_KV(K_(pkey), "kick_out_reason", print_kick_out_reason_(kick_out_reason_));
+  };
+
+  ////////////////////////// Fetch Log Task Pool ////////////////////////
+  /// Thread model: multi-threaded task push, single-threaded task consumption
+  struct FetchTaskPool
+  {
+    ///////////// Task pool status ///////////
+    // IDLE: no pending tasks
+    // READY: there are tasks to be processed
+    // HANDLING: a thread is consuming tasks
+    //
+    // State transitions:
+    // 1. Initial state is IDLE
+    // 2. IDLE/READY/HANDLING -> READY: when a task is pushed, unconditionally switch to READY
+    // 3. READY -> HANDLING: a thread starts consuming tasks
+    // 4. HANDLING -> IDLE: after the thread has consumed all the data, if the state is still HANDLING, switch to IDLE
+    // 5. HANDLING -> HANDLING: a thread may switch from HANDLING to HANDLING several times during consumption
+    enum State
+    {
+      IDLE = 0,
+      READY = 1,
+      HANDLING = 2,
+    };
+
+    ////////////////////////////// Member Variables //////////////////////////////
+    // Structures operated on by multiple threads
+    State state_;
+    TaskMap map_;        // Map of all tasks
+    TaskQueue queue_;    // Pending task queue
+
+    volatile int64_t total_cnt_ CACHE_ALIGNED;  // Total number of tasks
+
+    // Structure operated on by a single thread
+    FetchTaskList list_; // Linked list of tasks being processed
+
+    ////////////////////////////// Member functions //////////////////////////////
+    FetchTaskPool();
+    virtual ~FetchTaskPool();
+    static const char *print_state(State state);
+
+    TO_STRING_KV("state", print_state(state_),
+        K_(total_cnt),
+        "queue_cnt", queue_.count(),
+        "list_cnt", list_.count(),
+        "map_cnt", map_.count());
+
+    void reset();
+
+    // Get the total number of partition tasks, including queued tasks that have not started fetching logs yet
+    int64_t total_count() const { return ATOMIC_LOAD(&total_cnt_); }
+
+    // Get the number of unconsumed partition tasks in the queue
+    int64_t queued_count() const { return queue_.count(); }
+
+    /// Push a new task into the queue
+    /// Multi-threaded calls
+    ///
+    /// 1. Unconditionally set the task pool state to READY
+    /// 2. Return whether the task pool was in IDLE state before
+    /// 3. 
If it is in IDLE state, there is no thread currently consuming and a new thread needs to be allocated + /// + /// @param [in] task Target task + /// @param [out] is_pool_idle Return variable that marks whether the task pool was in IDLE state before the push + // + /// @retval OB_SUCCESS successful insertion + /// @retval OB_SIZE_OVERFLOW The task pool is full + /// @retval Other error codes Fail + int push(PartFetchCtx &task, bool &is_pool_idle); + + /// Kick out and return the entire task list + /// Single threaded call + int kick_out_task_list(FetchTaskList &list); + + /// Kick out task + /// Single threaded call + int kick_out_task(const common::ObPartitionKey &pkey, + PartFetchCtx *&task); + + /// Update task pool status and prepare task list + /// + /// 1. Take the pending tasks from the queue and put them in the linklist + /// 2. First set the status to HANDLING unconditionally + /// 3. Set the status to IDLE when it is confirmed that there are no tasks in the task pool + /// 4. After the status is successfully set to IDLE, the task pool can no longer be operated, as a thread may have already taken over the task pool + /// + /// @param [out] pool_become_idle Return a variable that marks whether the task pool has changed to IDLE status, i.e., whether it is empty. + /// @param [out] task_list_changed Return a variable that if the task list has changed + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int update(bool &pool_become_idle, bool &task_list_changed); + + FetchTaskList &get_task_list() { return list_; } + + /// Get the corresponding fetch log task based on pkey + int get_task(const common::ObPartitionKey &pkey, PartFetchCtx *&task) const; + + private: + // Increase the total number of tasks, and return OB_SIZE_OVERFLOW if the total number reaches the upper limit + int inc_total_cnt_(const int64_t cnt); + // Decrease the total number of tasks + int dec_total_cnt_(const int64_t cnt); + + private: + DISALLOW_COPY_AND_ASSIGN(FetchTaskPool); + }; + +public: + TO_STRING_KV("type", "FETCH_STREAM", + "stype", print_fetch_stream_type(stype_), + "state", print_state(state_), + K_(svr), + "upper_limit", TS_TO_STR(upper_limit_), + K_(need_open_stream), + K_(stream_seq), + K_(last_feedback_tstamp), + K_(fetch_task_pool), + K_(fetch_log_arpc), + KP_(next), + KP_(prev)); + +private: + State state_; // Fetch log state + FetchStreamType stype_; // Stream type + common::ObAddr svr_; // Target server + IObLogRpc *rpc_; // RPC Processor + IObLogSvrFinder *svr_finder_; // SvrFinder + IObLogFetcherHeartbeatWorker *heartbeater_; // Heartbeat Manager + IObLogStreamWorker *stream_worker_; // Stream master + IFetchLogARpcResultPool *rpc_result_pool_; // RPC result object pool + PartProgressController *progress_controller_; // Progress Controller + + int64_t upper_limit_ CACHE_ALIGNED; // Stream upper limit + bool need_open_stream_ CACHE_ALIGNED; // Need to open stream + obrpc::ObStreamSeq stream_seq_; // Current stream identifier, valid after opening the stream + + int64_t last_feedback_tstamp_ CACHE_ALIGNED; // Last FEEDBACK time + int64_t last_switch_server_tstamp_ CACHE_ALIGNED; // Last switching server time + + // Fetch log task pool + // Includes: queue of pending tasks and list of tasks being processed + FetchTaskPool fetch_task_pool_; + + // Fetch Log Asynchronous RPC + FetchLogARpc fetch_log_arpc_; + + // Statistical Information + int64_t last_stat_time_; + FetchStatInfo cur_stat_info_; + FetchStatInfo last_stat_info_; + common::ObByteLock stat_lock_; // Mutex lock 
that statistical information update and access to + +private: + DISALLOW_COPY_AND_ASSIGN(FetchStream); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetch_stream_container.cpp b/src/liboblog/src/ob_log_fetch_stream_container.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2960b9be210e5755b176ab850b6df50f3a4547d7 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream_container.cpp @@ -0,0 +1,239 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetch_stream_container.h" + +#include "lib/utility/ob_macro_utils.h" // OB_UNLIKELY +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +#include "ob_log_part_fetch_ctx.h" // PartFetchCtx +#include "ob_log_fetch_stream.h" // FetchStream +#include "ob_log_fetch_stream_pool.h" // IFetchStreamPool + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +FetchStreamContainer::FetchStreamContainer(const FetchStreamType stype) : stype_(stype) +{ + reset(); +} + +FetchStreamContainer::~FetchStreamContainer() +{ + reset(); +} + +void FetchStreamContainer::reset() +{ + // TODO: Free all task memory from global considerations + free_fs_list_(); + + svr_.reset(); + + rpc_ = NULL; + fs_pool_ = NULL; + svr_finder_ = NULL; + heartbeater_ = NULL; + stream_worker_ = NULL; + rpc_result_pool_ = NULL; + progress_controller_ = NULL; + + fs_list_.reset(); +} + +void FetchStreamContainer::reset(const common::ObAddr &svr, + IObLogRpc &rpc, + IFetchStreamPool &fs_pool, + IObLogSvrFinder &svr_finder, + IObLogFetcherHeartbeatWorker &heartbeater, + IObLogStreamWorker &stream_worker, + IFetchLogARpcResultPool &rpc_result_pool, + PartProgressController &progress_controller) +{ + reset(); + + svr_ = svr; + rpc_ = &rpc; + fs_pool_ = &fs_pool; + svr_finder_ = &svr_finder; + heartbeater_ = &heartbeater; + stream_worker_ = &stream_worker; + rpc_result_pool_ = &rpc_result_pool; + progress_controller_ = &progress_controller; +} + +int FetchStreamContainer::dispatch(PartFetchCtx &task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(task.get_fetch_stream_type() != stype_)) { + LOG_ERROR("invalid part task, fetch stream type does not match", K(stype_), + K(task.get_fetch_stream_type()), K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + bool succeed = false; + + while (OB_SUCCESS == ret && ! succeed) { + FSList base_fs_list; + // Find a fetch stream with free space and add it to the partition task + // If no free fetch stream is found, return the head of the linklist used for this lookup + if (OB_FAIL(find_fs_and_add_task_(task, succeed, base_fs_list))) { + LOG_ERROR("find_fs_and_add_task_ fail", KR(ret), K(task)); + } else if (! 
succeed) { + // Try to add a new available fetch stream, in order to solve the multi-threaded duplicate creation and lookup problem + // Compare the fetch stream linklist for changes, and if someone else has already created a new one, do not create it in + if (OB_FAIL(try_create_new_fs_and_add_task_(task, base_fs_list, succeed))) { + LOG_ERROR("try_create_new_fs_and_add_task_ fail", KR(ret), K(task), K(base_fs_list)); + } + } + } + } + + return ret; +} + +void FetchStreamContainer::do_stat() +{ + // Add read locks to allow concurrent lookups and inserts + SpinRLockGuard guard(lock_); + + FetchStream *fs = fs_list_.head(); + while (NULL != fs) { + if (fs->get_fetch_task_count() > 0) { + fs->do_stat(); + } + fs = fs->get_next(); + } +} + +// Release all FetchStream objects in the linklist +void FetchStreamContainer::free_fs_list_() +{ + if (NULL != fs_pool_ && fs_list_.count() > 0) { + FetchStream *fs = fs_list_.head(); + while (NULL != fs) { + FetchStream *next = fs->get_next(); + fs->reset(); + (void)fs_pool_->free(fs); + fs = next; + } + + fs_list_.reset(); + } +} + +int FetchStreamContainer::find_fs_and_add_task_(PartFetchCtx &task, + bool &succeed, + FSList &base_fs_list) +{ + int ret = OB_SUCCESS; + + // Add read locks to allow concurrent lookups and inserts + SpinRLockGuard guard(lock_); + + base_fs_list = fs_list_; + succeed = false; + + FetchStream *fs = fs_list_.head(); + while (OB_SUCCESS == ret && ! succeed && NULL != fs) { + ret = fs->add_fetch_task(task); + + if (OB_SUCCESS == ret) { + succeed = true; + } else if (OB_SIZE_OVERFLOW == ret) { + // The task is full, change the next object + ret = OB_SUCCESS; + fs = fs->get_next(); + } else { + LOG_ERROR("add fetch task into fetch stream fail", KR(ret), K(task), K(fs)); + } + } + + return ret; +} + +int FetchStreamContainer::alloc_fetch_stream_(FetchStream *&fs) +{ + int ret = OB_SUCCESS; + fs = NULL; + + if (OB_ISNULL(rpc_) + || OB_ISNULL(fs_pool_) + || OB_ISNULL(svr_finder_) + || OB_ISNULL(heartbeater_) + || OB_ISNULL(stream_worker_) + || OB_ISNULL(rpc_result_pool_) + || OB_ISNULL(progress_controller_)) { + LOG_ERROR("invalid handlers", K(rpc_), K(fs_pool_), K(svr_finder_), K(heartbeater_), + K(stream_worker_), K(rpc_result_pool_), K(progress_controller_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(fs_pool_->alloc(fs))) { + LOG_ERROR("alloc fetch stream fail", KR(ret), K(fs_pool_)); + } else if (OB_ISNULL(fs)) { + LOG_ERROR("invalid fetch stream", K(fs)); + ret = OB_ERR_UNEXPECTED; + } else { + fs->reset(svr_, stype_, + *rpc_, + *svr_finder_, + *heartbeater_, + *stream_worker_, + *rpc_result_pool_, + *progress_controller_); + + LOG_INFO("[STAT] [FETCH_STREAM_CONTAINER] [ALLOC_FETCH_STREAM]", K(fs), KPC(fs)); + } + + return ret; +} + +int FetchStreamContainer::try_create_new_fs_and_add_task_(PartFetchCtx &task, + FSList &base_fs_list, + bool &succeed) +{ + int ret = OB_SUCCESS; + FetchStream *fs = NULL; + + // Add write lock to ensure that only one thread creates a new fetch stream + SpinWLockGuard guard(lock_); + + succeed = false; + // If the linklist changes, just exit and retry next time + if (base_fs_list != fs_list_) { + LOG_DEBUG("new fetch stream has been created, retry next time", K(fs_list_), K(base_fs_list)); + } else if (OB_FAIL(alloc_fetch_stream_(fs))) { + LOG_ERROR("alloc fetch stream fail", KR(ret)); + } else if (OB_ISNULL(fs)) { + LOG_ERROR("invalid fetch stream", K(fs)); + ret = OB_ERR_UNEXPECTED; + } else { + // Update fetch stream linklist + fs_list_.add_head(*fs); + + // Add the task to the new fetch log 
stream, expecting certain success + if (OB_FAIL(fs->add_fetch_task(task))) { + LOG_ERROR("add fetch task into new fetch stream fail", KR(ret), K(task), K(fs)); + } else { + succeed = true; + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_fetch_stream_container.h b/src/liboblog/src/ob_log_fetch_stream_container.h new file mode 100644 index 0000000000000000000000000000000000000000..a4472f70b8d293eb5c2dc05ce8f4aa12ee583e01 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream_container.h @@ -0,0 +1,101 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_CONTAINER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_CONTAINER_H__ + +#include "lib/lock/ob_spin_rwlock.h" // SpinRWLock +#include "lib/net/ob_addr.h" // ObAddr + +#include "ob_log_fetch_stream_type.h" // FetchStreamType +#include "ob_log_fetch_stream.h" // FSList, FetchStream + +namespace oceanbase +{ +namespace liboblog +{ +class IFetchStreamPool; +class IObLogRpc; +class IObLogStreamWorker; +class IFetchLogARpcResultPool; +class IObLogSvrFinder; +class IObLogFetcherHeartbeatWorker; +class PartProgressController; +class PartFetchCtx; + +class FetchStreamContainer +{ +public: + explicit FetchStreamContainer(const FetchStreamType stype); + virtual ~FetchStreamContainer(); + +public: + void reset(); + void reset(const common::ObAddr &svr, + IObLogRpc &rpc, + IFetchStreamPool &fs_pool, + IObLogSvrFinder &svr_finder, + IObLogFetcherHeartbeatWorker &heartbeater, + IObLogStreamWorker &stream_worker, + IFetchLogARpcResultPool &rpc_result_pool, + PartProgressController &progress_controller); + +public: + // Assign the fetch log task to a FetchStream + // If the target is a "new fetch stream task", assign it to a worker thread for processing + int dispatch(PartFetchCtx &task); + + void do_stat(); + +private: + void free_fs_list_(); + int find_fs_and_add_task_(PartFetchCtx &task, + bool &succeed, + FSList &base_fs_list); + int alloc_fetch_stream_(FetchStream *&fs); + int try_create_new_fs_and_add_task_(PartFetchCtx &task, + FSList &base_fs_list, + bool &succeed); + +public: + TO_STRING_KV("stype", print_fetch_stream_type(stype_), + K_(svr), + K_(fs_list)); + +private: + // Constants: Stream types for all tasks + const FetchStreamType stype_; + + // Target server + common::ObAddr svr_; + + IObLogRpc *rpc_; // RPC Processor + IFetchStreamPool *fs_pool_; // Fetch log stream task object pool + IObLogSvrFinder *svr_finder_; // SvrFinder + IObLogFetcherHeartbeatWorker *heartbeater_; // Heartbeat Manager + IObLogStreamWorker *stream_worker_; // Stream master + IFetchLogARpcResultPool *rpc_result_pool_; // RPC result pool + PartProgressController *progress_controller_; // Progress controller + + // Fetch log stream task + // Use read/write locks to control the reading and writing of tasks + FSList fs_list_; + common::SpinRWLock lock_; + +private: + DISALLOW_COPY_AND_ASSIGN(FetchStreamContainer); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetch_stream_pool.cpp 
b/src/liboblog/src/ob_log_fetch_stream_pool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8b2d09c13199094b47f760fb4578bb1be21610df --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream_pool.cpp @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetch_stream_pool.h" + +#include "share/ob_define.h" // OB_SERVER_TENANT_ID +#include "lib/allocator/ob_mod_define.h" // ObModIds +#include "lib/utility/ob_macro_utils.h" // OB_FAIL +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +FetchStreamPool::FetchStreamPool() : pool_() +{} + +FetchStreamPool::~FetchStreamPool() +{ + destroy(); +} + +int FetchStreamPool::init(const int64_t cached_fs_count) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(pool_.init(cached_fs_count, + ObModIds::OB_LOG_FETCH_STREAM_POOL, + OB_SERVER_TENANT_ID, + DEFAULT_BLOCK_SIZE))) { + LOG_ERROR("init fetch stream obj pool fail", KR(ret), K(cached_fs_count)); + } else { + // succ + } + return ret; +} + +void FetchStreamPool::destroy() +{ + pool_.destroy(); +} + +int FetchStreamPool::alloc(FetchStream *&fs) +{ + return pool_.alloc(fs); +} + +int FetchStreamPool::free(FetchStream *fs) +{ + return pool_.free(fs); +} + +void FetchStreamPool::print_stat() +{ + int64_t alloc_count = pool_.get_alloc_count(); + int64_t free_count = pool_.get_free_count(); + int64_t fixed_count = pool_.get_fixed_count(); + int64_t used_count = alloc_count - free_count; + int64_t dynamic_count = (alloc_count > fixed_count) ? alloc_count - fixed_count : 0; + + _LOG_INFO("[STAT] [FETCH_STREAM_POOL] USED=%ld FREE=%ld FIXED=%ld DYNAMIC=%ld", + used_count, free_count, fixed_count, dynamic_count); +} + +} +} diff --git a/src/liboblog/src/ob_log_fetch_stream_pool.h b/src/liboblog/src/ob_log_fetch_stream_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..eceba69732e6abf4ecdce5faf08e5d68922861e2 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream_pool.h @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_POOL_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_POOL_H__ + +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool + +#include "ob_log_fetch_stream.h" // FetchStream + +namespace oceanbase +{ +namespace liboblog +{ + +class IFetchStreamPool +{ +public: + virtual ~IFetchStreamPool() {} + +public: + virtual int alloc(FetchStream *&fs) = 0; + virtual int free(FetchStream *fs) = 0; +}; + +////////////////////// FetchStreamPool /////////////////// +class FetchStreamPool : public IFetchStreamPool +{ + typedef common::ObSmallObjPool PoolType; + static const int64_t DEFAULT_BLOCK_SIZE = 1L << 24; + +public: + FetchStreamPool(); + virtual ~FetchStreamPool(); + +public: + int alloc(FetchStream *&fs); + int free(FetchStream *fs); + void print_stat(); + +public: + int init(const int64_t cached_fs_count); + void destroy(); + +private: + PoolType pool_; + +private: + DISALLOW_COPY_AND_ASSIGN(FetchStreamPool); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetch_stream_type.cpp b/src/liboblog/src/ob_log_fetch_stream_type.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4c2eee2fe71e5af73e3995d4075035cc91aa8ca8 --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream_type.cpp @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_log_fetch_stream_type.h" + +namespace oceanbase +{ +namespace liboblog +{ +const char *print_fetch_stream_type(FetchStreamType type) +{ + const char *str = "UNKNOWN"; + switch (type) { + case FETCH_STREAM_TYPE_UNKNOWN: + str = "UNKNOWN"; + break; + case FETCH_STREAM_TYPE_HOT: + str = "HOT"; + break; + case FETCH_STREAM_TYPE_COLD: + str = "COLD"; + break; + case FETCH_STREAM_TYPE_DDL: + str = "DDL"; + break; + default: + str = "INVALID"; + break; + } + return str; +} + +} +} diff --git a/src/liboblog/src/ob_log_fetch_stream_type.h b/src/liboblog/src/ob_log_fetch_stream_type.h new file mode 100644 index 0000000000000000000000000000000000000000..a6976e76fae7531e6859f3608c648a3de496a9de --- /dev/null +++ b/src/liboblog/src/ob_log_fetch_stream_type.h @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_TYPE_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCH_STREAM_TYPE_H__ + +namespace oceanbase +{ +namespace liboblog +{ + +// Fetch log stream type +// +// 1. Hot streams: streams that are written to more frequently and have a larger log volume +// 2. 
Cold streams: streams that have not been written to for a long time and rely on heartbeats to maintain progress +// 3. DDL streams: streams dedicated to serving DDL partitions +// +// Different streams with different Strategies +// 1. Hot streams fetch logs frequently and need to allocate more resources to fetch logs +// 2. Cold streams have no logs for a long time, so they can reduce the frequency of log fetching and heartbeats and use less resources +// 3. DDL streams are always of the hot stream type, ensuring sufficient resources, always real-time, and immune to pauses +enum FetchStreamType +{ + FETCH_STREAM_TYPE_UNKNOWN = -1, + FETCH_STREAM_TYPE_HOT = 0, // Hot stream + FETCH_STREAM_TYPE_COLD = 1, // Cold stream + FETCH_STREAM_TYPE_DDL = 2, // DDL stream + FETCH_STREAM_TYPE_MAX +}; + +const char *print_fetch_stream_type(FetchStreamType type); + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetcher.cpp b/src/liboblog/src/ob_log_fetcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d90facef19492e2ea728dedbacf60cbea611181c --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher.cpp @@ -0,0 +1,734 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetcher.h" + +#include "lib/oblog/ob_log_module.h" // LOG_* + +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_timer.h" // ObLogFixedTimer +#include "ob_log_ddl_handler.h" // IObLogDDLHandler +#include "ob_log_task_pool.h" // ObLogTransTaskPool +#include "ob_log_instance.h" // IObLogErrHandler + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +///////////////////////////////////////////// ObLogFetcher ///////////////////////////////////////////// + +bool ObLogFetcher::g_print_partition_heartbeat_info = + ObLogConfig::default_print_partition_heartbeat_info; +int64_t ObLogFetcher::g_inner_heartbeat_interval = + ObLogConfig::default_output_inner_heartbeat_interval_msec * _MSEC_; + +ObLogFetcher::ObLogFetcher() : + inited_(false), + task_pool_(NULL), + ddl_handler_(NULL), + err_handler_(NULL), + part_trans_resolver_factory_(), + part_fetch_mgr_(), + progress_controller_(), + rpc_(), + all_svr_cache_(), + svr_finder_(), + heartbeater_(), + start_log_id_locator_(), + idle_pool_(), + dead_pool_(), + stream_worker_(), + dispatcher_(), + cluster_id_filter_(), + misc_tid_(0), + heartbeat_dispatch_tid_(0), + last_timestamp_(OB_INVALID_TIMESTAMP), + stop_flag_(true), + paused_(false), + pause_time_(OB_INVALID_TIMESTAMP), + resume_time_(OB_INVALID_TIMESTAMP) +{ +} + +ObLogFetcher::~ObLogFetcher() +{ + destroy(); +} + +int ObLogFetcher::init(IObLogDmlParser *dml_parser, + IObLogDDLHandler *ddl_handler, + IObLogErrHandler *err_handler, + ObLogSysTableHelper &systable_helper, + TaskPool *task_pool, + IObLogEntryTaskPool *log_entry_task_pool, + IObLogCommitter *committer, + const ObLogConfig &cfg, + const int64_t start_seq) +{ + int ret = OB_SUCCESS; + int64_t max_cached_part_fetch_ctx_count = cfg.active_partition_count; + + if 
(OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(dml_parser) + || OB_ISNULL(ddl_handler_ = ddl_handler) + || OB_ISNULL(err_handler_ = err_handler) + || OB_ISNULL(task_pool_ = task_pool) + || OB_ISNULL(log_entry_task_pool) + || OB_ISNULL(committer)) { + LOG_ERROR("invalid argument", K(dml_parser), K(ddl_handler), K(err_handler), + K(task_pool), K(log_entry_task_pool)); + ret = OB_INVALID_ARGUMENT; + } else { + // Before the Fetcher module is initialized, the following configuration items need to be loaded + configure(cfg); + if (OB_FAIL(progress_controller_.init(cfg.partition_count_upper_limit))) { + LOG_ERROR("init progress controller fail", KR(ret)); + } else if (OB_FAIL(dispatcher_.init(ddl_handler, committer, start_seq))) { + LOG_ERROR("init fetcher dispatcher fail", KR(ret), K(ddl_handler), + K(committer), K(start_seq)); + } else if (OB_FAIL(cluster_id_filter_.init(cfg.cluster_id_black_list.str(), + cfg.cluster_id_black_value_min, cfg.cluster_id_black_value_max))) { + LOG_ERROR("init cluster_id_filter fail", KR(ret)); + } else if (OB_FAIL(part_trans_resolver_factory_.init(*task_pool, *log_entry_task_pool, dispatcher_, cluster_id_filter_))) { + LOG_ERROR("init part trans resolver factory fail", KR(ret)); + } else if (OB_FAIL(part_fetch_mgr_.init(max_cached_part_fetch_ctx_count, + progress_controller_, + part_trans_resolver_factory_))) { + LOG_ERROR("init part fetch mgr fail", KR(ret)); + } else if (OB_FAIL(rpc_.init(cfg.rpc_tenant_id, cfg.io_thread_num))) { + LOG_ERROR("init rpc handler fail", KR(ret)); + } else if (OB_FAIL(all_svr_cache_.init(systable_helper, *err_handler))) { + LOG_ERROR("init all svr cache fail", KR(ret)); + } else if (OB_FAIL(svr_finder_.init(cfg.svr_finder_thread_num, *err_handler, + all_svr_cache_, systable_helper))) { + LOG_ERROR("init svr finder fail", KR(ret)); + } else if (OB_FAIL(heartbeater_.init(cfg.fetcher_heartbeat_thread_num, rpc_, *err_handler))) { + LOG_ERROR("init heartbeater fail", KR(ret)); + } else if (OB_FAIL(start_log_id_locator_.init(cfg.start_log_id_locator_thread_num, + cfg.start_log_id_locator_locate_count, + rpc_, *err_handler))) { + LOG_ERROR("init start log id locator fail", KR(ret)); + } else if (OB_FAIL(idle_pool_.init(cfg.idle_pool_thread_num, + *err_handler, + svr_finder_, + stream_worker_, + start_log_id_locator_))) { + LOG_ERROR("init idle pool fail", KR(ret)); + } else if (OB_FAIL(dead_pool_.init(cfg.dead_pool_thread_num, + part_fetch_mgr_, + *err_handler))) { + LOG_ERROR("init dead pool fail", KR(ret)); + } else if (OB_FAIL(stream_worker_.init(cfg.stream_worker_thread_num, + cfg.svr_stream_cached_count, + cfg.fetch_stream_cached_count, + cfg.rpc_result_cached_count, + cfg.timer_task_count_upper_limit, + rpc_, + idle_pool_, + dead_pool_, + svr_finder_, + *err_handler, + all_svr_cache_, + heartbeater_, + progress_controller_))) { + LOG_ERROR("init stream worker fail", KR(ret)); + } else { + paused_ = false; + pause_time_ = OB_INVALID_TIMESTAMP; + resume_time_ = OB_INVALID_TIMESTAMP; + misc_tid_ = 0; + heartbeat_dispatch_tid_ = 0; + last_timestamp_ = OB_INVALID_TIMESTAMP; + stop_flag_ = true; + inited_ = true; + + // Initialization test mode + IObLogPartTransResolver::test_mode_on = cfg.test_mode_on; + IObLogPartTransResolver::test_mode_ignore_redo_count = cfg.test_mode_ignore_redo_count; + IObLogPartTransResolver::test_checkpoint_mode_on = cfg.test_checkpoint_mode_on; + + LOG_INFO("init fetcher succ", "test_mode_on", IObLogPartTransResolver::test_mode_on, + 
"test_mode_ignore_redo_count", IObLogPartTransResolver::test_mode_ignore_redo_count, + "test_checkpoint_mode_on", IObLogPartTransResolver::test_checkpoint_mode_on); + } + } + return ret; +} + +void ObLogFetcher::destroy() +{ + stop(); + + // TODO: Global destroy all memory + inited_ = false; + task_pool_ = NULL; + ddl_handler_ = NULL; + err_handler_ = NULL; + + misc_tid_ = 0; + heartbeat_dispatch_tid_ = 0; + last_timestamp_ = OB_INVALID_TIMESTAMP; + stop_flag_ = true; + paused_ = false; + pause_time_ = OB_INVALID_TIMESTAMP; + resume_time_ = OB_INVALID_TIMESTAMP; + + stream_worker_.destroy(); + idle_pool_.destroy(); + dead_pool_.destroy(); + start_log_id_locator_.destroy(); + all_svr_cache_.destroy(); + heartbeater_.destroy(); + svr_finder_.destroy(); + rpc_.destroy(); + progress_controller_.destroy(); + part_fetch_mgr_.destroy(); + part_trans_resolver_factory_.destroy(); + dispatcher_.destroy(); + cluster_id_filter_.destroy(); + + LOG_INFO("destroy fetcher succ"); +} + +int ObLogFetcher::start() +{ + int ret = OB_SUCCESS; + int pthread_ret = 0; + + LOG_INFO("begin start fetcher"); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! stop_flag_)) { + LOG_ERROR("fetcher has been started", K(stop_flag_)); + ret = OB_INIT_TWICE; + } else { + stop_flag_ = false; + + if (OB_FAIL(svr_finder_.start())) { + LOG_ERROR("start svr finder fail", KR(ret)); + } else if (OB_FAIL(heartbeater_.start())) { + LOG_ERROR("start heartbeater fail", KR(ret)); + } else if (OB_FAIL(start_log_id_locator_.start())) { + LOG_ERROR("start 'start_log_id_locator' fail", KR(ret)); + } else if (OB_FAIL(idle_pool_.start())) { + LOG_ERROR("start idle pool fail", KR(ret)); + } else if (OB_FAIL(dead_pool_.start())) { + LOG_ERROR("start dead pool fail", KR(ret)); + } else if (OB_FAIL(stream_worker_.start())) { + LOG_ERROR("start stream worker fail", KR(ret)); + } else if (0 != (pthread_ret = pthread_create(&misc_tid_, NULL, misc_thread_func_, this))) { + LOG_ERROR("start fetcher misc thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else if (0 != (pthread_ret = pthread_create(&heartbeat_dispatch_tid_, NULL, + heartbeat_dispatch_thread_func_, this))) { + LOG_ERROR("start fetcher heartbeat dispatch thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("start fetcher succ", K(misc_tid_), K(heartbeat_dispatch_tid_)); + } + } + return ret; +} + +void ObLogFetcher::stop() +{ + if (OB_LIKELY(inited_)) { + stop_flag_ = true; + + LOG_INFO("stop fetcher begin"); + stream_worker_.stop(); + dead_pool_.stop(); + idle_pool_.stop(); + start_log_id_locator_.stop(); + heartbeater_.stop(); + svr_finder_.stop(); + + if (0 != misc_tid_) { + int pthread_ret = pthread_join(misc_tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("join fetcher misc thread fail", K(misc_tid_), K(pthread_ret), + KERRNOMSG(pthread_ret)); + } + misc_tid_ = 0; + } + + if (0 != heartbeat_dispatch_tid_) { + int pthread_ret = pthread_join(heartbeat_dispatch_tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("join fetcher heartbeat dispatch thread fail", K(heartbeat_dispatch_tid_), + K(pthread_ret), KERRNOMSG(pthread_ret)); + } + heartbeat_dispatch_tid_ = 0; + } + + LOG_INFO("stop fetcher succ"); + } +} + +void ObLogFetcher::pause() +{ + if (OB_LIKELY(inited_)) { + int64_t pause_time = get_timestamp(); + int64_t last_pause_time = ATOMIC_LOAD(&pause_time_); + int64_t last_resume_time = ATOMIC_LOAD(&resume_time_); + + 
ATOMIC_STORE(&pause_time_, pause_time); + ATOMIC_STORE(&paused_, true); + stream_worker_.pause(); + LOG_INFO("pause fetcher succ", "last pause time", TS_TO_STR(last_pause_time), + "last resume time", TS_TO_STR(last_resume_time)); + } +} + +void ObLogFetcher::resume() +{ + if (OB_LIKELY(inited_)) { + int64_t resume_time = get_timestamp(); + int64_t pause_time = ATOMIC_LOAD(&pause_time_); + int64_t pause_interval = resume_time - pause_time; + + ATOMIC_STORE(&resume_time_, resume_time); + ATOMIC_STORE(&paused_, false); + stream_worker_.resume(resume_time); + LOG_INFO("resume fetcher succ", "pause interval", TVAL_TO_STR(pause_interval)); + } +} + +bool ObLogFetcher::is_paused() +{ + return ATOMIC_LOAD(&paused_); +} + +void ObLogFetcher::mark_stop_flag() +{ + if (OB_UNLIKELY(inited_)) { + LOG_INFO("mark fetcher stop begin"); + stop_flag_ = true; + + stream_worker_.mark_stop_flag(); + dead_pool_.mark_stop_flag(); + idle_pool_.mark_stop_flag(); + start_log_id_locator_.mark_stop_flag(); + heartbeater_.mark_stop_flag(); + svr_finder_.mark_stop_flag(); + LOG_INFO("mark fetcher stop succ"); + } +} + +int ObLogFetcher::add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id) +{ + int ret = OB_SUCCESS; + PartFetchCtx *part_fetch_ctx = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } + // Requires a valid start-up timestamp + else if (OB_UNLIKELY(start_tstamp <= 0)) { + LOG_ERROR("invalid start tstamp", K(start_tstamp), K(pkey), K(start_log_id)); + ret = OB_INVALID_ARGUMENT; + } + // Push partition into PartFetchMgr + else if (OB_FAIL(part_fetch_mgr_.add_partition(pkey, start_tstamp, start_log_id))) { + LOG_ERROR("add partition by part fetch mgr fail", KR(ret), K(pkey), K(start_tstamp), + K(start_log_id)); + } else if (OB_FAIL(part_fetch_mgr_.get_part_fetch_ctx(pkey, part_fetch_ctx))) { + LOG_ERROR("get part fetch ctx fail", KR(ret), K(pkey)); + } else if (OB_ISNULL(part_fetch_ctx)) { + LOG_ERROR("part fetch ctx is NULL", K(part_fetch_ctx)); + ret = OB_ERR_UNEXPECTED; + } + // First enter the IDLE POOL to initialize basic information + else if (OB_FAIL(idle_pool_.push(part_fetch_ctx))) { + LOG_ERROR("push task into idle pool fail", KR(ret), K(part_fetch_ctx)); + } else { + LOG_INFO("fetcher add partition succ", K(pkey), K(start_tstamp), K(start_log_id)); + } + return ret; +} + +int ObLogFetcher::recycle_partition(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(part_fetch_mgr_.recycle_partition(pkey))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_INFO("partition has been recycled in fetcher", K(pkey)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("recycle partition fail", KR(ret), K(pkey)); + } + } else { + LOG_INFO("fetcher recycle partition succ", K(pkey)); + } + return ret; +} + +int ObLogFetcher::set_start_global_trans_version(const int64_t start_global_trans_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_global_trans_version)) { + LOG_ERROR("invalid argument", K(start_global_trans_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(part_fetch_mgr_.set_start_global_trans_version(start_global_trans_version))) { + LOG_ERROR("part_fetch_mgr_ set_start_global_trans_version fail", KR(ret), K(start_global_trans_version)); + } else { + // succ + } + + return ret; +} + +void ObLogFetcher::configure(const ObLogConfig &cfg) +{ + bool print_partition_heartbeat_info = cfg.print_partition_heartbeat_info; + const int64_t inner_heartbeat_interval = TCONF.output_inner_heartbeat_interval_msec * _MSEC_; + + ATOMIC_STORE(&g_print_partition_heartbeat_info, print_partition_heartbeat_info); + ATOMIC_STORE(&g_inner_heartbeat_interval, inner_heartbeat_interval); + LOG_INFO("[CONFIG]", K(print_partition_heartbeat_info), K(g_inner_heartbeat_interval)); + + ObLogStartLogIdLocator::configure(cfg); + ObLogFetcherHeartbeatWorker::configure(cfg); + PartFetchCtx::configure(cfg); + FetchStream::configure(cfg); + ObLogAllSvrCache::configure(cfg); + ObLogFixedTimer::configure(cfg); + ObLogRpc::configure(cfg); + ObLogStreamWorker::configure(cfg); + BlackList::configure(cfg); + ObLogPartFetchMgr::configure(cfg); + FetchLogARpc::configure(cfg); + svr_finder_.configure(cfg); +} + +void *ObLogFetcher::misc_thread_func_(void *arg) +{ + if (NULL != arg) { + ObLogFetcher *host = static_cast(arg); + host->run_misc_thread(); + } + return NULL; +} + +void ObLogFetcher::run_misc_thread() +{ + LOG_INFO("fetcher misc thread start"); + + while (! stop_flag_) { + // Periodic printing progress slowest k partitions + if (REACH_TIME_INTERVAL(PRINT_K_SLOWEST_PARTITION)) { + // Print upper_limt, fetcher_delay + print_fetcher_stat_(); + // Print the slowest k partitions + part_fetch_mgr_.print_k_slowest_partition(); + } + + if (REACH_TIME_INTERVAL(PRINT_CLUSTER_ID_IGNORE_TPS_INTERVAL)) { + cluster_id_filter_.stat_ignored_tps(); + } + + if (REACH_TIME_INTERVAL(TRANS_ABORT_INFO_GC_INTERVAL)) { + if (OB_INVALID_TIMESTAMP != last_timestamp_) { + part_trans_resolver_factory_.gc_commit_trans_info(last_timestamp_); + } + } + + usleep(MISC_THREAD_SLEEP_TIME); + } + + LOG_INFO("fetcher misc thread stop"); +} + +void *ObLogFetcher::heartbeat_dispatch_thread_func_(void *arg) +{ + if (NULL != arg) { + ObLogFetcher *host = static_cast(arg); + host->heartbeat_dispatch_routine(); + } + return NULL; +} + +void ObLogFetcher::heartbeat_dispatch_routine() +{ + int ret = OB_SUCCESS; + LOG_INFO("fetcher heartbeat dispatch thread start"); + // Global heartbeat invalid pkey + ObPartitionKey hb_pkey; + + if (OB_ISNULL(task_pool_)) { + LOG_ERROR("invalid task pool", K(task_pool_)); + ret = OB_NOT_INIT; + } else { + while (OB_SUCCESS == ret && ! 
stop_flag_) { + int64_t heartbeat_tstamp = OB_INVALID_TIMESTAMP; + PartTransTask *task = NULL; + + // Get the next heartbeat timestamp + if (OB_FAIL(next_heartbeat_timestamp_(heartbeat_tstamp, last_timestamp_))) { + LOG_ERROR("next_heartbeat_timestamp_ fail", KR(ret), K(last_timestamp_)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == heartbeat_tstamp)) { + LOG_ERROR("heartbeat timestamp is invalid", K(heartbeat_tstamp)); + ret = OB_ERR_UNEXPECTED; + } else if (heartbeat_tstamp == last_timestamp_) { + // Heartbeat is not updated, no need to generate + } + else if (OB_ISNULL(task = task_pool_->get(NULL, hb_pkey))) { + LOG_ERROR("alloc part trans task fail", K(task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } + else if (OB_FAIL(task->init_global_heartbeat_info(heartbeat_tstamp))) { + LOG_ERROR("init heartbeat task fail", KR(ret), K(heartbeat_tstamp), KPC(task)); + } + // Dispatch heartbeat task + else if (OB_FAIL(dispatcher_.dispatch(*task, stop_flag_))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch heartbeat task fail", KR(ret), KPC(task)); + } + } else { + last_timestamp_ = heartbeat_tstamp; + } + + if (OB_SUCCESS == ret) { + usleep((useconds_t)g_inner_heartbeat_interval); + } + } + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "Fetcher HEARTBEAT thread exits, err=%d", ret); + stop_flag_ = true; + } + + LOG_INFO("fetcher heartbeat dispatch thread stop", KR(ret), K(stop_flag_)); +} + +void ObLogFetcher::print_fetcher_stat_() +{ + int ret = OB_SUCCESS; + int64_t min_progress = OB_INVALID_TIMESTAMP; + int64_t upper_limit_us = OB_INVALID_TIMESTAMP; + int64_t fetcher_delay = OB_INVALID_TIMESTAMP; + int64_t dml_progress_limit = 0; + + // Get global minimum progress + if (OB_FAIL(progress_controller_.get_min_progress(min_progress))) { + LOG_ERROR("get_min_progress fail", KR(ret), K(progress_controller_)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == min_progress)) { + LOG_ERROR("current min progress is invalid", K(min_progress), K(progress_controller_)); + ret = OB_INVALID_ERROR; + } else { + dml_progress_limit = ATOMIC_LOAD(&FetchStream::g_dml_progress_limit); + upper_limit_us = min_progress + dml_progress_limit; + fetcher_delay = get_timestamp() - min_progress; + } + + if (OB_SUCC(ret)) { + LOG_INFO("[STAT] [FETCHER]", "upper_limit", TS_TO_STR(upper_limit_us), + "dml_progress_limit_sec", dml_progress_limit / _SEC_, + "fetcher_delay", TVAL_TO_STR(fetcher_delay)); + } +} +ObLogFetcher::FetchCtxMapHBFunc::FetchCtxMapHBFunc() : + data_progress_(OB_INVALID_TIMESTAMP), + ddl_progress_(OB_INVALID_TIMESTAMP), + ddl_last_dispatch_log_id_(OB_INVALID_ID), + min_progress_(OB_INVALID_TIMESTAMP), + max_progress_(OB_INVALID_TIMESTAMP), + min_progress_pkey_(), + max_progress_pkey_(), + part_count_(0) +{} + +bool ObLogFetcher::FetchCtxMapHBFunc::operator()(const common::ObPartitionKey &pkey, PartFetchCtx *&ctx) +{ + int ret = OB_SUCCESS; + bool bool_ret = true; + int64_t progress = OB_INVALID_TIMESTAMP; + PartTransDispatchInfo dispatch_info; + uint64_t last_dispatch_log_id = OB_INVALID_ID; + + if (NULL == ctx) { + // ctx is invalid, not processed + } else if (OB_FAIL(ctx->get_dispatch_progress(progress, dispatch_info))) { + LOG_ERROR("get_dispatch_progress fail", KR(ret), K(pkey), KPC(ctx)); + } + // The progress returned by the fetch log context must be valid, and its progress value must be a valid value, underlined by the fetch log progress + else if 
(OB_UNLIKELY(OB_INVALID_TIMESTAMP == progress)) { + LOG_ERROR("partition dispatch progress is invalid", K(progress), K(pkey), KPC(ctx), + K(dispatch_info)); + ret = OB_ERR_UNEXPECTED; + } else { + last_dispatch_log_id = dispatch_info.last_dispatch_log_id_; + + if (is_ddl_table(pkey.get_table_id())) { + // Assuming only one DDL partition + // Update the DDL partition + ddl_progress_ = progress; + ddl_last_dispatch_log_id_ = last_dispatch_log_id; + } else { + // Update data progress + if (OB_INVALID_TIMESTAMP == data_progress_) { + data_progress_ = progress; + } else { + data_progress_ = std::min(data_progress_, progress); + } + } + + // Update maximum and minimum progress + if (OB_INVALID_TIMESTAMP == max_progress_ || progress > max_progress_) { + max_progress_ = progress; + max_progress_pkey_ = pkey; + } + + if (OB_INVALID_TIMESTAMP == min_progress_ || progress < min_progress_) { + min_progress_ = progress; + min_progress_pkey_ = pkey; + } + + part_count_++; + + if (g_print_partition_heartbeat_info) { + _LOG_INFO("[STAT] [FETCHER] [HEARTBEAT] PART=%s PROGRESS=%ld DISPATCH_LOG_ID=%lu " + "DATA_PROGRESS=%ld DDL_PROGRESS=%ld DDL_DISPATCH_LOG_ID=%lu", to_cstring(pkey), + progress, last_dispatch_log_id, data_progress_, ddl_progress_, ddl_last_dispatch_log_id_); + } + } + + return bool_ret; +} + +// Get the heartbeat progress +// +// Principle: get producer progress first, then consumer progress; ensure progress values are safe +// +// DDL partition is the producer of data partition, DDL will trigger new partition, so you should get the DDL progress first before getting the data partition progress +int ObLogFetcher::next_heartbeat_timestamp_(int64_t &heartbeat_tstamp, const int64_t last_timestamp) +{ + int ret = OB_SUCCESS; + static int64_t last_data_progress = OB_INVALID_TIMESTAMP; + static int64_t last_ddl_handle_progress = OB_INVALID_TIMESTAMP; + static int64_t last_ddl_last_handle_log_id = OB_INVALID_ID; + static common::ObPartitionKey last_min_data_progress_pkey; + + FetchCtxMapHBFunc hb_func; + uint64_t ddl_min_progress_tenant_id = OB_INVALID_TENANT_ID; + uint64_t ddl_last_handle_log_id = OB_INVALID_ID; + int64_t ddl_handle_progress = OB_INVALID_TIMESTAMP; + + if (OB_ISNULL(ddl_handler_)) { + LOG_ERROR("invalid ddl handler", K(ddl_handler_)); + ret = OB_NOT_INIT; + } + // Get the DDL processing progress first, because the DDL is the producer of the data partition, and getting it first will ensure that the overall progress is not reverted + // Note: the progress value should not be invalid + else if (OB_FAIL(ddl_handler_->get_progress(ddl_min_progress_tenant_id, ddl_handle_progress, + ddl_last_handle_log_id))) { + LOG_ERROR("ddl_handler get_progress fail", KR(ret), K(ddl_min_progress_tenant_id), + K(ddl_handle_progress), K(ddl_last_handle_log_id)); + } + else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == ddl_handle_progress)) { + LOG_ERROR("get DDL handle progress is invalid", K(ddl_handle_progress), K(ddl_last_handle_log_id)); + ret = OB_ERR_UNEXPECTED; + } + // Then iterate through all the partitions to get the distribution progress of each partition, i.e. 
the progress of Fetcher's distribution data + // Note: Here we also get the progress of the DDL distribution, which is only used for printing + else if (OB_FAIL(part_fetch_mgr_.for_each_part(hb_func))) { + LOG_ERROR("for each part fetch ctx fail", KR(ret)); + } else { + int64_t data_progress = hb_func.data_progress_; + ObPartitionKey min_progress_pkey = hb_func.min_progress_pkey_; + ObPartitionKey max_progress_pkey = hb_func.max_progress_pkey_; + + // The final heartbeat timestamp is equal to the minimum value of the DDL processing progress and data progress + if (OB_INVALID_TIMESTAMP != data_progress) { + heartbeat_tstamp = std::min(data_progress, ddl_handle_progress); + } else { + heartbeat_tstamp = ddl_handle_progress; + } + + if (REACH_TIME_INTERVAL(PRINT_HEARTBEAT_INTERVAL) || g_print_partition_heartbeat_info) { + // Calculation of the minimum and maximum progress, and the corresponding partitions + int64_t min_progress = hb_func.min_progress_; + int64_t max_progress = hb_func.max_progress_; + + // If the DDL processing progress is smaller than the minimum progress, the minimum progress takes the DDL progress + if (min_progress > ddl_handle_progress) { + min_progress = ddl_handle_progress; + // FIXME: Here the pkey of the DDL is constructed directly, which may be wrong for the partition count field of the sys tenant, but here it is just + // printing logs does not affect + min_progress_pkey = ObPartitionKey(ddl_min_progress_tenant_id, 0, 0); + } + + _LOG_INFO("[STAT] [FETCHER] [HEARTBEAT] DELAY=[%.3lf, %.3lf](sec) PART_COUNT=%ld " + "MIN_DELAY=%s MAX_DELAY=%s DATA_PROGRESS=%s " + "DDL_PROGRESS=%s DDL_TENANT=%lu DDL_LOG_ID=%lu", + get_delay_sec(max_progress), + get_delay_sec(min_progress), + hb_func.part_count_, + to_cstring(max_progress_pkey), + to_cstring(min_progress_pkey), + TS_TO_STR(data_progress), + TS_TO_STR(ddl_handle_progress), + ddl_min_progress_tenant_id, + ddl_last_handle_log_id); + } + + // Checks if the heartbeat timestamp is reverted + if (OB_INVALID_TIMESTAMP != last_timestamp && heartbeat_tstamp < last_timestamp) { + LOG_ERROR("heartbeat timestamp is rollback, unexcepted error", + "last_timestamp", TS_TO_STR(last_timestamp), + K(last_timestamp), + "heartbeat_tstamp", TS_TO_STR(heartbeat_tstamp), + K(heartbeat_tstamp), + "data_progress", TS_TO_STR(data_progress), + "last_data_progress", TS_TO_STR(last_data_progress), + K(min_progress_pkey), K(last_min_data_progress_pkey), + "ddl_handle_progress", TS_TO_STR(ddl_handle_progress), + "last_ddl_handle_progress", TS_TO_STR(last_ddl_handle_progress), + "ddl_last_handle_log_id", TS_TO_STR(ddl_last_handle_log_id), + "last_ddl_last_handle_log_id", TS_TO_STR(last_ddl_last_handle_log_id)); + ret = OB_ERR_UNEXPECTED; + } else { + last_data_progress = data_progress; + last_ddl_handle_progress = ddl_handle_progress; + last_ddl_last_handle_log_id = ddl_last_handle_log_id; + last_min_data_progress_pkey = min_progress_pkey; + } + } + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_fetcher.h b/src/liboblog/src/ob_log_fetcher.h new file mode 100644 index 0000000000000000000000000000000000000000..6cdbc63ddc0ce82a8d3cd8ba4b2118f47b560c29 --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher.h @@ -0,0 +1,200 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_LOG_FETCHER_H_ +#define OCEANBASE_LIBOBLOG_LOG_FETCHER_H_ + +#include "common/ob_partition_key.h" // ObPartitionKey + +#include "ob_log_part_mgr.h" // PartAddCallback, PartRecycleCallback +#include "ob_log_part_fetch_mgr.h" // ObLogPartFetchMgr +#include "ob_log_rpc.h" // ObLogRpc +#include "ob_log_part_progress_controller.h" // PartProgressController +#include "ob_log_all_svr_cache.h" // ObLogAllSvrCache +#include "ob_log_svr_finder.h" // ObLogSvrFinder +#include "ob_log_fetcher_heartbeat_worker.h" // ObLogFetcherHeartbeatWorker +#include "ob_log_start_log_id_locator.h" // ObLogStartLogIdLocator +#include "ob_log_fetcher_idle_pool.h" // ObLogFetcherIdlePool +#include "ob_log_fetcher_dead_pool.h" // ObLogFetcherDeadPool +#include "ob_log_stream_worker.h" // ObLogStreamWorker +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_cluster_id_filter.h" // ObLogClusterIDFilter +#include "ob_log_part_trans_resolver_factory.h" // ObLogPartTransResolverFactory +#include "ob_log_fetcher_dispatcher.h" // ObLogFetcherDispatcher + +namespace oceanbase +{ +namespace liboblog +{ + +class ObLogConfig; +class IObLogFetcher : public PartAddCallback, public PartRecycleCallback +{ +public: + virtual ~IObLogFetcher() { } + + virtual int start() = 0; + virtual void stop() = 0; + virtual void pause() = 0; + virtual void resume() = 0; + virtual bool is_paused() = 0; + virtual void mark_stop_flag() = 0; + + // Update fetcher configure + virtual void configure(const ObLogConfig &cfg) = 0; + + // Add partition + virtual int add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id) = 0; + + // Recycling partition + virtual int recycle_partition(const common::ObPartitionKey &pkey) = 0; + + virtual int64_t get_part_trans_task_count() const = 0; + + virtual int set_start_global_trans_version(const int64_t start_global_trans_version) = 0; +}; + +///////////////////////////////////////////////////////////////////////////////// + +class IObLogDmlParser; +class IObLogErrHandler; +class PartTransTask; +class IObLogDDLHandler; +class IObLogCommitter; +template class ObLogTransTaskPool; +typedef ObLogTransTaskPool TaskPool; +class IObLogEntryTaskPool; + +class ObLogFetcher : public IObLogFetcher +{ + static const int64_t MISC_THREAD_SLEEP_TIME = 1 * _SEC_; + static const int64_t PRINT_K_SLOWEST_PARTITION = 10 * _SEC_; + static const int64_t PRINT_CLUSTER_ID_IGNORE_TPS_INTERVAL = 10 * _SEC_; + static const int64_t PRINT_HEARTBEAT_INTERVAL = 10 * _SEC_; + static const int64_t TRANS_ABORT_INFO_GC_INTERVAL = 10 * _SEC_; + + static bool g_print_partition_heartbeat_info; + static int64_t g_inner_heartbeat_interval; + +public: + ObLogFetcher(); + virtual ~ObLogFetcher(); + + int init(IObLogDmlParser *dml_parser, + IObLogDDLHandler *ddl_handler, + IObLogErrHandler *err_handler, + ObLogSysTableHelper &systable_helper, + TaskPool *task_pool, + IObLogEntryTaskPool *log_entry_task_pool, + IObLogCommitter *committer, + const ObLogConfig &cfg, + const int64_t start_seq); + + void destroy(); + +public: + virtual int start(); + virtual void stop(); + virtual void pause(); + virtual void resume(); + virtual bool 
is_paused(); + virtual void mark_stop_flag(); + + virtual int add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id); + + virtual int recycle_partition(const common::ObPartitionKey &pkey); + + virtual int64_t get_part_trans_task_count() const + { return ATOMIC_LOAD(&PartTransDispatcher::g_part_trans_task_count); } + + virtual int set_start_global_trans_version(const int64_t start_global_trans_version); + + virtual void configure(const ObLogConfig &cfg); + +private: + static void *misc_thread_func_(void *); + void run_misc_thread(); + static void *heartbeat_dispatch_thread_func_(void *); + void heartbeat_dispatch_routine(); + void print_fetcher_stat_(); + + int next_heartbeat_timestamp_(int64_t &hb_ts, const int64_t last_hb_ts); + +private: + struct FetchCtxMapHBFunc + { + FetchCtxMapHBFunc(); + bool operator()(const common::ObPartitionKey &pkey, PartFetchCtx *&ctx); + + int64_t data_progress_; + int64_t ddl_progress_; + int64_t ddl_last_dispatch_log_id_; + int64_t min_progress_; + int64_t max_progress_; + common::ObPartitionKey min_progress_pkey_; + common::ObPartitionKey max_progress_pkey_; + int64_t part_count_; + + TO_STRING_KV(K_(data_progress), K_(ddl_progress), K_(ddl_last_dispatch_log_id), + K_(min_progress), + K_(max_progress), + K_(min_progress_pkey), + K_(max_progress_pkey), + K_(part_count)); + }; + +private: + bool inited_; + TaskPool *task_pool_; + IObLogDDLHandler *ddl_handler_; + IObLogErrHandler *err_handler_; + + // Manager + ObLogPartTransResolverFactory part_trans_resolver_factory_; + ObLogPartFetchMgr part_fetch_mgr_; // Fetch Log Task Manager + PartProgressController progress_controller_; // Process Controller + + // Function Modules + ObLogRpc rpc_; + ObLogAllSvrCache all_svr_cache_; + ObLogSvrFinder svr_finder_; + ObLogFetcherHeartbeatWorker heartbeater_; + ObLogStartLogIdLocator start_log_id_locator_; + ObLogFetcherIdlePool idle_pool_; + ObLogFetcherDeadPool dead_pool_; + ObLogStreamWorker stream_worker_; + ObLogFetcherDispatcher dispatcher_; + ObLogClusterIDFilter cluster_id_filter_; + + pthread_t misc_tid_; // Fetcher misc thread + pthread_t heartbeat_dispatch_tid_; // Dispatch heartbeat thread + int64_t last_timestamp_; // Record heartbeat timestamp + volatile bool stop_flag_ CACHE_ALIGNED; + + // stop flag + bool paused_ CACHE_ALIGNED; + int64_t pause_time_ CACHE_ALIGNED; + int64_t resume_time_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFetcher); +}; + +} +} + + +#endif diff --git a/src/liboblog/src/ob_log_fetcher_dead_pool.cpp b/src/liboblog/src/ob_log_fetcher_dead_pool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..250713bebb6a4af628d187cf7b38cce3e752c647 --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_dead_pool.cpp @@ -0,0 +1,274 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetcher_dead_pool.h" + +#include "lib/oblog/ob_log_module.h" // LOG_ERROR +#include "lib/allocator/ob_mod_define.h" // ObModIds + +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_part_fetch_mgr.h" // IObLogPartFetchMgr + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogFetcherDeadPool::ObLogFetcherDeadPool() : + inited_(false), + err_handler_(NULL), + part_fetch_mgr_(NULL) +{ +} + +ObLogFetcherDeadPool::~ObLogFetcherDeadPool() +{ + destroy(); +} + + +int ObLogFetcherDeadPool::init(const int64_t thread_num, + IObLogPartFetchMgr &part_fetch_mgr, + IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) || OB_UNLIKELY(thread_num > MAX_THREAD_NUM)) { + LOG_ERROR("invalid argument", K(thread_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(DeadPoolThread::init(thread_num, ObModIds::OB_LOG_FETCHER_DEAD_POOL))) { + LOG_ERROR("init thread fail", KR(ret), K(thread_num)); + } else { + reset_task_list_array_(); + + err_handler_ = &err_handler; + part_fetch_mgr_ = &part_fetch_mgr; + inited_ = true; + + LOG_INFO("init fetcher dead pool succ", K(thread_num), K(this)); + } + return ret; +} + +void ObLogFetcherDeadPool::destroy() +{ + stop(); + + inited_ = false; + DeadPoolThread::destroy(); + err_handler_ = NULL; + part_fetch_mgr_ = NULL; + reset_task_list_array_(); + + LOG_INFO("destroy fetcher dead pool succ"); +} + +int ObLogFetcherDeadPool::push(PartFetchCtx *task) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! task->is_discarded())) { + LOG_ERROR("invalid task which is not discarded", K(task), KPC(task)); + ret = OB_INVALID_ARGUMENT; + } else { + task->dispatch_in_dead_pool(); + + LOG_DEBUG("[STAT] [DEAD_POOL] [DISPATCH_IN]", K(task), KPC(task)); + + if (OB_FAIL(DeadPoolThread::push(task, task->hash()))) { + LOG_ERROR("push task fail", KR(ret), K(task), K(task->hash())); + } else { + // 成功 + } + } + return ret; +} + +int ObLogFetcherDeadPool::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(DeadPoolThread::start())) { + LOG_ERROR("start thread fail", KR(ret)); + } else { + LOG_INFO("start fetcher dead pool succ", "thread_num", get_thread_num()); + } + return ret; +} + +void ObLogFetcherDeadPool::stop() +{ + if (OB_LIKELY(inited_)) { + DeadPoolThread::stop(); + LOG_INFO("stop fetcher dead pool succ"); + } +} + +void ObLogFetcherDeadPool::mark_stop_flag() +{ + if (OB_LIKELY(inited_)) { + DeadPoolThread::mark_stop_flag(); + LOG_INFO("mark fetcher dead pool stop"); + } +} + +void ObLogFetcherDeadPool::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= get_thread_num())) { + LOG_ERROR("invalid thread index", K(thread_index), K(get_thread_num())); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("fetcher dead pool thread start", K(thread_index)); + + FetchTaskList &task_list = task_list_array_[thread_index]; + + while (! 
stop_flag_ && OB_SUCCESS == ret) { + if (OB_FAIL(retrieve_task_list_(thread_index, task_list))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("retrieve task list fail", KR(ret)); + } + } else if (OB_FAIL(handle_task_list_(thread_index, task_list))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("do requests fail", KR(ret)); + } + } else { + // Wait for a fixed time or until a new task arrives + cond_timedwait(thread_index, IDLE_WAIT_TIME); + } + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "fetcher dead pool thread exits, thread_index=%ld, err=%d", + thread_index, ret); + + DeadPoolThread::mark_stop_flag(); + } + + LOG_INFO("fetcher dead pool thread exits", K(thread_index), KR(ret)); + } +} + +void ObLogFetcherDeadPool::reset_task_list_array_() +{ + for (int64_t idx = 0; idx < MAX_THREAD_NUM; idx++) { + task_list_array_[idx].reset(); + } +} + +int ObLogFetcherDeadPool::retrieve_task_list_(const int64_t thread_index, FetchTaskList &list) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else { + while (! stop_flag_ && OB_SUCCESS == ret) { + void *data = NULL; + PartFetchCtx *task = NULL; + + if (OB_FAIL(pop(thread_index, data))) { + if (OB_EAGAIN == ret) { + // No data + } else { + LOG_ERROR("pop task from queue fail", KR(ret), K(thread_index)); + } + } else if (OB_ISNULL(task = static_cast<PartFetchCtx *>(data))) { + LOG_ERROR("task is NULL", K(task), K(data)); + ret = OB_ERR_UNEXPECTED; + } else { + list.add_head(*task); + + // Successfully acquired a task + LOG_DEBUG("[STAT] [DEAD_POOL] [RETRIEVE]", K(task), K(thread_index), + "count", list.count(), KPC(task)); + } + } + + if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + return ret; +} + +int ObLogFetcherDeadPool::handle_task_list_(const int64_t thread_index, FetchTaskList &list) +{ + int ret = OB_SUCCESS; + PartFetchCtx *task = list.head(); + + if (OB_ISNULL(part_fetch_mgr_)) { + LOG_ERROR("invalid part fetch mgr", K(part_fetch_mgr_)); + ret = OB_INVALID_ERROR; + } else { + while (OB_SUCCESS == ret && NULL != task) { + PartFetchCtx *next = task->get_next(); + + // Recycle tasks that are not used by asynchronous requests + if (!
task->is_in_use()) { + const ObPartitionKey &pkey = task->get_pkey(); + + // Remove from list + list.erase(*task); + + LOG_INFO("[STAT] [DEAD_POOL] [REMOVE_FETCH_TASK]", K(task), K(thread_index), + KPC(task), K(list)); + + // First perform offline operations to ensure resource recovery and dispatch offline tasks + if (OB_FAIL(task->offline(stop_flag_))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("delete partition fail", KR(ret), K(task), KPC(task)); + } + } + // Then physically delete the partition from the partition fetch log manager + else if (OB_FAIL(part_fetch_mgr_->remove_partition(pkey))) { + LOG_ERROR("remove partition fail", KR(ret), K(pkey), K(task)); + } else { + // You can't continue operating the task afterwards + task = NULL; + } + } else { + // The mission is still in use + LOG_DEBUG("[STAT] [DEAD_POOL] [TASK_IN_USE]", K(task), KPC(task)); + } + + task = next; + } + } + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_fetcher_dead_pool.h b/src/liboblog/src/ob_log_fetcher_dead_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..7e0921131183d1d1c50f7dcd34f282a135c1c597 --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_dead_pool.h @@ -0,0 +1,96 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_DEAD_POOL_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_DEAD_POOL_H__ + +#include "lib/utility/ob_macro_utils.h" // DISALLOW_COPY_AND_ASSIGN + +#include "ob_log_config.h" // ObLogConfig +#include "ob_map_queue_thread.h" // ObMapQueueThread +#include "ob_log_part_fetch_ctx.h" // FetchTaskList, PartFetchCtx + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogFetcherDeadPool +{ +public: + static const int64_t MAX_THREAD_NUM = ObLogConfig::max_dead_pool_thread_num; + +public: + virtual ~IObLogFetcherDeadPool() {} + +public: + virtual int push(PartFetchCtx *task) = 0; + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; +}; + +///////////////////////////////////////////////////////////////// + +typedef common::ObMapQueueThread DeadPoolThread; + +class IObLogErrHandler; +class IObLogPartFetchMgr; + +class ObLogFetcherDeadPool : public IObLogFetcherDeadPool, public DeadPoolThread +{ + static const int64_t IDLE_WAIT_TIME = 100 * 1000; + +public: + ObLogFetcherDeadPool(); + virtual ~ObLogFetcherDeadPool(); + +public: + int init(const int64_t thread_num, + IObLogPartFetchMgr &part_fetch_mgr, + IObLogErrHandler &err_handler); + void destroy(); + +public: + // Implement the IObLogFetcherDeadPool virtual function + virtual int push(PartFetchCtx *task); + virtual int start(); + virtual void stop(); + virtual void mark_stop_flag(); + +public: + // Implement the ObMapQueueThread virtual function + // Overloading thread handling functions + virtual void run(const int64_t thread_index); + +private: + void reset_task_list_array_(); + int retrieve_task_list_(const int64_t thread_index, FetchTaskList &list); + int handle_task_list_(const int64_t thread_index, FetchTaskList &list); + +private: + 
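// Members below are set in init() and cleared in destroy():
// - inited_ marks whether the pool is usable and guards against double init,
// - err_handler_ is notified when a worker thread exits abnormally,
// - part_fetch_mgr_ physically removes a partition once its discarded fetch task is recycled.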
bool inited_; + IObLogErrHandler *err_handler_; + IObLogPartFetchMgr *part_fetch_mgr_; + + // One task array per thread + FetchTaskList task_list_array_[MAX_THREAD_NUM]; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFetcherDeadPool); +}; + + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetcher_dispatcher.cpp b/src/liboblog/src/ob_log_fetcher_dispatcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6b16de1173c36c0650236ab91c5ef46ca8110ad --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_dispatcher.cpp @@ -0,0 +1,246 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetcher_dispatcher.h" // ObLogFetcherDispatcher + +#include "lib/oblog/ob_log_module.h" // LOG_ERROR +#include "lib/atomic/ob_atomic.h" // ATOMIC_FAA +#include "lib/utility/ob_macro_utils.h" // RETRY_FUNC + +#include "ob_log_dml_parser.h" // IObLogDmlParser +#include "ob_log_ddl_handler.h" // IObLogDDLHandler +#include "ob_log_sequencer1.h" // IObLogSequencer +#include "ob_log_committer.h" // IObLogCommitter +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_instance.h" // TCTX + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogFetcherDispatcher::ObLogFetcherDispatcher() : + inited_(false), + ddl_handler_(NULL), + committer_(NULL), + checkpoint_seq_(0) +{ +} + +ObLogFetcherDispatcher::~ObLogFetcherDispatcher() +{ + destroy(); +} + +int ObLogFetcherDispatcher::init(IObLogDDLHandler *ddl_handler, + IObLogCommitter *committer, + const int64_t start_seq) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(ddl_handler_ = ddl_handler) + || OB_ISNULL(committer_ = committer) + || OB_UNLIKELY(start_seq < 0)) { + LOG_ERROR("invalid argument", K(ddl_handler), K(committer), K(start_seq)); + ret = OB_INVALID_ARGUMENT; + } else { + checkpoint_seq_ = start_seq; + inited_ = true; + } + + return ret; +} + +void ObLogFetcherDispatcher::destroy() +{ + inited_ = false; + ddl_handler_ = NULL; + committer_ = NULL; + checkpoint_seq_ = 0; +} + +int ObLogFetcherDispatcher::dispatch(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_LIKELY(! 
inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else { + // All tasks are uniformly assigned checkpoint seq + task.set_checkpoint_seq(ATOMIC_FAA(&checkpoint_seq_, 1)); + + LOG_DEBUG("[STAT] [PART_TRANS] [FETCHER_DISPATCHER]", K(task), "checkpoint_seq", task.get_checkpoint_seq()); + + switch (task.get_type()) { + case PartTransTask::TASK_TYPE_DML_TRANS: + ret = dispatch_dml_trans_task_(task, stop_flag); + break; + + case PartTransTask::TASK_TYPE_DDL_TRANS: + ret = dispatch_ddl_trans_task_(task, stop_flag); + break; + + case PartTransTask::TASK_TYPE_GLOBAL_HEARTBEAT: + ret = dispatch_global_part_heartbeat_(task, stop_flag); + break; + + case PartTransTask::TASK_TYPE_PART_HEARTBEAT: + ret = dispatch_part_heartbeat_(task, stop_flag); + break; + + case PartTransTask::TASK_TYPE_OFFLINE_PARTITION: + ret = dispatch_offline_partition_task_(task, stop_flag); + break; + + default: + LOG_ERROR("invalid task, unkown type", K(task)); + ret = OB_NOT_SUPPORTED; + break; + } + + if (OB_SUCCESS != ret) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch task fail", KR(ret), K(task)); + } + } + } + + return ret; +} + +int ObLogFetcherDispatcher::dispatch_dml_trans_task_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + IObLogSequencer *sequencer = TCTX.sequencer_; + + if (OB_ISNULL(sequencer)) { + LOG_ERROR("sequencer is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(sequencer->push(&task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("sequencer push fail", KR(ret), K(task)); + } + } else { + // succ + } + + return ret; +} + +int ObLogFetcherDispatcher::dispatch_ddl_trans_task_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(ddl_handler_)) { + LOG_ERROR("invalid ddl handler", K(ddl_handler_)); + ret = OB_INVALID_ERROR; + } else { + // DDL transaction push into DDLHandler + RETRY_FUNC(stop_flag, *ddl_handler_, push, &task, DATA_OP_TIMEOUT); + } + + return ret; +} + +int ObLogFetcherDispatcher::dispatch_to_committer_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(committer_)) { + LOG_ERROR("invalid committer", K(committer_)); + ret = OB_INVALID_ERROR; + } else { + const int64_t task_count = 1; + // Push into committer + RETRY_FUNC(stop_flag, *committer_, push, &task, task_count, DATA_OP_TIMEOUT); + } + + return ret; +} + +int ObLogFetcherDispatcher::dispatch_part_heartbeat_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + // Heartbeat of the DDL partition is distributed to the DDL processor + if (task.is_ddl_part_heartbeat()) { + if (OB_ISNULL(ddl_handler_)) { + LOG_ERROR("invalid ddl handler", K(ddl_handler_)); + ret = OB_INVALID_ERROR; + } else { + // Push into DDL Handler + RETRY_FUNC(stop_flag, *ddl_handler_, push, &task, DATA_OP_TIMEOUT); + } + } else { + ret = dispatch_to_committer_(task, stop_flag); + } + + return ret; +} + +int ObLogFetcherDispatcher::dispatch_offline_partition_task_(PartTransTask &task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + // DDL partition's offline tasks are distributed to DDL processors + if (task.is_ddl_offline_task()) { + if (OB_ISNULL(ddl_handler_)) { + LOG_ERROR("invalid ddl handler", K(ddl_handler_)); + ret = OB_INVALID_ERROR; + } else { + // Push into DDL Handler + RETRY_FUNC(stop_flag, *ddl_handler_, push, &task, DATA_OP_TIMEOUT); + } + } else { + ret = dispatch_to_committer_(task, stop_flag); + } + + return ret; +} + +int 
ObLogFetcherDispatcher::dispatch_global_part_heartbeat_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + IObLogSequencer *sequencer = TCTX.sequencer_; + + if (OB_ISNULL(sequencer)) { + LOG_ERROR("sequencer is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + const int64_t thread_num = sequencer->get_thread_num(); + // 1. Set the reference count to the number of worker threads as a natural barrier, pushing to all worker threads in Sequencer each time + // 2. Decrement the reference count when each worker thread handle the global heartbeat, and update the Sequencer local safety point when it becomes 0 + task.set_ref_cnt(thread_num); + + // Note: The current rotation strategy and push are single-threaded operations, so this is the correct implementation + for (int64_t idx = 0; OB_SUCC(ret) && idx < thread_num; ++idx) { + if (OB_FAIL(sequencer->push(&task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("sequencer push fail", KR(ret), K(task)); + } + } + } // for + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_fetcher_dispatcher.h b/src/liboblog/src/ob_log_fetcher_dispatcher.h new file mode 100644 index 0000000000000000000000000000000000000000..88099d68cef7220aa1feb223ebb1d3232a082a85 --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_dispatcher.h @@ -0,0 +1,84 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_DISPATCHER +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_DISPATCHER + +#include "lib/utility/ob_macro_utils.h" // DISALLOW_COPY_AND_ASSIGN, CACHE_ALIGNED + +#include "ob_log_utils.h" // _SEC_ + +namespace oceanbase +{ +namespace liboblog +{ + +class ObLogEntryTask; +class PartTransTask; +class IObLogFetcherDispatcher +{ +public: + virtual ~IObLogFetcherDispatcher() {} + + // DDL/DML: Support for dispatch partition transaction tasks + virtual int dispatch(PartTransTask &task, volatile bool &stop_flag) = 0; +}; + +///////////////////////////////////////////////////////////////////////////////// + +class IObLogDmlParser; +class IObLogDDLHandler; +class IObLogCommitter; +class ObLogFetcherDispatcher : public IObLogFetcherDispatcher +{ + static const int64_t DATA_OP_TIMEOUT = 10 * _SEC_; + +public: + ObLogFetcherDispatcher(); + virtual ~ObLogFetcherDispatcher(); + + virtual int dispatch(PartTransTask &task, volatile bool &stop_flag); + +public: + int init(IObLogDDLHandler *ddl_handler, + IObLogCommitter *committer, + const int64_t start_seq); + void destroy(); + +private: + int dispatch_dml_trans_task_(PartTransTask &task, volatile bool &stop_flag); + int dispatch_ddl_trans_task_(PartTransTask &task, volatile bool &stop_flag); + int dispatch_part_heartbeat_(PartTransTask &task, volatile bool &stop_flag); + int dispatch_to_committer_(PartTransTask &task, volatile bool &stop_flag); + int dispatch_offline_partition_task_(PartTransTask &task, volatile bool &stop_flag); + int dispatch_global_part_heartbeat_(PartTransTask &task, volatile bool &stop_flag); + +private: + bool inited_; + IObLogDDLHandler *ddl_handler_; + IObLogCommitter *committer_; + + // DML and Global HeartBeat checkpoint seq + // DDL global checkpoint seq: + // 1. DDL trans + // 2. DDL HeartBeat + // 3. DDL Offline Task + int64_t checkpoint_seq_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFetcherDispatcher); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetcher_heartbeat_worker.cpp b/src/liboblog/src/ob_log_fetcher_heartbeat_worker.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e9eccabb9613ecf857c0c15274fbea8515b7d16 --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_heartbeat_worker.cpp @@ -0,0 +1,522 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetcher_heartbeat_worker.h" + +#include "lib/allocator/ob_mod_define.h" // ObModIds +#include "lib/allocator/ob_malloc.h" // ob_malloc + +#include "ob_log_rpc.h" // IObLogRpc +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_trace_id.h" // ObLogTraceIdGuard + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +/////////////////////////////////////// ObLogFetcherHeartbeatWorker /////////////////////////////////////// + +int64_t ObLogFetcherHeartbeatWorker::g_rpc_timeout = ObLogConfig::default_heartbeater_rpc_timeout_sec * _SEC_; +int64_t ObLogFetcherHeartbeatWorker::g_batch_count = ObLogConfig::default_heartbeater_batch_count; + +ObLogFetcherHeartbeatWorker::ObLogFetcherHeartbeatWorker() : + inited_(false), + thread_num_(0), + rpc_(NULL), + err_handler_(NULL), + worker_data_(NULL), + allocator_(ObModIds::OB_LOG_HEARTBEATER) +{ +} + +ObLogFetcherHeartbeatWorker::~ObLogFetcherHeartbeatWorker() +{ + destroy(); +} + +int ObLogFetcherHeartbeatWorker::init(const int64_t thread_num, + IObLogRpc &rpc, + IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + int64_t max_thread_num = ObLogConfig::max_fetcher_heartbeat_thread_num; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY((thread_num_ = thread_num) <= 0) + || OB_UNLIKELY(thread_num > max_thread_num)) { + LOG_ERROR("invalid thread num", K(thread_num), K(max_thread_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(HeartbeatThread::init(thread_num, ObModIds::OB_LOG_HEARTBEATER))) { + LOG_ERROR("init heartbeat worker fail", KR(ret), K(thread_num)); + } else { + int64_t alloc_size = thread_num * sizeof(WorkerData); + worker_data_ = static_cast<WorkerData *>(ob_malloc(alloc_size, ObModIds::OB_LOG_HEARTBEATER)); + + if (OB_ISNULL(worker_data_)) { + LOG_ERROR("allocate memory fail", K(worker_data_), K(alloc_size), K(thread_num)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // Init worker data + for (int64_t idx = 0, cnt = thread_num; OB_SUCCESS == ret && idx < cnt; ++idx) { + new (worker_data_ + idx) WorkerData(); + WorkerData &data = worker_data_[idx]; + + if (OB_FAIL(data.init())) { + LOG_ERROR("init worker data fail", KR(ret)); + } + } + } + + if (OB_SUCCESS == ret) { + rpc_ = &rpc; + err_handler_ = &err_handler; + inited_ = true; + LOG_INFO("init heartbeater succ", K(thread_num)); + } + } + + if (OB_SUCCESS != ret) { + destroy(); + } + return ret; +} + +void ObLogFetcherHeartbeatWorker::destroy() +{ + stop(); + + inited_ = false; + + // Destroy the heartbeat worker thread pool + HeartbeatThread::destroy(); + + if (NULL != worker_data_) { + for (int64_t idx = 0, cnt = thread_num_; idx < cnt; ++idx) { + free_all_svr_req_(worker_data_[idx]); + worker_data_[idx].~WorkerData(); + } + + ob_free(worker_data_); + worker_data_ = NULL; + } + + thread_num_ = 0; + rpc_ = NULL; + err_handler_ = NULL; + worker_data_ = NULL; + + allocator_.clear(); + + LOG_INFO("destroy heartbeater succ"); +} + +int ObLogFetcherHeartbeatWorker::async_heartbeat_req(HeartbeatRequest *req) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(req)) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(!
req->is_state_idle())) { + LOG_ERROR("invalid request, state is not IDLE", KPC(req)); + ret = OB_INVALID_ARGUMENT; + } else { + req->set_state_req(); + + // Hash by server to the corresponding worker thread + // Ensure that requests from the same server are aggregated + uint64_t hash_val = req->svr_.hash(); + + if (OB_FAIL(HeartbeatThread::push(req, hash_val))) { + LOG_ERROR("push request into worker queue fail", KR(ret), K(req), K(hash_val), KPC(req)); + } + } + + return ret; +} + +int ObLogFetcherHeartbeatWorker::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_FAIL(HeartbeatThread::start())) { + LOG_ERROR("start heartbeater worker fail", KR(ret)); + } else { + LOG_INFO("start heartbeater succ", K_(thread_num)); + } + return ret; +} + +void ObLogFetcherHeartbeatWorker::stop() +{ + if (OB_LIKELY(inited_)) { + HeartbeatThread::stop(); + LOG_INFO("stop heartbeater succ"); + } +} + +void ObLogFetcherHeartbeatWorker::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + WorkerData &data = worker_data_[thread_index]; + + while (! stop_flag_ && OB_SUCCESS == ret) { + if (OB_FAIL(do_retrieve_(thread_index, data))) { + LOG_ERROR("retrieve request fail", KR(ret), K(thread_index)); + } else if (OB_FAIL(do_request_(data))) { + LOG_ERROR("do request fail", KR(ret)); + } else { + cond_timedwait(thread_index, DATA_OP_TIMEOUT); + } + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) { + LOG_ERROR("heartbeater worker exit on fail", KR(ret), K(thread_index)); + if (OB_NOT_NULL(err_handler_)) { + err_handler_->handle_error(ret, "heartbeater worker exits on fail, ret=%d, thread_index=%ld", + ret, thread_index); + } + } +} + +void ObLogFetcherHeartbeatWorker::configure(const ObLogConfig &config) +{ + int64_t heartbeater_rpc_timeout_sec = config.heartbeater_rpc_timeout_sec; + int64_t heartbeater_batch_count = config.heartbeater_batch_count; + + ATOMIC_STORE(&g_rpc_timeout, heartbeater_rpc_timeout_sec * _SEC_); + LOG_INFO("[CONFIG]", K(heartbeater_rpc_timeout_sec)); + ATOMIC_STORE(&g_batch_count, heartbeater_batch_count); + LOG_INFO("[CONFIG]", K(heartbeater_batch_count)); +} + +int ObLogFetcherHeartbeatWorker::do_retrieve_(const int64_t thread_index, WorkerData &worker_data) +{ + int ret = OB_SUCCESS; + int64_t batch_count = ATOMIC_LOAD(&g_batch_count); + + // Get data from the queue and process it in bulk + for (int64_t cnt = 0; OB_SUCCESS == ret && (cnt < batch_count); ++cnt) { + void *data = NULL; + HeartbeatRequest *request = NULL; + SvrReq *svr_req = NULL; + + if (OB_FAIL(HeartbeatThread::pop(thread_index, data))) { + if (OB_EAGAIN != ret) { + LOG_ERROR("pop data from queue fail", KR(ret), K(thread_index), K(data)); + } + } else if (OB_ISNULL(request = static_cast<HeartbeatRequest *>(data))) { + LOG_ERROR("request is NULL", K(request), K(thread_index), K(data)); + ret = OB_ERR_UNEXPECTED; + } + // Aggregate by server + else if (OB_FAIL(get_svr_req_(worker_data, request->svr_, svr_req))) { + LOG_ERROR("get svr req fail", KR(ret), K(request->svr_)); + } else if (OB_ISNULL(svr_req)) { + LOG_ERROR("invalid svr req", K(request->svr_), K(svr_req)); + ret = OB_ERR_UNEXPECTED; + }
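// At this point the request has been matched to its per-server aggregation bucket (svr_req);
// the next branch appends it so that requests bound for the same server go out in one batched RPC.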
else if (OB_FAIL(svr_req->push(request))) { + LOG_ERROR("push request into request list fail", KR(ret), KPC(svr_req), K(request)); + } else { + // success + } + } + + if (OB_SUCCESS == ret) { + // Meet the number of batches requirement + } else if (OB_EAGAIN == ret) { + // Queue is empty + ret = OB_SUCCESS; + } else { + LOG_ERROR("pop and aggregate request fail", KR(ret), K(thread_index)); + } + return ret; +} + +ObLogFetcherHeartbeatWorker::SvrReq *ObLogFetcherHeartbeatWorker::alloc_svr_req_(const common::ObAddr &svr) +{ + SvrReq *svr_req = NULL; + void *buf = allocator_.alloc(sizeof(SvrReq)); + if (OB_NOT_NULL(buf)) { + svr_req = new(buf) SvrReq(svr); + } + return svr_req; +} + +void ObLogFetcherHeartbeatWorker::free_svr_req_(SvrReq *req) +{ + if (NULL != req) { + req->~SvrReq(); + allocator_.free(req); + req = NULL; + } +} + +void ObLogFetcherHeartbeatWorker::free_all_svr_req_(WorkerData &data) +{ + for (int64_t index = 0; index < data.svr_req_list_.count(); index++) { + SvrReq *svr_req = data.svr_req_list_.at(index); + + if (OB_NOT_NULL(svr_req)) { + free_svr_req_(svr_req); + svr_req = NULL; + } + } + + data.reset(); +} + +int ObLogFetcherHeartbeatWorker::get_svr_req_(WorkerData &data, + const common::ObAddr &svr, + SvrReq *&svr_req) +{ + int ret = OB_SUCCESS; + SvrReqList &svr_req_list = data.svr_req_list_; + SvrReqMap &svr_req_map = data.svr_req_map_; + + svr_req = NULL; + + // Look up the corresponding record in the map first + if (OB_FAIL(svr_req_map.get(svr, svr_req))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + + // Allocate a new request + if (OB_ISNULL(svr_req = alloc_svr_req_(svr))) { + LOG_ERROR("allocate svr request fail", K(svr)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(svr_req_list.push_back(svr_req))) { + LOG_ERROR("push svr req into array fail", KR(ret), K(svr_req), K(svr_req_list)); + } else if (OB_FAIL(svr_req_map.insert(svr, svr_req))) { + LOG_ERROR("insert svr req into map fail", KR(ret), K(svr), KPC(svr_req)); + } + } else { + LOG_ERROR("get svr req from map fail", KR(ret), K(svr)); + } + } else { + // succ + } + return ret; +} + +int ObLogFetcherHeartbeatWorker::do_request_(WorkerData &data) +{ + int ret = OB_SUCCESS; + SvrReqList &svr_req_list = data.svr_req_list_; + RpcReq rpc_req; + + // Trace ID + ObLogTraceIdGuard trace_id_guard; + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid rpc handle", K(rpc_)); + ret = OB_INVALID_ARGUMENT; + } else { + // Iterate through the list of all server requests + for (int64_t idx = 0, cnt = svr_req_list.count(); OB_SUCCESS == ret && (idx < cnt); ++idx) { + if (OB_ISNULL(svr_req_list.at(idx))) { + LOG_ERROR("svr request is NULL", K(idx), K(cnt), K(svr_req_list)); + ret = OB_ERR_UNEXPECTED; + } else { + SvrReq &svr_req = *(svr_req_list.at(idx)); + + // Batch processing of heartbeat requests + for (int64_t start_idx = 0; OB_SUCCESS == ret && start_idx < svr_req.hb_req_list_.count();) { + rpc_req.reset(); + + // Build RPC request + if (OB_FAIL(build_rpc_request_(rpc_req, svr_req, start_idx))) { + LOG_ERROR("build rpc request fail", KR(ret), K(start_idx), K(svr_req), K(rpc_req)); + } + // Executing RPC requests + else if (OB_FAIL(request_heartbeat_(rpc_req, svr_req, start_idx))) { + LOG_ERROR("request heartbeat fail", KR(ret), K(rpc_req), K(svr_req), K(start_idx)); + } else { + // Update the starting point of the next request + start_idx += rpc_req.get_params().count(); + } + } + + // Reset server list + svr_req.reset(); + } + } + } + return ret; +} + +int ObLogFetcherHeartbeatWorker::build_rpc_request_(RpcReq &rpc_req, + SvrReq &svr_req, + const
int64_t start_idx) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(start_idx < 0) || OB_UNLIKELY(svr_req.hb_req_list_.count() <= start_idx)) { + LOG_ERROR("invalid start_idx", K(start_idx), K(svr_req)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t max_req_cnt = RpcReq::ITEM_CNT_LMT; + int64_t avail_req_cnt = svr_req.hb_req_list_.count() - start_idx; + + // The maximum number of heartbeat requests aggregated in this request + int64_t req_cnt = std::min(max_req_cnt, avail_req_cnt); + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < req_cnt; idx++) { + HeartbeatRequest *hb_req = svr_req.hb_req_list_.at(idx + start_idx); + + if (OB_ISNULL(hb_req)) { + LOG_ERROR("invalid heartbeat request", K(hb_req), K(idx), K(start_idx), K(svr_req)); + ret = OB_ERR_UNEXPECTED; + } else { + RpcReq::Param param; + param.reset(hb_req->pkey_, hb_req->next_log_id_); + + if (OB_FAIL(rpc_req.append_param(param))) { + if (OB_BUF_NOT_ENOUGH == ret) { + // buffer is full + } else { + LOG_ERROR("append param fail", KR(ret), K(param), K(rpc_req)); + } + } + } + } + + if (OB_BUF_NOT_ENOUGH == ret) { + ret = OB_SUCCESS; + } + } + return ret; +} + +int ObLogFetcherHeartbeatWorker::request_heartbeat_(const RpcReq &rpc_req, + SvrReq &svr_req, + const int64_t start_idx) +{ + int ret = OB_SUCCESS; + RpcResp rpc_resp; + const ObAddr &svr = svr_req.svr_; + int64_t total_hb_req_cnt = svr_req.hb_req_list_.count(); + int64_t req_param_cnt = rpc_req.get_params().count(); + int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout); + + // Use a different Trace ID for each request + ObLogTraceIdGuard guard; + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid rpc handler", K(rpc_)); + ret = OB_INVALID_ERROR; + } else if (OB_UNLIKELY(start_idx < 0) + || OB_UNLIKELY(start_idx > (total_hb_req_cnt - req_param_cnt))) { + LOG_ERROR("invalid argument", K(start_idx), K(total_hb_req_cnt), K(req_param_cnt)); + ret = OB_INVALID_ARGUMENT; + } else { + int rpc_err = rpc_->req_leader_heartbeat(svr, rpc_req, rpc_resp, rpc_timeout); + int svr_err = rpc_resp.get_err(); + int64_t resp_result_cnt = rpc_resp.get_results().count(); + + // Check RPC result + if (OB_SUCCESS != rpc_err) { + LOG_ERROR("request heartbeat fail on rpc", K(svr), K(rpc_err), K(rpc_req)); + } else if (OB_SUCCESS != svr_err) { + LOG_ERROR("request heartbeat fail on server", K(svr), K(rpc_err), K(svr_err), + "svr_debug_err", rpc_resp.get_debug_err(), K(rpc_req), K(rpc_resp)); + } else if (OB_UNLIKELY(req_param_cnt != resp_result_cnt)) { + LOG_ERROR("heartbeat rpc request does not match rpc response", + K(req_param_cnt), K(resp_result_cnt), K(svr), K(rpc_req), K(rpc_resp)); + ret = OB_INVALID_DATA; + } else { + // success + } + + if (OB_SUCCESS == ret) { + TraceIdType *trace_id = ObCurTraceId::get_trace_id(); + + // Iterate through all heartbeat requests, set the corresponding result, and mark completion regardless of success or failure + for (int64_t idx = 0; OB_SUCCESS == ret && idx < req_param_cnt; idx++) { + HeartbeatRequest *hb_req = svr_req.hb_req_list_.at(start_idx + idx); + + if (OB_ISNULL(hb_req)) { + LOG_ERROR("invalid heartbeat request", K(hb_req), K(idx), K(start_idx), + K(svr_req.hb_req_list_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_SUCCESS != rpc_err || OB_SUCCESS != svr_err) { + // Setting Failure Results + hb_req->set_resp(rpc_err, svr_err, OB_SUCCESS, OB_INVALID_ID, OB_INVALID_TIMESTAMP, + trace_id); + } else { + const RpcResp::Result &result = rpc_resp.get_results().at(idx); + + // Setting Success Results + hb_req->set_resp(rpc_err, svr_err, result.err_, 
result.next_served_log_id_, + result.next_served_ts_, trace_id); + } + + // After setting up the results, mark the heartbeat complete + if (OB_SUCCESS == ret) { + hb_req->set_state_done(); + // The request cannot be continued afterwards, there is a concurrency scenario + // Mark the corresponding request as invalid to avoid revisiting it + svr_req.hb_req_list_.at(start_idx + idx) = NULL; + } + } + } + } + return ret; +} + + +//////////////////////////// ObLogFetcherHeartbeatWorker::WorkerData //////////////////////////// + +int ObLogFetcherHeartbeatWorker::WorkerData::init() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(svr_req_map_.init(ObModIds::OB_LOG_HEARTBEATER))) { + LOG_ERROR("init request map fail", KR(ret)); + } else { + svr_req_list_.set_label(ObModIds::OB_LOG_HEARTBEATER); + svr_req_list_.reset(); + } + return ret; +} + +void ObLogFetcherHeartbeatWorker::WorkerData::destroy() +{ + (void)svr_req_map_.destroy(); + svr_req_list_.reset(); +} + +} +} + diff --git a/src/liboblog/src/ob_log_fetcher_heartbeat_worker.h b/src/liboblog/src/ob_log_fetcher_heartbeat_worker.h new file mode 100644 index 0000000000000000000000000000000000000000..ba428592bb65205a5f19a30d1835380a18226db1 --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_heartbeat_worker.h @@ -0,0 +1,290 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_HEARTBEAT_WORKER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_HEARTBEAT_WORKER_H_ + +#include "share/ob_define.h" // OB_INVALID_ID +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/container/ob_array.h" // ObArray +#include "lib/allocator/ob_safe_arena.h" // ObSafeArena +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/profile/ob_trace_id.h" // ObCurTraceId +#include "common/ob_partition_key.h" // ObPartitionKey +#include "clog/ob_log_external_rpc.h" // obrpc + +#include "ob_map_queue_thread.h" // ObMapQueueThread +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_utils.h" // _SEC_ + +namespace oceanbase +{ +namespace liboblog +{ + +struct HeartbeatRequest; +class IObLogFetcherHeartbeatWorker +{ +public: + virtual ~IObLogFetcherHeartbeatWorker() {} + +public: + virtual int async_heartbeat_req(HeartbeatRequest *req) = 0; + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; +}; + +/////////////////////////////////////// ObLogFetcherHeartbeatWorker /////////////////////////////////////// + +typedef common::ObMapQueueThread HeartbeatThread; + +class IObLogRpc; +class IObLogErrHandler; + +class ObLogFetcherHeartbeatWorker : public IObLogFetcherHeartbeatWorker, public HeartbeatThread +{ + static const int64_t DATA_OP_TIMEOUT = 100 * _MSEC_; + + typedef obrpc::ObLogLeaderHeartbeatReq RpcReq; + typedef obrpc::ObLogLeaderHeartbeatResp RpcResp; + + // Class global variables +public: + static int64_t g_rpc_timeout; + static int64_t g_batch_count; + +public: + ObLogFetcherHeartbeatWorker(); + virtual ~ObLogFetcherHeartbeatWorker(); + + int init(const int64_t thread_num, + IObLogRpc &rpc, + IObLogErrHandler &err_handler); + void destroy(); + +public: + int async_heartbeat_req(HeartbeatRequest *req); + int start(); + void stop(); + void mark_stop_flag() { HeartbeatThread::mark_stop_flag(); } + +public: + // Implement HeartbeatThread's thread handling functions + void run(const int64_t thread_index); + +public: + static void configure(const ObLogConfig &config); + +private: + struct WorkerData; + struct SvrReq; + int do_retrieve_(const int64_t thread_index, WorkerData &data); + SvrReq *alloc_svr_req_(const common::ObAddr &svr); + void free_svr_req_(SvrReq *req); + void free_all_svr_req_(WorkerData &data); + int get_svr_req_(WorkerData &data, + const common::ObAddr &svr, + SvrReq *&svr_req); + int do_request_(WorkerData &data); + int build_rpc_request_(RpcReq &rpc_req, + SvrReq &svr_req, + const int64_t start_idx); + int request_heartbeat_(const RpcReq &rpc_req, + SvrReq &svr_req, + const int64_t start_idx); + +private: + // Single server request + struct SvrReq + { + typedef common::ObArray ReqList; + + const common::ObAddr svr_; + ReqList hb_req_list_; // Heartbeat Request List + + explicit SvrReq(const common::ObAddr &svr) : svr_(svr), hb_req_list_() + {} + + TO_STRING_KV(K_(svr), "hb_req_cnt", hb_req_list_.count(), K_(hb_req_list)); + + void reset() + { + hb_req_list_.reset(); + } + + int push(HeartbeatRequest *req) + { + return hb_req_list_.push_back(req); + } + + private: + DISALLOW_COPY_AND_ASSIGN(SvrReq); + }; + + typedef common::ObArray SvrReqList; + typedef common::ObLinearHashMap SvrReqMap; + + // Local data per Worker + struct WorkerData + { + SvrReqList svr_req_list_; + SvrReqMap svr_req_map_; + + WorkerData() 
: svr_req_list_(), svr_req_map_() + {} + ~WorkerData() { destroy(); } + + int init(); + void destroy(); + + void reset() + { + svr_req_list_.reset(); + svr_req_map_.reset(); + } + }; + +private: + bool inited_; + int64_t thread_num_; + IObLogRpc *rpc_; + IObLogErrHandler *err_handler_; + WorkerData *worker_data_; + + // Module Arena allocator with multi-threaded support + typedef common::ObSafeArena AllocatorType; + AllocatorType allocator_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFetcherHeartbeatWorker); +}; + +/////////////////////////////////////// HeartbeatResponse /////////////////////////////////////// + +/// HeartbeatRequest result +struct HeartbeatResponse +{ + uint64_t next_served_log_id_ ; // The next log ID of the server services + int64_t next_served_tstamp_; // Lower bound for the next log timestamp of the server services + + int rpc_err_; // rpc error + int svr_err_; // server error + int partition_err_; // partition error + + void reset() + { + next_served_log_id_ = common::OB_INVALID_ID; + next_served_tstamp_ = common::OB_INVALID_TIMESTAMP; + rpc_err_ = common::OB_SUCCESS; + svr_err_ = common::OB_SUCCESS; + partition_err_ = common::OB_SUCCESS; + } + + void set(const int rpc_err, const int svr_err, const int partition_err, + const uint64_t next_served_log_id, const int64_t next_served_tstamp) + { + rpc_err_ = rpc_err; + svr_err_ = svr_err; + partition_err_ = partition_err; + next_served_log_id_ = next_served_log_id; + next_served_tstamp_ = next_served_tstamp; + } + + TO_STRING_KV(K_(next_served_log_id), K_(next_served_tstamp), + K_(rpc_err), K_(svr_err), K_(partition_err)); +}; + +/////////////////////////////////////// HeartbeatRequest /////////////////////////////////////// + +typedef common::ObCurTraceId::TraceId TraceIdType; + +/* + * HeartbeatRequest + * Request Status: + * - IDLE: Idle state + * - REQ: Requesting status, the result is not readable, external need to ensure the validity of the request memory + * - DONE: When the request is finished, you can read the result and reset it + */ +struct HeartbeatRequest +{ + enum State { IDLE = 0, REQ, DONE }; + + // Request Status + State state_; + + // Request Parameters + common::ObAddr svr_; + common::ObPartitionKey pkey_; + uint64_t next_log_id_; + + // Request Result + HeartbeatResponse resp_; + + // Trace ID used in the request process + TraceIdType trace_id_; + + TO_STRING_KV(K_(pkey), K_(state), K_(next_log_id), K_(svr), K_(resp), K_(trace_id)); + + void reset() + { + set_state(IDLE); + svr_.reset(); + pkey_.reset(); + next_log_id_ = common::OB_INVALID_ID; + resp_.reset(); + trace_id_.reset(); + } + + void reset(const common::ObPartitionKey &pkey, + const uint64_t next_log_id, + const common::ObAddr &svr) + { + reset(); + + svr_ = svr; + pkey_ = pkey; + next_log_id_ = next_log_id; + } + + void set_resp(const int rpc_err, const int svr_err, const int partition_err, + const uint64_t next_served_log_id, const int64_t next_served_tstamp, + TraceIdType *trace_id) + { + resp_.set(rpc_err, svr_err, partition_err, next_served_log_id, next_served_tstamp); + if (NULL != trace_id) { + trace_id_ = *trace_id; + } + } + + const HeartbeatResponse &get_resp() const { return resp_; } + + void set_state(const State state) { ATOMIC_STORE(&state_, state); } + State get_state() const { return (ATOMIC_LOAD(&state_)); } + + void set_state_idle() { ATOMIC_STORE(&state_, IDLE); } + void set_state_req() { ATOMIC_STORE(&state_, REQ); } + void set_state_done() { ATOMIC_STORE(&state_, DONE); } + bool is_state_idle() const { return 
(ATOMIC_LOAD(&state_)) == IDLE; } + bool is_state_req() const { return (ATOMIC_LOAD(&state_)) == REQ; } + bool is_state_done() const { return (ATOMIC_LOAD(&state_)) == DONE; } +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_fetcher_idle_pool.cpp b/src/liboblog/src/ob_log_fetcher_idle_pool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e63a1ba3be8db912a6e58f97b7447edaa681e45f --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_idle_pool.cpp @@ -0,0 +1,330 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_fetcher_idle_pool.h" + +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_stream_worker.h" // IObLogStreamWorker + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogFetcherIdlePool::ObLogFetcherIdlePool() : + inited_(false), + err_handler_(NULL), + svr_finder_(NULL), + stream_worker_(NULL), + start_log_id_locator_(NULL) +{ +} + +ObLogFetcherIdlePool::~ObLogFetcherIdlePool() +{ + destroy(); +} + +int ObLogFetcherIdlePool::init(const int64_t thread_num, + IObLogErrHandler &err_handler, + IObLogSvrFinder &svr_finder, + IObLogStreamWorker &stream_worker, + IObLogStartLogIdLocator &start_log_id_locator) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) || OB_UNLIKELY(thread_num > MAX_THREAD_NUM)) { + LOG_ERROR("invalid argument", K(thread_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(IdlePoolThread::init(thread_num, ObModIds::OB_LOG_FETCHER_IDLE_POOL))) { + LOG_ERROR("init thread fail", KR(ret), K(thread_num)); + } else { + reset_task_list_array_(); + + err_handler_ = &err_handler; + svr_finder_ = &svr_finder; + stream_worker_ = &stream_worker; + start_log_id_locator_ = &start_log_id_locator; + + inited_ = true; + + LOG_INFO("init fetcher idle pool succ", K(thread_num), K(this)); + } + return ret; +} + +void ObLogFetcherIdlePool::destroy() +{ + stop(); + + inited_ = false; + IdlePoolThread::destroy(); + err_handler_ = NULL; + svr_finder_ = NULL; + stream_worker_ = NULL; + start_log_id_locator_ = NULL; + + // TODO: Recycle all PartFetchCtx + reset_task_list_array_(); + + LOG_INFO("destroy fetcher idle pool succ"); +} + +int ObLogFetcherIdlePool::push(PartFetchCtx *task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument"); + ret = OB_INVALID_ARGUMENT; + } else { + task->dispatch_in_idle_pool(); + + LOG_DEBUG("[STAT] [IDLE_POOL] [DISPATCH_IN]", K(task), KPC(task)); + + if (OB_FAIL(IdlePoolThread::push(task, task->hash()))) { + LOG_ERROR("push task fail", KR(ret), K(task), K(task->hash())); + } else { + // success + } + } + return ret; +} + +int ObLogFetcherIdlePool::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(IdlePoolThread::start())) { + LOG_ERROR("start thread fail", KR(ret)); + } else { + LOG_INFO("start fetcher idle pool succ", "thread_num", get_thread_num()); + } + return ret; +} + +void ObLogFetcherIdlePool::stop() +{ + if (OB_LIKELY(inited_)) { + IdlePoolThread::stop(); + LOG_INFO("stop fetcher idle pool succ"); + } +} + +void ObLogFetcherIdlePool::mark_stop_flag() +{ + if (OB_LIKELY(inited_)) { + IdlePoolThread::mark_stop_flag(); + LOG_INFO("mark fetcher idle pool stop"); + } +} + +void ObLogFetcherIdlePool::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= get_thread_num())) { + LOG_ERROR("invalid thread index", K(thread_index), K(get_thread_num())); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("fetcher idle pool thread start", K(thread_index)); + + FetchTaskList &task_list = task_list_array_[thread_index]; + + while (! stop_flag_ && OB_SUCCESS == ret) { + if (OB_FAIL(do_retrieve_(thread_index, task_list))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("do retrieve new request fail", KR(ret)); + } + } else if (OB_FAIL(do_request_(thread_index, task_list))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("do requests fail", KR(ret)); + } + } else { + // Wait for a fixed time or until a new task arrives + cond_timedwait(thread_index, IDLE_WAIT_TIME); + } + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "fetcher idle pool thread exits, thread_index=%ld, err=%d", + thread_index, ret); + + IdlePoolThread::mark_stop_flag(); + } + + LOG_INFO("fetcher idle pool thread exits", K(thread_index), KR(ret)); + } +} + +void ObLogFetcherIdlePool::reset_task_list_array_() +{ + for (int64_t idx = 0; idx < MAX_THREAD_NUM; idx++) { + task_list_array_[idx].reset(); + } +} + +int ObLogFetcherIdlePool::do_retrieve_(const int64_t thread_index, FetchTaskList &list) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else { + while (! stop_flag_ && OB_SUCCESS == ret) { + void *data = NULL; + PartFetchCtx *task = NULL; + + if (OB_FAIL(pop(thread_index, data))) { + if (OB_EAGAIN == ret) { + // No data + } else { + LOG_ERROR("pop task from queue fail", KR(ret), K(thread_index)); + } + } else if (OB_ISNULL(task = static_cast<PartFetchCtx *>(data))) { + LOG_ERROR("task is NULL", K(task), K(data)); + ret = OB_ERR_UNEXPECTED; + } else { + list.add_head(*task); + + // Successfully acquired a task + LOG_DEBUG("[STAT] [IDLE_POOL] [RETRIEVE]", K(task), K(thread_index), + "count", list.count(), KPC(task)); + } + } + + if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + return ret; +} + +int ObLogFetcherIdlePool::do_request_(const int64_t thread_index, FetchTaskList &list) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!
inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(stream_worker_)) { + LOG_ERROR("invalid handlers", K(stream_worker_)); + ret = OB_INVALID_ERROR; + } else { + PartFetchCtx *task = list.head(); + + while (OB_SUCCESS == ret && NULL != task) { + PartFetchCtx *next = task->get_next(); + bool need_dispatch = false; + + if (OB_FAIL(handle_task_(task, need_dispatch))) { + LOG_ERROR("handle task fail", KR(ret), K(task), KPC(task)); + } else if (need_dispatch) { + // If it needs to be assigned to another thread, remove it from the linklist and then perform the assignment + list.erase(*task); + + LOG_DEBUG("[STAT] [IDLE_POOL] [DISPATCH_OUT]", K(task), K(thread_index), + "count", list.count(), KPC(task)); + + const char *dispatch_reason = "SvrListReady"; + if (OB_FAIL(stream_worker_->dispatch_fetch_task(*task, dispatch_reason))) { + LOG_ERROR("dispatch fetch task fail", KR(ret), KPC(task), K(dispatch_reason)); + } else { + // You cannot continue to operate the task afterwards + } + } + + if (OB_SUCCESS == ret) { + task = next; + } + } + } + return ret; +} + +int ObLogFetcherIdlePool::handle_task_(PartFetchCtx *task, bool &need_dispatch) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(svr_finder_) || OB_ISNULL(start_log_id_locator_)) { + LOG_ERROR("invalid handlers", K(svr_finder_), K(start_log_id_locator_)); + ret = OB_INVALID_ERROR; + } else if (OB_UNLIKELY(task->is_discarded())) { + // If a task is deleted, assign it directly and recycle it during the assignment process + need_dispatch = true; + LOG_DEBUG("[STAT] [IDLE_POOL] [RECYCLE_FETCH_TASK]", K(task), KPC(task)); + } else { + need_dispatch = false; + + // If there is no leader information, update the leader + // Note: Leader information is not required for fetching logs, so it is not part of the work that must be done by the Idle pool + // This is just an asynchronous request, no success required + if (task->need_update_leader_info()) { + if (OB_FAIL(task->update_leader_info(*svr_finder_))) { + LOG_ERROR("update leader info fail", KR(ret), KPC(task)); + } + } + + if (OB_SUCCESS == ret) { + // Update the server list + // Requires a successful update of the server list before leaving the idle pool + if (task->need_update_svr_list()) { + if (OB_FAIL(task->update_svr_list(*svr_finder_))) { + LOG_ERROR("update server list fail", KR(ret), KPC(task)); + } + } + // locate the start log id + // Requires a successful location to leave the idle pool + else if (task->need_locate_start_log_id()) { + if (OB_FAIL(task->locate_start_log_id(*start_log_id_locator_))) { + LOG_ERROR("locate start log id fail", KR(ret), K(start_log_id_locator_), KPC(task)); + } + } else { + // After all the above conditions are met, allow distribution to the fetch log stream + need_dispatch = true; + } + } + } + return ret; +} + + +} +} diff --git a/src/liboblog/src/ob_log_fetcher_idle_pool.h b/src/liboblog/src/ob_log_fetcher_idle_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..074154ef42c853ba64475abb9ed54aedaa1e242f --- /dev/null +++ b/src/liboblog/src/ob_log_fetcher_idle_pool.h @@ -0,0 +1,103 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_IDLE_POOL_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_FETCHER_IDLE_POOL_H__ + +#include "lib/utility/ob_macro_utils.h" // DISALLOW_COPY_AND_ASSIGN + +#include "ob_log_config.h" // ObLogConfig +#include "ob_map_queue_thread.h" // ObMapQueueThread +#include "ob_log_part_fetch_ctx.h" // FetchTaskList, PartFetchCtx + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogFetcherIdlePool +{ +public: + static const int64_t MAX_THREAD_NUM = ObLogConfig::max_idle_pool_thread_num; + +public: + virtual ~IObLogFetcherIdlePool() {} + +public: + virtual int push(PartFetchCtx *task) = 0; + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; +}; + +///////////////////////////////////////////////////////////////// + +typedef common::ObMapQueueThread IdlePoolThread; + +class IObLogErrHandler; +class IObLogSvrFinder; +class IObLogStreamWorker; +class IObLogStartLogIdLocator; + +class ObLogFetcherIdlePool : public IObLogFetcherIdlePool, public IdlePoolThread +{ + static const int64_t IDLE_WAIT_TIME = 100 * 1000; + +public: + ObLogFetcherIdlePool(); + virtual ~ObLogFetcherIdlePool(); + +public: + int init(const int64_t thread_num, + IObLogErrHandler &err_handler, + IObLogSvrFinder &svr_finder, + IObLogStreamWorker &stream_worker, + IObLogStartLogIdLocator &start_log_id_locator); + void destroy(); + +public: + // Implement the IObLogFetcherIdlePool virtual function + virtual int push(PartFetchCtx *task); + virtual int start(); + virtual void stop(); + virtual void mark_stop_flag(); + +public: + // Implement the ObMapQueueThread virtual function + // Overloading thread handling functions + virtual void run(const int64_t thread_index); + +private: + void reset_task_list_array_(); + int do_retrieve_(const int64_t thread_index, FetchTaskList &list); + int do_request_(const int64_t thread_index, FetchTaskList &list); + int handle_task_(PartFetchCtx *task, bool &need_dispatch); + +private: + bool inited_; + IObLogErrHandler *err_handler_; + IObLogSvrFinder *svr_finder_; + IObLogStreamWorker *stream_worker_; + IObLogStartLogIdLocator *start_log_id_locator_; + + // One task array per thread + FetchTaskList task_list_array_[MAX_THREAD_NUM]; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFetcherIdlePool); +}; + + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_formatter.cpp b/src/liboblog/src/ob_log_formatter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..20fe9945237538013c5126fd00f3196042e0d0fc --- /dev/null +++ b/src/liboblog/src/ob_log_formatter.cpp @@ -0,0 +1,1509 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FORMATTER + +#include "ob_log_formatter.h" + +#include // BinLogBuf +#include "share/schema/ob_table_schema.h" // TableSchemaType +#include "lib/string/ob_string.h" // ObString +#include "storage/transaction/ob_trans_define.h" // ObTransID + +#include "ob_log_meta_manager.h" // IObLogMetaManager +#include "ob_log_utils.h" // obj2str +#include "ob_log_schema_getter.h" // IObLogSchemaGetter, DBSchemaInfo +#include "ob_log_instance.h" // IObLogErrHandler, TCTX +#include "ob_obj2str_helper.h" // ObObj2strHelper +#include "ob_log_trans_ctx_mgr.h" // IObLogTransCtxMgr +#include "ob_log_binlog_record_pool.h" // IObLogBRPool +#include "ob_log_storager.h" // IObLogStorager +#include "ob_log_tenant.h" // ObLogTenantGuard, ObLogTenant +#include "ob_log_config.h" // TCONF + +using namespace oceanbase::common; +using namespace oceanbase::storage; +using namespace oceanbase::share::schema; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace liboblog +{ + +void ObLogFormatter::RowValue::reset() +{ + column_num_ = 0; + contain_old_column_ = false; + new_column_array_ = NULL; + old_column_array_ = NULL; + + (void)memset(new_columns_, 0, sizeof(new_columns_)); + (void)memset(old_columns_, 0, sizeof(old_columns_)); + (void)memset(orig_default_value_, 0, sizeof(orig_default_value_)); + (void)memset(is_rowkey_, 0, sizeof(is_rowkey_)); + (void)memset(is_changed_, 0, sizeof(is_changed_)); +} + +int ObLogFormatter::RowValue::init(const int64_t column_num, const bool contain_old_column) +{ + column_num_ = column_num; + contain_old_column_ = contain_old_column; + new_column_array_ = NULL; + old_column_array_ = NULL; + + if (column_num > 0) { + (void)memset(new_columns_, 0, column_num * sizeof(new_columns_[0])); + (void)memset(old_columns_, 0, column_num * sizeof(old_columns_[0])); + (void)memset(orig_default_value_, 0, column_num * sizeof(orig_default_value_[0])); + (void)memset(is_rowkey_, 0, column_num * sizeof(is_rowkey_[0])); + (void)memset(is_changed_, 0, column_num * sizeof(is_changed_[0])); + } + + return OB_SUCCESS; +} + +ObLogFormatter::ObLogFormatter() : inited_(false), + working_mode_(WorkingMode::UNKNOWN_MODE), + obj2str_helper_(NULL), + br_pool_(NULL), + err_handler_(NULL), + meta_manager_(NULL), + schema_getter_(NULL), + storager_(NULL), + row_value_array_(NULL), + allocator_(ObModIds::OB_LOG_FORMATTER, PAGE_SIZE), + round_value_(0), + skip_dirty_data_(false), + enable_hbase_mode_(false), + hbase_util_(NULL), + skip_hbase_mode_put_column_count_not_consistency_(false), + enable_output_hidden_primary_key_(false), + log_entry_task_count_(0) + +{ +} + +ObLogFormatter::~ObLogFormatter() +{ + destroy(); +} + +int ObLogFormatter::init(const int64_t thread_num, + const int64_t queue_size, + const WorkingMode working_mode, + ObObj2strHelper *obj2str_helper, + IObLogBRPool *br_pool, + IObLogMetaManager *meta_manager, + IObLogSchemaGetter *schema_getter, + IObLogStorager *storager, + IObLogErrHandler *err_handler, + const bool skip_dirty_data, + const bool enable_hbase_mode, + ObLogHbaseUtil &hbase_util, + const bool skip_hbase_mode_put_column_count_not_consistency, + const bool enable_output_hidden_primary_key) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogFormatter has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(queue_size <= 0) + || OB_UNLIKELY(! 
is_working_mode_valid(working_mode)) + || OB_ISNULL(obj2str_helper) + || OB_ISNULL(br_pool) + || OB_ISNULL(meta_manager) + || OB_ISNULL(schema_getter) + || OB_ISNULL(storager) + || OB_ISNULL(err_handler)) { + LOG_ERROR("invalid arguments", K(thread_num), K(queue_size), K(working_mode), K(obj2str_helper), + K(meta_manager), K(schema_getter), K(storager), K(err_handler)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(FormatterThread::init(thread_num, queue_size))) { + LOG_ERROR("init formatter queue thread fail", KR(ret), K(thread_num), K(queue_size)); + } else if (OB_FAIL(init_row_value_array_(thread_num))) { + LOG_ERROR("init_row_value_array_ fail", KR(ret), K(thread_num)); + } else { + working_mode_ = working_mode; + obj2str_helper_ = obj2str_helper; + br_pool_ = br_pool; + err_handler_ = err_handler; + meta_manager_ = meta_manager; + schema_getter_ = schema_getter; + storager_ = storager; + round_value_ = 0; + skip_dirty_data_ = skip_dirty_data; + enable_hbase_mode_ = enable_hbase_mode; + hbase_util_ = &hbase_util; + skip_hbase_mode_put_column_count_not_consistency_ = skip_hbase_mode_put_column_count_not_consistency; + enable_output_hidden_primary_key_ = enable_output_hidden_primary_key; + log_entry_task_count_ = 0; + inited_ = true; + LOG_INFO("Formatter init succ", K(working_mode_), "working_mode", print_working_mode(working_mode_), + K(thread_num), K(queue_size)); + } + + return ret; +} + +void ObLogFormatter::destroy() +{ + FormatterThread::destroy(); + + inited_ = false; + + destroy_row_value_array_(); + + working_mode_ = WorkingMode::UNKNOWN_MODE; + obj2str_helper_ = NULL; + br_pool_ = NULL; + row_value_array_ = NULL; + err_handler_ = NULL; + meta_manager_ = NULL; + schema_getter_ = NULL; + storager_ = NULL; + round_value_ = 0; + skip_dirty_data_ = false; + enable_hbase_mode_ = false; + hbase_util_ = NULL; + skip_hbase_mode_put_column_count_not_consistency_ = false; + enable_output_hidden_primary_key_ = false; + log_entry_task_count_ = 0; +} + +int ObLogFormatter::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(FormatterThread::start())) { + LOG_ERROR("start formatter thread fail", KR(ret), "thread_num", get_thread_num()); + } else { + LOG_INFO("start formatter threads succ", "thread_num", get_thread_num()); + } + + return ret; +} + +void ObLogFormatter::stop() +{ + if (inited_) { + FormatterThread::stop(); + LOG_INFO("stop formatter threads succ", "thread_num", get_thread_num()); + } +} + +int ObLogFormatter::push(IStmtTask *stmt_task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(stmt_task)) { + LOG_ERROR("invalid arguments", K(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else { + // Ensure that all stmt of ObLogEntryTask are pushed to the same queue + const uint64_t hash_value = ATOMIC_FAA(&round_value_, 1); + int64_t stmt_count = 0; + + while (OB_SUCC(ret) && NULL != stmt_task) { + IStmtTask *next = stmt_task->get_next(); + void *push_task = static_cast(stmt_task); + + RETRY_FUNC(stop_flag, *(static_cast(this)), push, push_task, hash_value, DATA_OP_TIMEOUT); + + if (OB_SUCC(ret)) { + stmt_task = next; + ++stmt_count; + } else { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task into formatter fail", KR(ret), K(push_task), K(hash_value)); + } + } + } // while + + if (OB_SUCC(ret)) { + ATOMIC_INC(&log_entry_task_count_); + } + } + + return ret; +} + +int ObLogFormatter::get_task_count(int64_t &br_count, + int64_t &log_entry_task_count) +{ + int ret = OB_SUCCESS; + br_count = 0; + log_entry_task_count = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("parser has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_total_task_num(br_count))) { + LOG_ERROR("get_total_task_num fail", KR(ret), K(br_count)); + } else { + log_entry_task_count = ATOMIC_LOAD(&log_entry_task_count_); + } + + return ret; +} + +int ObLogFormatter::handle(void *data, const int64_t thread_index, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + ObLogBR *br = NULL; + ObLogSchemaGuard schema_guard; + DBSchemaInfo db_schema_info; + const TableSchemaType *table_schema = NULL; + IStmtTask *stmt_task = static_cast(data); + DmlStmtTask *dml_stmt_task = dynamic_cast(stmt_task); + RowValue *rv = row_value_array_ + thread_index; + int64_t new_column_cnt = 0; + bool is_ignore = false; + // Get the tenant schema: MYSQL or ORACLE + // To ensure the correctness of ObObj2strHelper::obj2str, you need to set the mysql or Oracle schema locally in the thread, there are two scenarios that depend on it: + // 1. set_meta_info_: first build local schema cache, depends on ObObj2strHelper + // 2. build_row_value_: formatting row data, relies on ObObj2strHelper + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + const bool enable_formatter_print_log = (TCONF.enable_formatter_print_log != 0); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(stmt_task)) { + LOG_ERROR("invalid arguments", K(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
stmt_task->is_dml_stmt()) || OB_ISNULL(dml_stmt_task)) { + LOG_ERROR("stmt_task is not DML statement", "stmt_task", *stmt_task); + ret = OB_NOT_SUPPORTED; + } else if (OB_FAIL(init_binlog_record_for_dml_stmt_task_(dml_stmt_task, br, is_ignore))) { + LOG_ERROR("init_binlog_record_for_dml_stmt_task_ fail", KR(ret), K(dml_stmt_task), K(is_ignore)); + } else if (is_ignore) { + br->set_is_valid(false); + } + // Collectively get Simple Schema + // Retry until exit or success + else if (OB_FAIL(get_schema_( + schema_getter_, + dml_stmt_task->get_table_version(), + dml_stmt_task->get_table_id(), + stop_flag, + schema_guard, + table_schema, + db_schema_info))) { + // Ignore the statement if the tenant was deleted, or the table was deleted or the get schema failed + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_INFO("[IGNORE_DATA] get schema error, tenant may be dropped", + "tenant_id", dml_stmt_task->get_tenant_id(), + "table_id", dml_stmt_task->get_table_id(), + "dml_stmt_task", *dml_stmt_task); + br->set_is_valid(false); + // reset ret + ret = OB_SUCCESS; + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_schema_ fail", KR(ret), KPC(dml_stmt_task), K(table_schema)); + } + } + // Failed to get table schema, table was deleted + // TODO: After the table is deleted, do some aftercare + else if (OB_ISNULL(table_schema)) { + if (enable_formatter_print_log) { + LOG_INFO("[IGNORE_DATA] get schema error, table may be dropped", + "table_id", dml_stmt_task->get_table_id(), + "dml_stmt_task", *dml_stmt_task); + } else if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL)) { + LOG_INFO("[IGNORE_DATA] get schema error, table may be dropped", + "table_id", dml_stmt_task->get_table_id(), + "dml_stmt_task", *dml_stmt_task); + } + br->set_is_valid(false); + } + // Filter sys tables that are not user tables and are not in backup mode + else if (! table_schema->is_user_table() + && ! BackupTableHelper::is_sys_table_exist_on_backup_mode(table_schema->is_sys_table(), + table_schema->get_table_id())) { + LOG_DEBUG("[IGNORE_DATA] ignore non-user table or sys table not exist on backup mode", + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "table_type", ob_table_type_str(table_schema->get_table_type())); + br->set_is_valid(false); + } + // Ignore data from tables in the recycle bin + else if (table_schema->is_in_recyclebin() && ! 
is_backup_mode()) { + if (enable_formatter_print_log) { + LOG_INFO("[IGNORE_DATA] table is in recyclebin", + "table_id", dml_stmt_task->get_table_id(), + "is_backup_mode", is_backup_mode(), + KPC(dml_stmt_task)); + } else if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL)) { + LOG_INFO("[IGNORE_DATA] table is in recyclebin", + "table_id", dml_stmt_task->get_table_id(), + "is_backup_mode", is_backup_mode(), + KPC(dml_stmt_task)); + } + br->set_is_valid(false); + } else if (OB_FAIL(get_tenant_compat_mode(table_schema->get_tenant_id(), compat_mode, stop_flag))) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), "tenant_id", table_schema->get_tenant_id(), + "compat_mode", print_compat_mode(compat_mode), KPC(table_schema)); + } else { + share::CompatModeGuard g(compat_mode); + + if (OB_FAIL(set_meta_info_(schema_guard, table_schema, db_schema_info, br, stop_flag))) { + // Failed to get schema, ignore the data + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_INFO("[IGNORE_DATA] schema error when set_meta_info, tenant may be dropped", KR(ret), + "table_id", dml_stmt_task->get_table_id(), KPC(dml_stmt_task), K(db_schema_info)); + br->set_is_valid(false); + ret = OB_SUCCESS; + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("set_meta_info_ fail", KR(ret), K(table_schema), K(db_schema_info), K(br), + "compat_mode", print_compat_mode(compat_mode)); + } + } else if (OB_FAIL(build_row_value_(rv, dml_stmt_task, table_schema, new_column_cnt))) { + LOG_ERROR("build_row_value_ fail", KR(ret), K(rv), "dml_stmt_task", *dml_stmt_task, K(new_column_cnt), + "compat_mode", print_compat_mode(compat_mode)); + } else if (OB_FAIL(build_binlog_record_(br, rv, new_column_cnt, dml_stmt_task->get_dml_type(), table_schema))) { + LOG_ERROR("build_binlog_record_ fail", KR(ret), K(br), K(rv), K(new_column_cnt), KPC(dml_stmt_task)); + } else { + if (OB_NOT_NULL(br->get_data()) && + OB_UNLIKELY(SRC_FULL_RECORDED != br->get_data()->getSrcCategory())) { + // Handle non-full-column logging mode: currently not supported + handle_non_full_columns_(*dml_stmt_task, *table_schema); + if (skip_dirty_data_) { + ret = OB_SUCCESS; + } else { + // Do not ignore: full-column logging is required, so exit with an error if the log is not full-column + ret = OB_NOT_SUPPORTED; + } + } else { + // do nothing + } + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCC(ret)) { + LOG_DEBUG("formatter handle task", K(thread_index), "stmt_task", *dml_stmt_task); + + // Do the finishing work + // Note: After this function call, neither the partition transaction nor the statement task can be referenced anymore and may be recycled at any time + if (OB_FAIL(finish_format_(stmt_task->get_host(), dml_stmt_task->get_redo_log_entry_task(), stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("finish_format_ fail", KR(ret)); + } + } + } + + // Exit on failure + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "formatter thread exits, thread_index=%ld, err=%d", + thread_index, ret); + stop_flag = true; + } + + return ret; +} + +int ObLogFormatter::init_binlog_record_for_dml_stmt_task_(DmlStmtTask *stmt_task, + ObLogBR *&br, + bool &is_ignore) +{ + int ret = OB_SUCCESS; + is_ignore = false; + ObLogRowDataIndex *row_data_index = NULL; + ObLogEntryTask *log_entry_task = NULL; + bool is_rollback = false; + + if (OB_UNLIKELY(!
inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(stmt_task)) { + LOG_ERROR("invalid arguments", K(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(row_data_index = &(stmt_task->get_row_data_index()))) { + LOG_ERROR("row_data_index is NULL", KPC(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(log_entry_task = &(stmt_task->get_redo_log_entry_task()))) { + LOG_ERROR("log_entry_task is NULL", KPC(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(br_pool_->alloc(false/*is_serilized*/, br, row_data_index, log_entry_task))) { + LOG_ERROR("alloc binlog record from pool fail", KR(ret), K(stmt_task)); + } else if (OB_ISNULL(br)) { + LOG_ERROR("alloc binlog record fail", K(br)); + ret = OB_ERR_UNEXPECTED; + } else { + is_rollback = row_data_index->is_rollback(); + + // select ... for update to record T_DML_LOCK log to prevent loss of row lock information on the standby machine in the event of a master/standby switchover, no synchronization required + if (T_DML_LOCK == stmt_task->get_dml_type()) { + is_ignore = true; + } else if (is_rollback) { + is_ignore = true; + } else { + RecordType type = get_record_type(stmt_task->get_dml_type()); + const uint64_t tenant_id = extract_tenant_id(stmt_task->get_host().get_partition().get_table_id()); + + if (OB_FAIL(br->init_dml_data_first(type, tenant_id))) { + LOG_ERROR("br init_dml_data_first fail", KR(ret), K(type), K(tenant_id), K(*stmt_task)); + } else { + LOG_DEBUG("br init_dml_data succ", KR(ret), K(type), K(tenant_id), K(stmt_task), K(*stmt_task)); + } + } + + if (OB_SUCC(ret)) { + row_data_index->set_binlog_record(br); + } + } + + if (OB_FAIL(ret)) { + if (NULL != br) { + br_pool_->free(br); + br = NULL; + } + } + return ret; +} + +void ObLogFormatter::handle_non_full_columns_(DmlStmtTask &dml_stmt_task, + const TableSchemaType &table_schema) +{ + PartTransTask &task = dml_stmt_task.get_host(); + + if (! skip_dirty_data_) { + LOG_ERROR("row data is not full recorded", + "pkey", task.get_partition(), + "prepare_log_id", task.get_prepare_log_id(), + "timestamp", task.get_timestamp(), + "dml_type", dml_stmt_task.get_dml_type(), + "dml_type_str", print_dml_type(dml_stmt_task.get_dml_type()), + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + K(dml_stmt_task)); + } +} + +int ObLogFormatter::finish_format_(PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + ret = OB_NOT_INIT; + } else { + const int64_t stmt_num = redo_log_entry_task.get_stmt_num(); + int64_t formatted_stmt_num = redo_log_entry_task.inc_formatted_stmt_num(); + const bool is_all_stmt_formatted = formatted_stmt_num >= stmt_num; + const uint64_t tenant_id = part_trans_task.get_tenant_id(); + + if (is_all_stmt_formatted) { + if (OB_FAIL(redo_log_entry_task.link_row_list())) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("redo_log_entry_task link_row_list fail", KR(ret), K(redo_log_entry_task)); + } + } else { + LOG_DEBUG("[FORMATT]", K(tenant_id), K(stmt_num), K(redo_log_entry_task), K(part_trans_task)); + + if (is_memory_working_mode(working_mode_)) { + if (OB_FAIL(handle_memory_data_sync_work_mode_(part_trans_task, redo_log_entry_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_memory_data_sync_work_mode_ fail", KR(ret), K(part_trans_task), K(redo_log_entry_task)); + } + } + } else if (is_storage_working_mode(working_mode_)) { + if (OB_FAIL(handle_storage_data_sync_work_mode_(part_trans_task, redo_log_entry_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_storage_data_sync_work_mode_ fail", KR(ret), K(part_trans_task), K(redo_log_entry_task)); + } + } + } else { + ret = OB_NOT_SUPPORTED; + } + } + + if (OB_SUCC(ret)) { + ATOMIC_DEC(&log_entry_task_count_); + } + } else { + // do nothing + } + } + + return ret; +} + +int ObLogFormatter::handle_memory_data_sync_work_mode_(PartTransTask &part_trans_task, + ObLogEntryTask &log_entry_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + bool is_unserved_part_trans_task_can_be_recycled = false; + + if (OB_FAIL(part_trans_task.handle_log_entry_task_callback(ObLogEntryTask::FORMATTER_CB, + log_entry_task, + is_unserved_part_trans_task_can_be_recycled))) { + LOG_ERROR("handle_log_entry_task_callback fail", KR(ret), K(log_entry_task), K(part_trans_task), K(stop_flag)); + } else if (is_unserved_part_trans_task_can_be_recycled) { + LOG_DEBUG("handle_log_entry_task_callback: part_trans_task is revert", K(part_trans_task)); + part_trans_task.revert(); + } else {} + + return ret; +} + +int ObLogFormatter::handle_storage_data_sync_work_mode_(PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(dispatch_to_storager_(redo_log_entry_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch_to_storager_ fail", KR(ret), K(redo_log_entry_task), K(part_trans_task)); + } + } else { + // succ + } + + return ret; +} + +int ObLogFormatter::dispatch_to_storager_(ObLogEntryTask &log_entry_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(storager_)) { + LOG_ERROR("storager_ is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + RETRY_FUNC(stop_flag, (*storager_), push, log_entry_task, DATA_OP_TIMEOUT); + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant dropped +// #retval other error code fail +int ObLogFormatter::set_meta_info_(ObLogSchemaGuard &schema_guard, + const TableSchemaType *&simple_table_schema, + const DBSchemaInfo &db_schema_info, + ObLogBR *br, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(simple_table_schema) || OB_UNLIKELY(! 
db_schema_info.is_valid()) || OB_ISNULL(br)) { + LOG_ERROR("invalid argument", K(simple_table_schema), K(br), K(db_schema_info)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(meta_manager_) || OB_ISNULL(schema_getter_)) { + LOG_ERROR("meta_manager_ or schema_getter_ is null", K(meta_manager_), K(schema_getter_)); + ret = OB_ERR_UNEXPECTED; + } else { + IDBMeta *db_meta = NULL; + ITableMeta *table_meta = NULL; + + if (OB_FAIL(meta_manager_->get_table_meta(simple_table_schema, *schema_getter_, table_meta, stop_flag)) + || NULL == table_meta) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_WARN("schema error when get_table_meta, tenant may be dropped", KR(ret), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name()); + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_table_meta fail", KR(ret), "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), KPC(simple_table_schema), K(table_meta)); + ret = OB_SUCCESS == ret ? OB_ERR_UNEXPECTED : ret; + } + } else if (OB_FAIL(meta_manager_->get_db_meta(db_schema_info, schema_guard, db_meta, stop_flag)) + || NULL == db_meta) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_WARN("schema error when get_db_meta, tenant may be dropped", KR(ret), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + K(db_schema_info)); + } else if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_db_meta fail", KR(ret), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + K(db_schema_info)); + ret = OB_SUCCESS == ret ? OB_ERR_UNEXPECTED : ret; + } + } else if (OB_FAIL(br->set_table_meta(table_meta))) { + LOG_ERROR("set_table_meta fail", KR(ret), K(br), K(table_meta)); + } else if (OB_FAIL(br->set_db_meta(db_meta))) { + LOG_ERROR("set_db_meta fail", KR(ret), K(br), K(db_meta)); + } else { + // success + } + + if (OB_SUCCESS != ret) { + if (NULL != table_meta) { + meta_manager_->revert_table_meta(table_meta); + table_meta = NULL; + } + + if (NULL != db_meta) { + meta_manager_->revert_db_meta(db_meta); + db_meta = NULL; + } + } + } + + return ret; +} + +int ObLogFormatter::init_row_value_array_(const int64_t row_value_num) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(row_value_num <= 0)) { + LOG_ERROR("invalid argument", K(row_value_num)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t size = sizeof(RowValue) * row_value_num; + void *ptr = allocator_.alloc(size); + + if (NULL == (row_value_array_ = static_cast(ptr))) { + LOG_ERROR("allocate memory for RowValue fail", K(size), K(row_value_num)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t index = 0; index < row_value_num; index++) { + row_value_array_[index].reset(); + } + } + } + + return ret; +} + +void ObLogFormatter::destroy_row_value_array_() +{ + if (NULL != row_value_array_) { + allocator_.free(static_cast(row_value_array_)); + row_value_array_ = NULL; + } +} + +int ObLogFormatter::build_row_value_(RowValue *rv, + DmlStmtTask *stmt_task, + const TableSchemaType *simple_table_schema, + int64_t &new_column_cnt) +{ + int ret = OB_SUCCESS; + ColValueList *rowkey_cols = NULL; + ColValueList *new_cols = NULL; + ColValueList *old_cols = NULL; + int64_t column_num = 0; + TableSchemaInfo *tb_schema_info = NULL; + + if (OB_UNLIKELY(!
inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(rv) || OB_ISNULL(stmt_task) || OB_ISNULL(simple_table_schema)) { + LOG_ERROR("invalid argument", K(rv), K(stmt_task), K(simple_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(meta_manager_)) { + LOG_ERROR("meta_manager_ is null", K(meta_manager_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(meta_manager_->get_table_schema_meta(simple_table_schema->get_schema_version(), + simple_table_schema->get_table_id(), tb_schema_info))) { + LOG_ERROR("meta_manager_ get_table_schema_meta fail", KR(ret), + "version", simple_table_schema->get_schema_version(), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), KPC(tb_schema_info)); + } else if (OB_ISNULL(tb_schema_info)) { + LOG_ERROR("tb_schema_info is null", K(tb_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + column_num = tb_schema_info->get_non_hidden_column_count(); + + if (column_num <= 0) { + LOG_INFO("no valid column is found", "table_name", simple_table_schema->get_table_name(), + "table_id", simple_table_schema->get_table_id()); + } else if (OB_FAIL(stmt_task->parse_cols(obj2str_helper_, simple_table_schema, tb_schema_info, + enable_output_hidden_primary_key_))) { + LOG_ERROR("stmt_task.parse_cols fail", KR(ret), K(*stmt_task), K(obj2str_helper_), + KPC(simple_table_schema), KPC(tb_schema_info), + K(enable_output_hidden_primary_key_)); + } else if (OB_FAIL(stmt_task->get_cols(&rowkey_cols, &new_cols, &old_cols))) { + LOG_ERROR("get_cols fail", KR(ret), K(*stmt_task)); + } else if (OB_ISNULL(rowkey_cols) || OB_ISNULL(new_cols) || OB_ISNULL(old_cols)) { + LOG_ERROR("get_cols fail", K(rowkey_cols), K(new_cols), K(old_cols)); + ret = OB_ERR_UNEXPECTED; + } + // NOTE: Logic for determining whether an old value is included: the data in the old value is not empty + else if (OB_FAIL(rv->init(column_num, old_cols->num_ > 0))) { + LOG_ERROR("init RowValue fail", KR(ret), K(column_num)); + } + // fill new column value + else if (OB_FAIL(fill_normal_cols_(rv, *new_cols, simple_table_schema, *tb_schema_info, true))) { + LOG_ERROR("fill normal new columns fail", KR(ret), K(rv), KPC(new_cols)); + } + // fill old column value + else if (OB_FAIL(fill_normal_cols_(rv, *old_cols, simple_table_schema, *tb_schema_info, false))) { + LOG_ERROR("fill normal old columns fail", KR(ret), K(rv), KPC(old_cols)); + } else if (OB_FAIL(fill_rowkey_cols_(rv, *rowkey_cols, simple_table_schema, + *tb_schema_info))) { + LOG_ERROR("fill_rowkey_cols_ fail", KR(ret), K(rv), KPC(rowkey_cols), + "stmt_task", *stmt_task, K(simple_table_schema)); + } else if (OB_FAIL(fill_orig_default_value_(rv, simple_table_schema, *tb_schema_info, + stmt_task->get_redo_log_entry_task().get_allocator()))) { + LOG_ERROR("fill_orig_default_value_ fail", KR(ret), K(rv), K(simple_table_schema)); + } else { + new_column_cnt = new_cols->num_; + int64_t column_array_size = sizeof(BinLogBuf) * column_num; + BinLogBuf *new_column_array = + static_cast(stmt_task->get_redo_log_entry_task().alloc(column_array_size)); + BinLogBuf *old_column_array = + static_cast(stmt_task->get_redo_log_entry_task().alloc(column_array_size)); + + if (OB_ISNULL(new_column_array) || OB_ISNULL(old_column_array)) { + LOG_ERROR("allocate memory for column array fail", K(column_array_size), K(column_num)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + rv->new_column_array_ = new_column_array; + rv->old_column_array_ = old_column_array; + } 
+ } + } + + return ret; +} + +int ObLogFormatter::fill_normal_cols_(RowValue *rv, + ColValueList &cv_list, + const TableSchemaType *simple_table_schema, + const TableSchemaInfo &tb_schema_info, + const bool is_new_value) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { + LOG_ERROR("invalid argument", K(rv), K(simple_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(meta_manager_)) { + LOG_ERROR("meta_manager_ is null", K(meta_manager_)); + ret = OB_ERR_UNEXPECTED; + } else { + ColValue *cv = cv_list.head_; + int64_t table_schema_version = simple_table_schema->get_schema_version(); + uint64_t table_id = simple_table_schema->get_table_id(); + + while (OB_SUCCESS == ret && NULL != cv) { + const uint64_t column_id = cv->column_id_; + ColumnSchemaInfo *column_schema_info = NULL; + int64_t column_index = -1; + ColumnPropertyFlag column_property_flag; + + if (OB_FAIL(tb_schema_info.get_column_schema_info(column_id, enable_output_hidden_primary_key_, + column_schema_info, column_property_flag))) { + LOG_ERROR("get_column_schema_info fail", KR(ret), K(table_schema_version), K(table_id), + "table_name", simple_table_schema->get_table_name(), + K(column_id), K(enable_output_hidden_primary_key_), + K(column_schema_info), K(column_property_flag)); + } else if (OB_UNLIKELY(column_property_flag.is_non_user())) { + // formatter should not process to non-user columns + LOG_ERROR("handle non user column, unexpected", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + // requires that the column must exist and is not a hidden column + // This logic is guaranteed when constructing the column values + } else if (OB_UNLIKELY(column_property_flag.is_delete()) + || OB_UNLIKELY(column_property_flag.is_hidden() + || OB_UNLIKELY(column_property_flag.is_invisible()))) { + LOG_ERROR("column is invalid. column does not exist or " + "hidden or invisible column is not filtered", K(column_property_flag), + "table", simple_table_schema->get_table_name(), + "column_id", cv->column_id_, + "table_schema_version", simple_table_schema->get_schema_version()); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + column_index = column_schema_info->get_column_idx(); + + if (OB_UNLIKELY(column_index < 0 || column_index >= OB_MAX_COLUMN_NUMBER)) { + LOG_ERROR("column_index is invalid", "table_name", simple_table_schema->get_table_name(), + K(column_index), + "column_id", cv->column_id_, K(OB_MAX_COLUMN_NUMBER)); + ret = OB_ERR_UNEXPECTED; + } else { + if (is_new_value) { + rv->new_columns_[column_index] = &cv->string_value_; + rv->is_changed_[column_index] = true; + } else { + rv->old_columns_[column_index] = &cv->string_value_; + } + } + } + + if (OB_SUCCESS == ret) { + cv = cv->next_; + } + } // while + } + + return ret; +} + +int ObLogFormatter::fill_rowkey_cols_(RowValue *rv, + ColValueList &rowkey_cols, + const TableSchemaType *simple_table_schema, + const TableSchemaInfo &tb_schema_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { + LOG_ERROR("invalid argument", K(rv), K(simple_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(meta_manager_)) { + LOG_ERROR("meta_manager_ is null", K(meta_manager_)); + ret = OB_ERR_UNEXPECTED; + } else { + ColValue *cv_node = rowkey_cols.head_; + int64_t rowkey_count = rowkey_cols.num_; + int64_t table_schema_version = simple_table_schema->get_schema_version(); + uint64_t table_id = simple_table_schema->get_table_id(); + + for (int64_t index = 0; + OB_SUCCESS == ret && index < rowkey_count; + index++, cv_node = cv_node->next_) { + uint64_t column_id = cv_node->column_id_; + int64_t column_index = -1; + ColumnSchemaInfo *column_schema_info = NULL; + ColumnPropertyFlag column_property_flag; + + if (OB_ISNULL(cv_node)) { + LOG_ERROR("column value node is NULL", K(index), K(rowkey_count), K(cv_node)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(tb_schema_info.get_column_schema_info(column_id, enable_output_hidden_primary_key_, + column_schema_info, column_property_flag))) { + LOG_ERROR("get_column_schema_info fail", KR(ret), K(table_schema_version), K(table_id), + "table_name", simple_table_schema->get_table_name(), + K(column_id), K(enable_output_hidden_primary_key_), + K(column_schema_info), K(column_property_flag)); + } else if (OB_UNLIKELY(column_property_flag.is_non_user())) { + // formatter should not be processed to non-user columns + LOG_ERROR("handle non user column, unexpected", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(column_property_flag.is_delete())) { + // Do not allow primary keys to not exist + LOG_ERROR("rowkey column does not exist", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table", simple_table_schema->get_table_name(), + "table_schema_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } + // Hidden primary keys should already be filtered + else if (OB_UNLIKELY(column_property_flag.is_hidden())) { + LOG_ERROR("hidden rowkey column is not filtered", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table", simple_table_schema->get_table_name(), + "table_schema_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (column_schema_info->is_invisible()) { + // not possible for invisible column + LOG_ERROR("is_invisible column unexpected", "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schema_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + column_index = column_schema_info->get_column_idx(); + + if (OB_UNLIKELY(column_index < 0 || column_index >= OB_MAX_COLUMN_NUMBER)) { + LOG_ERROR("column_index is invalid", "table_name", simple_table_schema->get_table_name(), + K(column_index), K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else { + // If the primary key column has been modified, the value after the modification is used, otherwise the value before the modification is used + if (NULL == rv->new_columns_[column_index]) { + 
rv->new_columns_[column_index] = &(cv_node->string_value_); + } + + rv->is_rowkey_[column_index] = column_schema_info->is_rowkey(); + rv->is_changed_[column_index] = true; + + if (rv->contain_old_column_ && NULL == rv->old_columns_[column_index]) { + rv->old_columns_[column_index] = &(cv_node->string_value_); + } + } + } + } // for + } + + return ret; +} + +int ObLogFormatter::fill_orig_default_value_(RowValue *rv, + const TableSchemaType *simple_table_schema, + const TableSchemaInfo &tb_schema_info, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { + LOG_ERROR("invalid argument", K(rv), K(simple_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(meta_manager_)) { + LOG_ERROR("meta_manager_ is null", K(meta_manager_)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t real_column_index = 0; + int64_t column_count = rv->column_num_; + int64_t table_schema_version = simple_table_schema->get_schema_version(); + uint64_t table_id = simple_table_schema->get_table_id(); + + for (int64_t index = 0; OB_SUCCESS == ret && index < column_count; index++) { + uint64_t column_id = OB_INVALID_ID; + ColumnSchemaInfo *column_schema_info = NULL; + ColumnPropertyFlag column_property_flag; + + // 1. where column_count does not contain the number of hidden columns, it is obtained directly from RowValue + // 2. get the column_idx of the specified version of table-column_idx directly from the meta_manager, + // Ensure that the order of the columns in the oblog output is the same as the order of the columns defined in the OB table (add columns at the specified position) + if (OB_FAIL(tb_schema_info.get_column_id(index, column_id))) { + LOG_ERROR("tb_schema_info get_column_id fail", KR(ret), K(index), K(column_id)); + } else if (OB_UNLIKELY(OB_INVALID_ID == column_id)) { + LOG_ERROR("column_id is not valid", K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tb_schema_info.get_column_schema_info(column_id, enable_output_hidden_primary_key_, + column_schema_info, column_property_flag))) { + LOG_ERROR("get_column_schema_info fail", KR(ret), K(index), K(column_count), K(table_schema_version), + K(table_id), "table_name", simple_table_schema->get_table_name(), + K(column_id), K(enable_output_hidden_primary_key_), + K(column_schema_info), K(column_property_flag)); + } else if (OB_UNLIKELY(column_property_flag.is_non_user())) { + // formatter should not be processed to non-user columns + LOG_ERROR("handle non user column, unexpected", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schema_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(column_property_flag.is_delete())) { + // not possible for delete column + LOG_ERROR("is_delete column unexpected", "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schema_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (column_schema_info->is_hidden()) { + // not possible for hidden column + LOG_ERROR("is_hidden column unexpected", "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schema_version", 
simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (column_schema_info->is_invisible()) { + // not possible for invisible column + LOG_ERROR("is_invisible column unexpected", "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schema_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + // Determine if it is a newly added column, if it is a newly added column, then fill in the original default value + // If neither the new value nor the old value has a value, then it must be a newly added column + if (NULL != rv->new_columns_[real_column_index] + || NULL != rv->old_columns_[real_column_index]) { + rv->orig_default_value_[real_column_index] = NULL; + } else { + // default vlaue + const common::ObString *orig_default_value_str = column_schema_info->get_orig_default_value_str(); + ObString *str = static_cast(allocator.alloc(sizeof(ObString))); + + if (OB_ISNULL(str)) { + LOG_ERROR("allocate memory for ObString fail", K(sizeof(ObString))); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(orig_default_value_str)) { + LOG_ERROR("orig_default_value_str is null", K(index), + K(table_id), "table_name", simple_table_schema->get_table_name(), + K(column_id), KPC(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + // For varchar: + // 1. the default value is NULL, which should be set to NULL + // 2. The default value is an empty string, which should be set to '' + } else if (NULL == orig_default_value_str->ptr() + && 0 == orig_default_value_str->length()) { + new (str) ObString(); + } else if (NULL != orig_default_value_str->ptr() + && 0 == orig_default_value_str->length()) { + // Empty strings do not require memcpy + new (str) ObString(); + str->assign_ptr(ObObj2strHelper::EMPTY_STRING, static_cast(0)); + } else { + const int64_t length = orig_default_value_str->length(); + const char *ptr = orig_default_value_str->ptr(); + char *ptr_copy = NULL; + + if (OB_ISNULL(ptr_copy = static_cast(allocator.alloc(length)))) { + LOG_ERROR("allocate memory fail", K(length)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(ptr_copy, ptr, length); + new (str) ObString(length, ptr_copy); + } + } + + if (OB_SUCC(ret)) { + rv->orig_default_value_[real_column_index] = str; + + LOG_DEBUG("cast column orig default value", + "casted", *str, + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + K(real_column_index), K(index)); + } + } + + real_column_index++; + } + } + } + + return ret; +} + +int ObLogFormatter::set_src_category_(ILogRecord *br_data, + RowValue *rv, + const ObRowDml &dml_type, + const bool is_hbase_mode_put) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br_data) || OB_ISNULL(rv)) { + LOG_ERROR("invalid argument", K(br_data), K(rv)); + ret = OB_INVALID_ARGUMENT; + } else { + int src_category = SRC_NO; + + // 1. INSERT statements are always set to full column log format + // 2. DELETE and UPDATE must be populated with old values in full column logging mode, so if they are populated with old values, they are in full column logging format + // 3. 
OB-HBase mode put special handling + if (T_DML_INSERT == dml_type || rv->contain_old_column_ || is_hbase_mode_put) { + src_category = SRC_FULL_RECORDED; + } else { + src_category = SRC_FULL_FAKED; + } + + br_data->setSrcCategory(src_category); + } + + return ret; +} + +int ObLogFormatter::build_binlog_record_(ObLogBR *br, + RowValue *rv, + const int64_t new_column_cnt, + const ObRowDml &dml_type, + const TableSchemaType *simple_table_schema) +{ + int ret = OB_SUCCESS; + ILogRecord *br_data = NULL; + bool is_hbase_mode_put = false; + const uint64_t table_id = simple_table_schema->get_table_id(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br) || OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { + LOG_ERROR("invalid argument", K(br), K(rv), K(simple_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br_data = br->get_data())) { + LOG_ERROR("binlog record data is invalid", K(br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(rv->new_column_array_) || OB_ISNULL(rv->old_column_array_)) { + LOG_ERROR("invalid row value, new_column_array or old_column_array is invalid", + K(rv->new_column_array_), K(rv->old_column_array_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(is_hbase_mode_put_(table_id, dml_type, rv->column_num_, new_column_cnt, + rv->contain_old_column_, is_hbase_mode_put))) { + LOG_ERROR("is_hbase_mode_put_ fail", KR(ret), K(table_id), "dml_type", print_dml_type(dml_type), + "column_num", rv->column_num_, + K(new_column_cnt), + "contain_old_column", rv->contain_old_column_, + K(is_hbase_mode_put)); + } else if (OB_FAIL(set_src_category_(br_data, rv, dml_type, is_hbase_mode_put))) { + LOG_ERROR("set_src_category_ fail", KR(ret), K(br_data), K(rv), K(dml_type), K(is_hbase_mode_put)); + } else { + // default to be valid + br->set_is_valid(true); + + if (rv->column_num_ <= 0) { + LOG_INFO("ignore non-user-column table", "table_name", simple_table_schema->get_table_name(), + "table_id", simple_table_schema->get_table_id()); + // ignore table with no columns + br->set_is_valid(false); + } else { + br_data->setNewColumn(rv->new_column_array_, static_cast(rv->column_num_)); + br_data->setOldColumn(rv->old_column_array_, static_cast(rv->column_num_)); + + ObRowDml current_dml_type = dml_type; + if (is_hbase_mode_put) { + current_dml_type = T_DML_INSERT; + + // modify record type + RecordType type = get_record_type(current_dml_type); + if (OB_FAIL(br->setInsertRecordTypeForHBasePut(type))) { + LOG_ERROR("br setInsertRecordTypeForHBasePut fail", KR(ret), K(br), + "type", print_record_type(type), + "dml_type", print_dml_type(dml_type), + "current_dml_type", print_dml_type(current_dml_type), + "table_name", simple_table_schema->get_table_name(), + "table_id", simple_table_schema->get_table_id()); + } else { + // succ + } + } + + switch (current_dml_type) { + case T_DML_DELETE: { + ret = format_dml_delete_(br_data, rv); + break; + } + case T_DML_INSERT: { + ret = format_dml_insert_(br_data, rv); + break; + } + case T_DML_UPDATE: { + ret = format_dml_update_(br_data, rv); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_ERROR("unknown DML type, not supported", K(current_dml_type)); + break; + } + } + } + } + + return ret; +} + +int ObLogFormatter::is_hbase_mode_put_(const uint64_t table_id, + const ObRowDml &dml_type, + const int64_t column_number, + const int64_t new_column_cnt, + const bool contain_old_column, + bool &is_hbase_mode_put) +{ + int ret = OB_SUCCESS; + 
is_hbase_mode_put = false; + bool is_hbase_table = false; + + if (enable_hbase_mode_) { + if (OB_ISNULL(hbase_util_)) { + LOG_ERROR("hbase_util_ is null", K(hbase_util_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(hbase_util_->is_hbase_table(table_id, is_hbase_table))) { + LOG_ERROR("ObLogHbaseUtil is_hbase_table fail", KR(ret), K(table_id), K(is_hbase_table)); + } else if (is_hbase_table && T_DML_UPDATE == dml_type && false == contain_old_column) { + if (column_number == new_column_cnt) { + is_hbase_mode_put = true; + } else if (skip_hbase_mode_put_column_count_not_consistency_) { + is_hbase_mode_put = true; + + LOG_INFO("skip hbase mode put column count not consistency", K(table_id), + "dml_type", print_dml_type(dml_type), + "hbase_mode_put_column_cnt", new_column_cnt, + K(column_number)); + } else { + LOG_ERROR("hbase mode put column cnt is not consistent", K(table_id), + "dml_type", print_dml_type(dml_type), + "hbase_mode_put_column_cnt", new_column_cnt, + K(column_number)); + ret = OB_ERR_UNEXPECTED; + } + + LOG_DEBUG("[HBASE] [PUT]", K(is_hbase_mode_put), K(table_id), + "dml_type", print_dml_type(dml_type), + K(column_number), K(new_column_cnt), K(contain_old_column)); + } else { + // do nothing + } + } + + return ret; +} + +int ObLogFormatter::format_dml_delete_(ILogRecord *br_data, const RowValue *row_value) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br_data) || OB_ISNULL(row_value)) { + LOG_ERROR("invalid argument", K(br_data), K(row_value)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int64_t i = 0; OB_SUCCESS == ret && i < row_value->column_num_; i++) { + // Handle primary key values + if (row_value->is_rowkey_[i]) { + // The primary key value is always taken from the value recorded in the new columns, regardless of whether this is a full-column log + // The primary key is set by the fill_rowkey_cols_() function, which always places it in the new columns + // DELETE operations use the original primary key value and do not need the primary key value from the old columns + ObString *str = row_value->new_columns_[i]; + + if (OB_ISNULL(str)) { + LOG_ERROR("rowkey column is NULL, unexpected error", K(i), K(row_value->column_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putOld(str->ptr(), str->length()); + } + } + // Handle non-primary key values + else { + if (row_value->contain_old_column_) { + // For full-column logging, each non-rowkey old column is set to its corresponding value + // If the column value is not provided, then it is a newly added column and the corresponding original default value is set + ObString *str = row_value->old_columns_[i]; + if (NULL == str) { + str = row_value->orig_default_value_[i]; + } + + if (OB_ISNULL(str)) { + LOG_ERROR("old column value and original default value are all invalid", + K(i), "column_num", row_value->column_num_); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putOld(str->ptr(), str->length()); + } + } else { + // For non-full-column logging, non-rowkey old columns are set to no-change status + bool is_changed = false; + + if (OB_FAIL(ObLogBR::put_old(br_data, is_changed))) { + LOG_ERROR("put_old fail", KR(ret), K(br_data), K(is_changed)); + } + } + } + } + } + + return ret; +} + +int ObLogFormatter::format_dml_insert_(ILogRecord *br_data, const RowValue *row_value) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br_data) || OB_ISNULL(row_value)) { + LOG_ERROR("invalid argument", K(br_data), K(row_value)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int64_t i = 0; OB_SUCCESS == ret && i <
row_value->column_num_; i++) { + if (!row_value->is_changed_[i]) { + ObString *str_val = row_value->orig_default_value_[i]; + + if (OB_ISNULL(str_val)) { + LOG_ERROR("column original default value is NULL", K(i), + "column_num", row_value->column_num_); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putNew(str_val->ptr(), str_val->length()); + } + } else { + ObString *str_val = row_value->new_columns_[i]; + + if (OB_ISNULL(str_val)) { + LOG_ERROR("changed column new value is NULL", K(i), + "column_num", row_value->column_num_); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putNew(str_val->ptr(), str_val->length()); + } + } + + // FIXME: No old values are populated, regardless of whether it is a full column log + } + } + + return ret; +} + +int ObLogFormatter::format_dml_update_(ILogRecord *br_data, const RowValue *row_value) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br_data) || OB_ISNULL(row_value)) { + LOG_ERROR("invalid argument", K(br_data), K(row_value)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int i = 0; OB_SUCCESS == ret && i < row_value->column_num_; i++) { + if (! row_value->is_changed_[i]) { + if (row_value->contain_old_column_) { + // In the case of a full column log, for update, if a column is not updated, the new value is filled with the value in old_column + // If there is no corresponding value in the old column either, the original default value is filled + ObString *str_val = row_value->old_columns_[i]; + + if (NULL == str_val) { + str_val = row_value->orig_default_value_[i]; + } + + if (OB_ISNULL(str_val)) { + LOG_ERROR("new column value, old column value and original default value " + "are all invalid", + K(i), "column_num", row_value->column_num_); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putNew(str_val->ptr(), str_val->length()); + } + } else { + // Mark as unmodified when not a full column log + br_data->putNew(NULL, 0); + } + } else { + ObString *str_val = row_value->new_columns_[i]; + + if (OB_ISNULL(str_val)) { + LOG_ERROR("changed column new value is NULL", K(i), + "column_num", row_value->column_num_); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putNew(str_val->ptr(), str_val->length()); + } + } + + if (OB_SUCCESS == ret) { + if (row_value->contain_old_column_) { + // For full column logging, the old value is always filled with the value in old_column for updates + // If there is no valid value in the old column, the original default value is filled + ObString *str_val = row_value->old_columns_[i]; + + if (NULL == str_val) { + str_val = row_value->orig_default_value_[i]; + } + + if (OB_ISNULL(str_val)) { + LOG_ERROR("old column value and original default value are all invalid", + K(i), "column_num", row_value->column_num_); + ret = OB_ERR_UNEXPECTED; + } else { + br_data->putOld(str_val->ptr(), str_val->length()); + } + } else { + // When not full column logging, for update, the old value is filled with whether the corresponding column has been modified + bool is_changed = row_value->is_changed_[i]; + if (row_value->is_rowkey_[i]) { + is_changed = true; + } + + if (OB_FAIL(ObLogBR::put_old(br_data, is_changed))) { + LOG_ERROR("put_old fail", KR(ret), K(br_data), K(is_changed)); + } + } + } + } // end of for + } + + return ret; +} + +int ObLogFormatter::get_schema_(IObLogSchemaGetter *schema_getter, + const int64_t version, + const uint64_t table_id, + volatile bool &stop_flag, + ObLogSchemaGuard &schema_guard, + const TableSchemaType *&table_schema, + DBSchemaInfo &db_schema_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogFormatter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(schema_getter) || OB_UNLIKELY(version <= 0)) { + LOG_ERROR("invalid argument", K(schema_getter), K(version)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t refreshed_version = version; + const uint64_t pure_tb_id = extract_pure_id(table_id); + const uint64_t tenant_id = extract_tenant_id(table_id); + + if (OB_ALL_SEQUENCE_VALUE_TID == pure_tb_id) { + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + + if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + LOG_ERROR("get tenant fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(guard), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else { + const int64_t tenant_start_schema_version = tenant->get_start_schema_version(); + refreshed_version = max(version, tenant_start_schema_version); + } + } + + // get schema guard + RETRY_FUNC(stop_flag, (*schema_getter), get_schema_guard_and_table_schema, + table_id, + refreshed_version, + GET_SCHEMA_TIMEOUT, + schema_guard, + table_schema); + + if (OB_SUCCESS == ret && NULL != table_schema) { + uint64_t db_id = table_schema->get_database_id(); + + // Get database schema information, including name and version + RETRY_FUNC(stop_flag, schema_guard, get_database_schema_info, db_id, db_schema_info, + GET_SCHEMA_TIMEOUT); + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_formatter.h b/src/liboblog/src/ob_log_formatter.h new file mode 100644 index 0000000000000000000000000000000000000000..063c0ab440e4373838c258490136a7911fd1389e --- /dev/null +++ b/src/liboblog/src/ob_log_formatter.h @@ -0,0 +1,230 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_FORMATTER_H__ +#define OCEANBASE_LIBOBLOG_FORMATTER_H__ + +#include "lib/allocator/ob_allocator.h" // ObIAllocator +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread +#include "storage/ob_i_store.h" // ObRowDml + +#include "ob_log_binlog_record.h" // ILogRecord, ObLogBR + +#include "ob_log_part_trans_task.h" // ColValueList, PartTransTask, IStmtTask, DmlStmtTask +#include "ob_log_schema_cache_info.h" // TableSchemaInfo +#include "ob_log_hbase_mode.h" // ObLogHbaseUtil +#include "ob_log_schema_getter.h" // DBSchemaInfo +#include "ob_log_work_mode.h" // WorkingMode + +using namespace oceanbase::logmessage; +namespace oceanbase +{ +namespace common +{ +class ObString; +} + +namespace liboblog +{ +///////////////////////////////////////////////////////////////////////////////////////// +// IObLogFormatter + +class IObLogFormatter +{ +public: + enum + { + MAX_FORMATTER_NUM = 64, + PAGE_SIZE = common::OB_MALLOC_NORMAL_BLOCK_SIZE, + GET_SCHEMA_TIMEOUT = 1 * 1000 * 1000, + }; + +public: + virtual ~IObLogFormatter() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(IStmtTask *task, volatile bool &stop_flag) = 0; + virtual int get_task_count(int64_t &br_count, int64_t &log_entry_task_count) = 0; +}; + + + +///////////////////////////////////////////////////////////////////////////////////////// +// ObLogFormatter + +class IObLogMetaManager; +class IObLogSchemaGetter; +class IObLogStorager; +class IObLogErrHandler; +class ObObj2strHelper; +class IObLogBRPool; +class ObLogSchemaGuard; + +typedef common::ObMQThread FormatterThread; + +class ObLogFormatter : public IObLogFormatter, public FormatterThread +{ +public: + ObLogFormatter(); + virtual ~ObLogFormatter(); + +public: + int start(); + void stop(); + void mark_stop_flag() { FormatterThread::mark_stop_flag(); } + int push(IStmtTask *task, volatile bool &stop_flag); + int get_task_count(int64_t &br_count, + int64_t &log_entry_task_count); + int handle(void *data, const int64_t thread_index, volatile bool &stop_flag); + +public: + int init(const int64_t thread_num, + const int64_t queue_size, + const WorkingMode working_mode, + ObObj2strHelper *obj2str_helper, + IObLogBRPool *br_pool, + IObLogMetaManager *meta_manager, + IObLogSchemaGetter *schema_getter, + IObLogStorager *storager, + IObLogErrHandler *err_handler, + const bool skip_dirty_data, + const bool enable_hbase_mode, + ObLogHbaseUtil &hbase_util, + const bool skip_hbase_mode_put_column_count_not_consistency, + const bool enable_output_hidden_primary_key); + void destroy(); + +private: + struct RowValue + { + int64_t column_num_; + bool contain_old_column_; + BinLogBuf *new_column_array_; + BinLogBuf *old_column_array_; + + common::ObString *new_columns_[common::OB_MAX_COLUMN_NUMBER]; + common::ObString *old_columns_[common::OB_MAX_COLUMN_NUMBER]; + common::ObString *orig_default_value_[common::OB_MAX_COLUMN_NUMBER]; + + bool is_rowkey_[common::OB_MAX_COLUMN_NUMBER]; + bool is_changed_[common::OB_MAX_COLUMN_NUMBER]; + + void reset(); + int init(const int64_t column_num, const bool contain_old_column); + }; + +private: + typedef share::schema::ObSimpleTableSchemaV2 TableSchemaType; + static const int64_t DATA_OP_TIMEOUT = 1 * 1000 * 1000; + static const int64_t PRINT_LOG_INTERVAL = 10 * 1000 * 1000; + + void handle_non_full_columns_(DmlStmtTask &dml_stmt_task, + const TableSchemaType &table_schema); + int init_row_value_array_(const int64_t row_value_num); + void 
destroy_row_value_array_(); + int set_meta_info_(ObLogSchemaGuard &schema_guard, + const TableSchemaType *&simple_table_schema, + const DBSchemaInfo &db_schema_info, + ObLogBR *br, + volatile bool &stop_flag); + int build_row_value_(RowValue *rv, + DmlStmtTask *stmt_task, + const TableSchemaType *simple_table_schema, + int64_t &new_column_cnt); + int fill_normal_cols_(RowValue *rv, + ColValueList &cv_list, + const TableSchemaType *simple_table_schema, + const TableSchemaInfo &tb_schema_info, + const bool is_new_value); + int fill_rowkey_cols_(RowValue *rv, + ColValueList &rowkey_cols, + const TableSchemaType *simple_table_schema, + const TableSchemaInfo &tb_schema_info); + int build_binlog_record_(ObLogBR *br, + RowValue *rv, + const int64_t new_column_cnt, + const storage::ObRowDml &dml_type, + const TableSchemaType *simple_table_schema); + // HBase mode put + // 1. hbase table + // 2. update type + // 3. new value all columns, old value empty + int is_hbase_mode_put_(const uint64_t table_id, + const storage::ObRowDml &dml_type, + const int64_t column_number, + const int64_t new_column_cnt, + const bool contain_old_column, + bool &is_hbase_mode_put); + int set_src_category_(ILogRecord *br, + RowValue *rv, + const storage::ObRowDml &dml_type, + const bool is_hbase_mode_put); + int format_dml_delete_(ILogRecord *binlog_record, const RowValue *row_value); + int format_dml_insert_(ILogRecord *binlog_record, const RowValue *row_value); + int format_dml_update_(ILogRecord *binlog_record, const RowValue *row_value); + int fill_orig_default_value_(RowValue *rv, + const TableSchemaType *simple_table_schema, + const TableSchemaInfo &tb_schema_info, + common::ObIAllocator &allocator); + + int get_schema_(IObLogSchemaGetter *schema_getter, + const int64_t version, + const uint64_t table_id, + volatile bool &stop_flag, + ObLogSchemaGuard &schema_guard, + const TableSchemaType *&table_schema, + DBSchemaInfo &db_schema_info); + int finish_format_(PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + volatile bool &stop_flag); + int init_binlog_record_for_dml_stmt_task_(DmlStmtTask *stmt_task, + ObLogBR *&br, + bool &is_ignore); + int handle_memory_data_sync_work_mode_(PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + volatile bool &stop_flag); + int handle_storage_data_sync_work_mode_(PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + volatile bool &stop_flag); + int dispatch_to_storager_(ObLogEntryTask &log_entry_task, + volatile bool &stop_flag); + +private: + bool inited_; + WorkingMode working_mode_; + ObObj2strHelper *obj2str_helper_; + IObLogBRPool *br_pool_; + IObLogErrHandler *err_handler_; + IObLogMetaManager *meta_manager_; + IObLogSchemaGetter *schema_getter_; + IObLogStorager *storager_; + RowValue *row_value_array_; + common::ObArenaAllocator allocator_; + // Used to ensure that statement tasks are evenly distributed to Formatter threads + uint64_t round_value_; + bool skip_dirty_data_; + bool enable_hbase_mode_; + ObLogHbaseUtil *hbase_util_; + bool skip_hbase_mode_put_column_count_not_consistency_; + bool enable_output_hidden_primary_key_; + int64_t log_entry_task_count_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFormatter); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_FORMATTER_H__ */ diff --git a/src/liboblog/src/ob_log_hbase_mode.cpp b/src/liboblog/src/ob_log_hbase_mode.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..61cb055e815fe66350b930c66fc9bcfa06d5585a --- /dev/null +++ b/src/liboblog/src/ob_log_hbase_mode.cpp @@ -0,0 +1,229 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_hbase_mode.h" // ObLogHbaseUtil + +#include "share/schema/ob_table_schema.h" // ObTableSchema, ObColumnIterByPrevNextID +#include "share/schema/ob_column_schema.h" // ObColumnSchemaV2 + +using namespace oceanbase::common; +using namespace oceanbase::share::schema; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogHbaseUtil::ObLogHbaseUtil() : + inited_(false), + table_id_set_(), + column_id_map_() +{} + +ObLogHbaseUtil::~ObLogHbaseUtil() +{ + destroy(); +} + +int ObLogHbaseUtil::init() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_FAIL(table_id_set_.create(DEFAULT_TABLE_SET_SIZE))) { + LOG_ERROR("table_id_set_ create fail", KR(ret)); + } else if (OB_FAIL(column_id_map_.init(ObModIds::OB_LOG_HBASE_COLUMN_ID_MAP))) { + LOG_ERROR("init column_id_map_ fail", KR(ret)); + } else { + inited_ = true; + } + + return ret; +} + +void ObLogHbaseUtil::destroy() +{ + inited_ = false; + + table_id_set_.destroy(); + column_id_map_.destroy(); +} + +int ObLogHbaseUtil::add_hbase_table_id(const ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + + bool is_hbase_mode_table = false; + const uint64_t table_id = table_schema.get_table_id(); + const char *table_name = table_schema.get_table_name(); + + if (OB_FAIL(filter_hbase_mode_table_(table_schema, is_hbase_mode_table))) { + LOG_ERROR("filter_hbase_mode_table_ fail", KR(ret), K(table_id), K(table_name), K(is_hbase_mode_table)); + } else if (! 
is_hbase_mode_table) { + LOG_INFO("[IS_NOT_HBASE_TABLE]", K(table_name), K(table_id), K(is_hbase_mode_table)); + } else if (OB_FAIL(table_id_set_.set_refactored(table_id))) { + LOG_ERROR("add_table_id into table_id_set_ fail", KR(ret), K(table_name), K(table_id)); + } else { + LOG_INFO("[HBASE] add_table_id into table_id_set_ succ", K(table_name), K(table_id)); + } + + return ret; +} + +int ObLogHbaseUtil::filter_hbase_mode_table_(const ObTableSchema &table_schema, + bool &is_hbase_mode_table) +{ + int ret = OB_SUCCESS; + + is_hbase_mode_table = false; + // Marks the presence or absence of a specified column + int column_flag[HBASE_TABLE_COLUMN_COUNT]; + // Mark column T as bigint or not + bool is_T_column_bigint_type = false; + // Record T-column id + uint64_t column_id = OB_INVALID_ID; + memset(column_flag, '\0', sizeof(column_flag)); + ObColumnIterByPrevNextID pre_next_id_iter(table_schema); + + while (OB_SUCCESS == ret) { + const ObColumnSchemaV2 *column_schema = NULL; + + if (OB_FAIL(pre_next_id_iter.next(column_schema))) { + if (OB_ITER_END != ret) { + LOG_ERROR("pre_next_id_iter next fail", KR(ret), KPC(column_schema)); + } + } else if (OB_ISNULL(column_schema)) { + LOG_ERROR("column_schema is null", KPC(column_schema)); + ret = OB_ERR_UNEXPECTED; + } else { + const char *column_name = column_schema->get_column_name(); + + if (0 == strcmp(column_name, K_COLUMN)) { + column_flag[0]++; + } else if (0 == strcmp(column_name, Q_COLUMN)) { + column_flag[1]++; + } else if (0 == strcmp(column_name, T_COLUMN)) { + column_flag[2]++; + + if (ObIntType == column_schema->get_data_type()) { + is_T_column_bigint_type = true; + column_id = column_schema->get_column_id(); + } + } else if (0 == strcmp(column_name, V_COLUMN)) { + column_flag[3]++; + } + } + } // while + + // Iterate through all columns + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + + int64_t hbase_table_column_cnt = 0; + // check contains four columns K, Q, T, V + for (int64_t idx=0; idx < HBASE_TABLE_COLUMN_COUNT && OB_SUCC(ret); ++idx) { + if (1 == column_flag[idx]) { + ++hbase_table_column_cnt; + } + } + + if (OB_SUCC(ret)) { + if ((HBASE_TABLE_COLUMN_COUNT == hbase_table_column_cnt) + && is_T_column_bigint_type) { + is_hbase_mode_table = true; + + TableID table_key(table_schema.get_table_id()); + if (OB_UNLIKELY(OB_INVALID_ID == column_id)) { + LOG_ERROR("column_id is not valid", K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(column_id_map_.insert(table_key, column_id))) { + LOG_ERROR("column_id_map_ insert fail", KR(ret), K(table_key), K(column_id)); + } else { + // succ + } + } else { + is_hbase_mode_table = false; + } + } + + LOG_INFO("[HBASE] table info", "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + K(hbase_table_column_cnt), + K(column_id), K(is_T_column_bigint_type), + K(is_hbase_mode_table)); + + return ret; +} + +int ObLogHbaseUtil::judge_hbase_T_column(const uint64_t table_id, + const uint64_t column_id, + bool &chosen) +{ + int ret = OB_SUCCESS; + chosen = false; + + if (OB_FAIL(table_id_set_.exist_refactored(table_id))) { + if (OB_HASH_EXIST == ret) { + ret = OB_SUCCESS; + + // Table exists to determine if it is a T column + TableID table_key(table_id); + uint64_t T_column_id = OB_INVALID_ID; + + if (OB_FAIL(column_id_map_.get(table_key, T_column_id))) { + LOG_ERROR("get column_id from map fail", KR(ret), K(table_key), K(T_column_id)); + } else if (OB_UNLIKELY(OB_INVALID_ID == T_column_id)) { + LOG_ERROR("T_column_id is not valid", K(T_column_id)); + 
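The check performed by filter_hbase_mode_table_ above reduces to: each of the columns K, Q, T and V appears exactly once, and the T column is a bigint; only then is the table id registered and its T column id recorded. A standalone sketch of that predicate over plain (column name, is_bigint) pairs; the helper name and containers are illustrative, not part of this file:

#include <string>
#include <utility>
#include <vector>

// True when the column list carries the HBase-style K/Q/T/V columns
// (each exactly once) and the T column is declared as a bigint.
bool looks_like_hbase_table(const std::vector<std::pair<std::string, bool>> &columns)
{
  int k = 0, q = 0, t = 0, v = 0;
  bool t_is_bigint = false;
  for (const auto &col : columns) {
    if (col.first == "K")      { ++k; }
    else if (col.first == "Q") { ++q; }
    else if (col.first == "T") { ++t; t_is_bigint = col.second; }
    else if (col.first == "V") { ++v; }
  }
  return 1 == k && 1 == q && 1 == t && 1 == v && t_is_bigint;
}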
ret = OB_ERR_UNEXPECTED; + } else if (column_id == T_column_id) { + chosen = true; + } else { + chosen = false; + } + } else if (OB_HASH_NOT_EXIST == ret) { + chosen = false; + ret = OB_SUCCESS; + } else { + LOG_ERROR("table_id_set_ exist_refactored fail", KR(ret), K(table_id)); + } + } + + return ret; +} + +int ObLogHbaseUtil::is_hbase_table(const uint64_t table_id, + bool &chosen) +{ + int ret = OB_SUCCESS; + chosen = false; + + if (OB_FAIL(table_id_set_.exist_refactored(table_id))) { + if (OB_HASH_EXIST == ret) { + ret = OB_SUCCESS; + chosen = true; + } else if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + chosen = false; + } else { + LOG_ERROR("table_id_set_ exist_refactored fail", KR(ret), K(table_id)); + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_hbase_mode.h b/src/liboblog/src/ob_log_hbase_mode.h new file mode 100644 index 0000000000000000000000000000000000000000..525361c337c5d1aaf9b20ba44af61e1b8b44003a --- /dev/null +++ b/src/liboblog/src/ob_log_hbase_mode.h @@ -0,0 +1,116 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_HBASE_MODE_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_HBASE_MODE_H_ + +#include "lib/hash/ob_hashset.h" // ObHashSet +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap + +namespace oceanbase +{ +namespace share +{ +namespace schema +{ +class ObTableSchema; +} // namespace schema +} // namespace share + +namespace liboblog +{ + +class ObLogHbaseUtil +{ +public: + ObLogHbaseUtil(); + virtual ~ObLogHbaseUtil(); + +public: + // Determine if the table is an hbase model, + // if yes, join; otherwise do nothing + // + // Determine if the table is an hbase model: + // 1. table_name contains $ + // 2. contains four columns K, Q, T, V + // 3. 
T is of type bigint + // Note: All of the above conditions are not necessarily met for an hbase table + int add_hbase_table_id(const oceanbase::share::schema::ObTableSchema &table_schema); + + // Determine if conversion is required + // table exists and is a T column + int judge_hbase_T_column(const uint64_t table_id, + const uint64_t column_id, + bool &chosen); + + int is_hbase_table(const uint64_t table_id, + bool &chosen); + +public: + int init(); + void destroy(); + +private: + static const int64_t HBASE_TABLE_COLUMN_COUNT = 4; + const char *K_COLUMN = "K"; + const char *Q_COLUMN = "Q"; + const char *T_COLUMN = "T"; + const char *V_COLUMN = "V"; + + static const int64_t DEFAULT_TABLE_SET_SIZE = 1024; + typedef common::hash::ObHashSet<uint64_t> HbaseTableIDSet; + + struct TableID + { + uint64_t table_id_; + + TableID(const uint64_t table_id) : + table_id_(table_id) + {} + + int64_t hash() const + { + return static_cast<int64_t>(table_id_); + } + + bool operator== (const TableID &other) const + { + return table_id_ == other.table_id_; + } + + void reset() + { + table_id_ = common::OB_INVALID_ID; + } + + TO_STRING_KV(K_(table_id)); + }; + + typedef common::ObLinearHashMap<TableID, uint64_t> ColumnIDMap; + +private: + int filter_hbase_mode_table_(const oceanbase::share::schema::ObTableSchema &table_schema, + bool &is_hbase_mode_table); + +private: + bool inited_; + + HbaseTableIDSet table_id_set_; + ColumnIDMap column_id_map_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogHbaseUtil); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_hbase_mode.h b/src/liboblog/src/ob_log_instance.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0b3ee69d9ed52e1ae2b3f07849e2c105d9305171 --- /dev/null +++ b/src/liboblog/src/ob_log_instance.cpp @@ -0,0 +1,2637 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
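TableID above supplies hash() and operator== because that is the key contract ObLinearHashMap expects; it mirrors what std::unordered_map asks for via a hash functor plus equality. A minimal standalone analogue (the std containers are used purely for illustration and are not what liboblog itself uses):

#include <cstdint>
#include <unordered_map>

struct TableKey
{
  uint64_t table_id_;
  bool operator==(const TableKey &other) const { return table_id_ == other.table_id_; }
};

// Plays the role of TableID::hash() for the standard container.
struct TableKeyHash
{
  std::size_t operator()(const TableKey &key) const
  {
    return static_cast<std::size_t>(key.table_id_);
  }
};

int main()
{
  // table id -> column id of its T column (illustrative values only)
  std::unordered_map<TableKey, uint64_t, TableKeyHash> t_column_of_table;
  t_column_of_table[TableKey{1001}] = 16;
  return 0;
}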
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_instance.h" + +#include "lib/oblog/ob_log_module.h" // LOG_ERROR +#include "lib/file/file_directory_utils.h" // FileDirectoryUtils +#include "share/ob_version.h" // build_version +#include "share/ob_tenant_mgr.h" // ObTenantManager +#include "share/system_variable/ob_system_variable.h" // ObPreProcessSysVars +#include "share/ob_time_utility2.h" // ObTimeUtility2 +#include "sql/ob_sql_init.h" // init_sql_factories +#include "observer/ob_server_struct.h" // GCTX +#include "observer/omt/ob_tenant_timezone_mgr.h" // OTTZ_MGR +#include "common/ob_clock_generator.h" + +#include "ob_log_common.h" +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_utils.h" // ob_log_malloc +#include "ob_log_meta_manager.h" // ObLogMetaManager +#include "ob_log_sql_server_provider.h" // ObLogSQLServerProvider +#include "ob_log_schema_getter.h" // ObLogSchemaGetter +#include "ob_log_timezone_info_getter.h" // ObLogTimeZoneInfoGetter +#include "ob_log_committer.h" // ObLogCommitter +#include "ob_log_formatter.h" // ObLogFormatter +#include "ob_log_storager.h" // ObLogStorager +#include "ob_log_data_processor.h" // ObLogDataProcessor +#include "ob_log_sequencer1.h" // ObLogSequencer +#include "ob_log_part_trans_parser.h" // ObLogPartTransParser +#include "ob_log_dml_parser.h" // ObLogDmlParser +#include "ob_log_ddl_parser.h" // ObLogDdlParser +#include "ob_log_fetcher.h" // ObLogFetcher +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_table_matcher.h" // ObLogTableMatcher +#include "ob_log_trans_ctx_mgr.h" // ObLogTransCtxMgr +#include "ob_log_trans_ctx.h" // TransCtx +#include "ob_log_resource_collector.h" // ObLogResourceCollector +#include "ob_log_binlog_record_pool.h" // ObLogBRPool +#include "ob_log_ddl_handler.h" // ObLogDDLHandler +#include "ob_log_start_schema_matcher.h" // ObLogStartSchemaMatcher +#include "ob_log_tenant_mgr.h" // IObLogTenantMgr +#include "ob_log_mock_store_service.h" // MockObLogStoreService + +#include "ob_log_trace_id.h" + +#define INIT(v, type, args...) 
\ + do {\ + if (OB_SUCC(ret)) { \ + type *tmp_var = NULL; \ + if (OB_ISNULL(tmp_var = new(std::nothrow) type())) { \ + _LOG_ERROR("construct %s fail", #type); \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + } else if (OB_FAIL(tmp_var->init(args))) { \ + _LOG_ERROR("init %s fail, ret=%d", #type, ret); \ + delete tmp_var; \ + tmp_var = NULL; \ + } else { \ + v = tmp_var; \ + _LOG_INFO("init component \'%s\' succ", #type); \ + } \ + } \ + } while (0) + +#define DESTROY(v, type) \ + do {\ + if (NULL != v) { \ + type *var = static_cast(v); \ + (void)var->destroy(); \ + delete v; \ + v = NULL; \ + } \ + } while (0) + +using namespace oceanbase::common; + +namespace oceanbase +{ +using namespace share; +namespace liboblog +{ + +ObLogInstance *ObLogInstance::instance_ = NULL; + +ObLogInstance *ObLogInstance::get_instance() +{ + if (NULL == instance_) { + instance_ = new(std::nothrow) ObLogInstance(); + } + + return instance_; +} + +// Here is the chicken and egg problem, assuming that ObLogInstance has already been created when get_ref_instance() is called +ObLogInstance &ObLogInstance::get_ref_instance() +{ + if (NULL == instance_) { + LOG_ERROR("ObLogInstance is NULL", K(instance_)); + } + return *instance_; +} + +void ObLogInstance::destroy_instance() +{ + if (NULL != instance_) { + _LOG_INFO("ObLogInstance %p destroy", instance_); + delete instance_; + instance_ = NULL; + } +} + +ObLogInstance::ObLogInstance() : + inited_(false), + oblog_major_(0), + oblog_minor_(0), + oblog_patch_(0), + timer_tid_(0), + sql_tid_(0), + flow_control_tid_(0), + err_cb_(NULL), + global_errno_(0), + handle_error_flag_(0), + disable_redirect_log_(false), + log_clean_cycle_time_us_(0), + output_dml_br_count_(0), + output_ddl_br_count_(0), + stop_flag_(true), + last_heartbeat_timestamp_micro_sec_(0), + is_assign_log_dir_valid_(false), + br_index_in_trans_(0), + part_trans_task_count_(0), + trans_task_pool_alloc_(), + start_tstamp_(0), + is_schema_split_mode_(false), + drc_message_factory_binlog_record_type_(), + working_mode_(WorkingMode::UNKNOWN_MODE), + mysql_proxy_(), + timezone_info_getter_(NULL), + hbase_util_(), + obj2str_helper_(), + br_queue_(), + trans_task_pool_(), + log_entry_task_pool_(NULL), + store_service_(NULL), + br_pool_(NULL), + trans_ctx_mgr_(NULL), + meta_manager_(NULL), + resource_collector_(NULL), + server_provider_(NULL), + schema_getter_(NULL), + tb_matcher_(NULL), + ss_matcher_(NULL), + systable_helper_(NULL), + committer_(NULL), + storager_(NULL), + data_processor_(NULL), + formatter_(NULL), + sequencer_(NULL), + part_trans_parser_(NULL), + dml_parser_(NULL), + ddl_parser_(NULL), + ddl_handler_(NULL), + fetcher_(NULL), + trans_stat_mgr_(NULL), + tenant_mgr_(NULL) +{ + MEMSET(assign_log_dir_, 0, sizeof(assign_log_dir_)); + MEMSET(ob_trace_id_str_, 0, sizeof(ob_trace_id_str_)); +} + +ObLogInstance::~ObLogInstance() +{ + destroy(); + + LOG_INFO("====================liboblog end===================="); +} + +int ObLogInstance::init(const char *config_file, + const uint64_t start_tstamp_sec, + ERROR_CALLBACK err_cb /* = NULL */) +{ + const int64_t start_tstamp_usec = start_tstamp_sec * _SEC_; + return init_with_start_tstamp_usec(config_file, start_tstamp_usec, err_cb); +} + +int ObLogInstance::init_with_start_tstamp_usec(const char *config_file, + const uint64_t start_tstamp_usec, + ERROR_CALLBACK err_cb /* = NULL */) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("instance has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(config_file)) { + LOG_ERROR("invalid 
arguments", K(config_file)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_logger_())) { // First initialize the logging module + LOG_ERROR("init_logger_ fail", KR(ret)); + } else if (OB_FAIL(TCONF.init())) { + LOG_ERROR("config init fail", KR(ret)); + } else if (OB_FAIL(TCONF.load_from_file(config_file))) { + LOG_ERROR("load config from file fail", KR(ret), K(config_file)); + } else if (OB_FAIL(init_common_(start_tstamp_usec, err_cb))) { + LOG_ERROR("init_common_ fail", KR(ret), K(start_tstamp_usec), K(err_cb)); + } else { + inited_ = true; + } + + return ret; +} + +int ObLogInstance::init(const std::map& configs, + const uint64_t start_tstamp_sec, + ERROR_CALLBACK err_cb /* = NULL */ ) +{ + int ret = OB_SUCCESS; + const int64_t start_tstamp_usec = start_tstamp_sec * _SEC_; + + if (OB_FAIL(init_with_start_tstamp_usec(configs, start_tstamp_usec, err_cb))) { + LOG_ERROR("init fail", KR(ret), K(start_tstamp_usec)); + } + + return ret; +} + +int ObLogInstance::init_with_start_tstamp_usec(const std::map& configs, + const uint64_t start_tstamp_usec, + ERROR_CALLBACK err_cb /* = NULL */ ) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("instance has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_FAIL(init_logger_())) { // First initialize the logging module + LOG_ERROR("init_logger_ fail", KR(ret)); + } else if (OB_FAIL(TCONF.init())) { + LOG_ERROR("config init fail", KR(ret)); + } else if (OB_FAIL(TCONF.load_from_map(configs))) { + LOG_ERROR("load config from map fail", KR(ret)); + } else if (OB_FAIL(init_common_(start_tstamp_usec, err_cb))) { + // handle error + } else { + inited_ = true; + } + + return ret; +} + +int ObLogInstance::set_assign_log_dir(const char *log_dir, const int64_t log_dir_len) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(log_dir) || OB_UNLIKELY(log_dir_len > OB_MAX_FILE_NAME_LENGTH)) { + ret = OB_INVALID_ARGUMENT; + } else { + (void)snprintf(assign_log_dir_, sizeof(assign_log_dir_), "%.*s", static_cast(log_dir_len), log_dir); + is_assign_log_dir_valid_ = true; + } + + return ret; +} + +int ObLogInstance::set_data_start_ddl_schema_version(const uint64_t tenant_id, + const int64_t data_start_ddl_schema_version) +{ + int ret = OB_SUCCESS; + ObLogTenant *tenant = NULL; + ObLogTenantGuard guard; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (! 
stop_flag_) { + LOG_ERROR("ObLogInstance have already started, can not set data start ddl schema version", + K(stop_flag_), K(tenant_id), K(data_start_ddl_schema_version)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) + || OB_UNLIKELY(OB_INVALID_TIMESTAMP == data_start_ddl_schema_version)) { + LOG_ERROR("invalid argument", K(tenant_id), K(data_start_ddl_schema_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("tenant is null", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else { + tenant->update_global_data_schema_version(data_start_ddl_schema_version); + } + + return ret; +} + +int ObLogInstance::get_start_schema_version(const uint64_t tenant_id, + const bool is_create_tenant_when_backup, + int64_t &start_schema_version) +{ + int ret = OB_SUCCESS; + ObLogTenant *tenant = NULL; + ObLogTenantGuard guard; + start_schema_version = OB_INVALID_TIMESTAMP; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (! is_create_tenant_when_backup && ! stop_flag_) { + LOG_ERROR("ObLogInstance have already started, can not get start schema version", + K(stop_flag_), K(tenant_id), K(is_create_tenant_when_backup), K(start_schema_version)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + LOG_ERROR("invalid argument", K(tenant_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("tenant is null", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else { + start_schema_version = tenant->get_start_schema_version(); + } + + return ret; +} + +int ObLogInstance::set_start_global_trans_version(const int64_t start_global_trans_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_global_trans_version)) { + LOG_ERROR("invalid argument", K(start_global_trans_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(fetcher_)) { + LOG_ERROR("fetcher_ is null", K(fetcher_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(fetcher_->set_start_global_trans_version(start_global_trans_version))) { + LOG_ERROR("fetcher_ set_start_global_trans_version fail", KR(ret), K(start_global_trans_version)); + } else { + LOG_INFO("set_start_global_trans_version succ", K(start_global_trans_version)); + } + + return ret; +} + +int ObLogInstance::init_logger_() +{ + int ret = OB_SUCCESS; + char log_dir[OB_MAX_FILE_NAME_LENGTH]; + char log_file[OB_MAX_FILE_NAME_LENGTH]; + char stderr_log_file[OB_MAX_FILE_NAME_LENGTH]; + + if (is_assign_log_dir_valid_) { + (void)snprintf(log_dir, sizeof(log_dir), "%s", assign_log_dir_); + (void)snprintf(log_file, sizeof(log_file), "%s%s", assign_log_dir_, DEFAULT_LOG_FILE_NAME); + (void)snprintf(stderr_log_file, sizeof(stderr_log_file), "%s%s", assign_log_dir_, DEFAULT_STDERR_LOG_FILE_NAME); + } else { + (void)snprintf(log_dir, sizeof(log_dir), "%s", DEFAULT_LOG_DIR); + (void)snprintf(log_file, sizeof(log_file), "%s", DEFAULT_LOG_FILE); + (void)snprintf(stderr_log_file, sizeof(stderr_log_file), "%s", DEFAULT_STDERR_LOG_FILE); + } + + if (OB_FAIL(common::FileDirectoryUtils::create_full_path(log_dir))) { + LOG_ERROR("FileDirectoryUtils create_full_path fail", KR(ret), K(log_dir)); + } else { + easy_log_level = EASY_LOG_INFO; + OB_LOGGER.set_max_file_size(MAX_LOG_FILE_SIZE); + OB_LOGGER.set_file_name(log_file, disable_redirect_log_, false); + OB_LOGGER.set_log_level("INFO"); + OB_LOGGER.disable_thread_log_level(); + + if (! disable_redirect_log_) { + // Open the stderr log file + // and redirects stderr to that log file + int32_t stderr_log_fd = open(stderr_log_file, O_RDWR | O_CREAT | O_APPEND | O_LARGEFILE, 0644); + if (OB_UNLIKELY(stderr_log_fd <= 0)) { + LOG_ERROR("failed to open stderror log, which fd should be greater than 0", K(stderr_log_fd)); + } else { + dup2(stderr_log_fd, 2); + close(stderr_log_fd); + stderr_log_fd = -1; + } + } + + _LOG_INFO("====================liboblog start===================="); + _LOG_INFO("liboblog %s %s", PACKAGE_VERSION, RELEASEID); + _LOG_INFO("BUILD_VERSION: %s", build_version()); + _LOG_INFO("BUILD_TIME: %s %s", build_date(), build_time()); + _LOG_INFO("BUILD_FLAGS: %s", build_flags()); + _LOG_INFO("Copyright (c) 2007-2016 Alipay Inc."); + _LOG_INFO("======================================================"); + _LOG_INFO("\n"); + } + + return ret; +} + +#define MPRINT(format, ...) 
fprintf(stderr, format "\n", ##__VA_ARGS__) + +void ObLogInstance::print_version() +{ + MPRINT("liboblog %s %s", PACKAGE_VERSION, RELEASEID); + MPRINT("REVISION: %s", build_version()); + MPRINT("BUILD_TIME: %s %s", build_date(), build_time()); + MPRINT("BUILD_FLAGS: %s\n", build_flags()); + MPRINT("Copyright (c) 2007-2016 Alipay Inc."); + MPRINT(); +} + +// ObKVGlobalCache rely on ObTenantManager +int ObLogInstance::init_global_tenant_manager_() +{ + int ret = OB_SUCCESS; + static const int64_t DEFAULT_TENANT_COUNT = 2; + static const int64_t SCHEMA_CACHE_MEM_LIMIT_LOWER_BOUND = 0; + static const int64_t SCHEMA_CACHE_MEM_LIMIT_UPPER_BOUND = 1L << 31L; // 2G + ObTenantManager &tenant_manager = ObTenantManager::get_instance(); + const int64_t tenant_manager_memory_upper_limit = TCONF.tenant_manager_memory_upper_limit.get(); + + if (OB_FAIL(tenant_manager.init(DEFAULT_TENANT_COUNT))) { + LOG_ERROR("init tenant manager fail", KR(ret)); + } + // The SYS tenant is used only for the Schema Cache module, and it allocates the memory that occupies the share of the SYS tenant + else if (OB_FAIL(tenant_manager.add_tenant(OB_SYS_TENANT_ID))) { + LOG_ERROR("add OB SYS tenant fail", KR(ret)); + } else if (OB_FAIL(tenant_manager.set_tenant_mem_limit(OB_SYS_TENANT_ID, + SCHEMA_CACHE_MEM_LIMIT_LOWER_BOUND, + SCHEMA_CACHE_MEM_LIMIT_UPPER_BOUND))) { + LOG_ERROR("set_tenant_mem_limit fail for OB SYS tenant", KR(ret)); + } else if (OB_FAIL(tenant_manager.add_tenant(OB_SERVER_TENANT_ID))) { + LOG_ERROR("add OB SERVER tenant fail", KR(ret)); + } else if (OB_FAIL(tenant_manager.set_tenant_mem_limit(OB_SERVER_TENANT_ID, 0, tenant_manager_memory_upper_limit))) { + LOG_ERROR("set_tenant_mem_limit fail for OB SERVER tenant", KR(ret)); + } else { + LOG_INFO("ObTenantManager add_tenant succ", + "tenant_manager_memory_upper_limit", SIZE_TO_STR(tenant_manager_memory_upper_limit)); + } + + return ret; +} + +int ObLogInstance::init_global_kvcache_() +{ + int ret = OB_SUCCESS; + static const int64_t KV_CACHE_WASH_TIMER_INTERVAL_US = 60 * _SEC_; + static const int64_t DEFAULT_BUCKET_NUM = 10000000L; + static const int64_t DEFAULT_MAX_CACHE_SIZE = 1024L * 1024L * 1024L * 1024L; //1T + + // init schema cache + if (OB_FAIL(ObKVGlobalCache::get_instance().init(DEFAULT_BUCKET_NUM, + DEFAULT_MAX_CACHE_SIZE, + lib::ACHUNK_SIZE, + KV_CACHE_WASH_TIMER_INTERVAL_US))) { + LOG_ERROR("Fail to init ObKVGlobalCache", KR(ret)); + } else if (OB_FAIL(lib::ObResourceMgr::get_instance().set_cache_washer(ObKVGlobalCache::get_instance()))) { + LOG_ERROR("Fail to set_cache_washer", KR(ret)); + } else { + LOG_INFO("ObKVGlobalCache init succ", "max_cached_size", SIZE_TO_STR(DEFAULT_QUEUE_SIZE)); + } + + return ret; +} + +// FIXME: when refreshing the schema, construct "generated column" schema depends on the default system variables, require initialization +// The specific function is: ObSchemaUtils::cascaded_generated_column() +// +// This situation is only temporary, the subsequent "generated column" logic will decouple the schema and the default system variables. +// Once decoupled, there is no need to initialize system variables here +// After decoupling, there is no need to initialize system variables here. 
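init_global_tenant_manager_ and init_global_kvcache_ above follow the cascading OB_FAIL / else-if idiom used throughout this file: each step executes only if every previous step left ret at success, and the first failure short-circuits the chain with ret holding that error. A self-contained sketch of the idiom; SKETCH_OB_FAIL and the step functions are stand-ins, not the real OB_FAIL macro:

#include <cstdio>

static const int SKETCH_SUCCESS = 0;
static const int SKETCH_INVALID_ARGUMENT = -4002;

// Stand-in for OB_FAIL: run the call, store its result in ret, test for failure.
#define SKETCH_OB_FAIL(expr) (SKETCH_SUCCESS != (ret = (expr)))

static int step_a() { return SKETCH_SUCCESS; }
static int step_b() { return SKETCH_INVALID_ARGUMENT; }  // pretend this step fails
static int step_c() { return SKETCH_SUCCESS; }           // never reached

int init_chain()
{
  int ret = SKETCH_SUCCESS;
  if (SKETCH_OB_FAIL(step_a())) {
    std::printf("step_a fail, ret=%d\n", ret);
  } else if (SKETCH_OB_FAIL(step_b())) {
    std::printf("step_b fail, ret=%d\n", ret);  // chain stops here
  } else if (SKETCH_OB_FAIL(step_c())) {
    std::printf("step_c fail, ret=%d\n", ret);
  } else {
    std::printf("all steps succ\n");
  }
  return ret;
}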
+int ObLogInstance::init_sys_var_for_generate_column_schema_() +{ + int ret = OB_SUCCESS; + ::oceanbase::sql::init_sql_factories(); + + if (OB_FAIL(ObPreProcessSysVars::init_sys_var())){ + LOG_ERROR("PreProcessing init system variable failed", KR(ret)); + } else { + // success + } + return ret; +} + +#include "lib/alloc/alloc_struct.h" +int ObLogInstance::init_common_(uint64_t start_tstamp_usec, ERROR_CALLBACK err_cb) +{ + int ret = OB_SUCCESS; + int64_t current_timestamp_usec = get_timestamp(); + + if (start_tstamp_usec <= 0) { + start_tstamp_usec = current_timestamp_usec; + _LOG_INFO("start liboblog from current timestamp: %ld", start_tstamp_usec); + } + + if (OB_SUCC(ret)) { + err_cb_ = err_cb; + global_errno_ = 0; + handle_error_flag_ = 0; + start_tstamp_ = start_tstamp_usec; + is_schema_split_mode_ = false; + + // set pid file + write_pid_file_(); + + // 1. set the initialization log level to ensure that the schema prints an INFO log at startup + // 2. Change the schema to WARN after the startup is complete + OB_LOGGER.set_mod_log_levels(TCONF.init_log_level.str()); + + // 校验配置项是否满足期望 + if (OB_FAIL(TCONF.check_all())) { + LOG_ERROR("check config fail", KR(ret)); + } else if (OB_FAIL(dump_config_())) { + LOG_ERROR("dump_config_ fail", KR(ret)); + } else if (OB_FAIL(trans_task_pool_alloc_.init(TASK_POOL_ALLOCATOR_TOTAL_LIMIT, + TASK_POOL_ALLOCATOR_HOLD_LIMIT, + TASK_POOL_ALLOCATOR_PAGE_SIZE))) { + LOG_ERROR("init fifo allocator fail", KR(ret)); + } else if (OB_FAIL(trans_task_pool_.init(&trans_task_pool_alloc_, + TCONF.part_trans_task_prealloc_count, + TCONF.part_trans_task_page_size, + 1 == TCONF.part_trans_task_dynamic_alloc, + TCONF.part_trans_task_prealloc_page_count))) { + LOG_ERROR("init task pool fail", KR(ret)); + } else if (OB_FAIL(hbase_util_.init())) { + LOG_ERROR("init hbase_util_ fail", KR(ret)); + } else if (OB_FAIL(br_queue_.init(DEFAULT_QUEUE_SIZE))) { + LOG_ERROR("init binlog record queue fail", KR(ret)); + } else if (OB_FAIL(init_global_tenant_manager_())) { + LOG_ERROR("init_global_tenant_manager_ fail", KR(ret)); + } else if (OB_FAIL(init_global_kvcache_())) { + LOG_ERROR("init_global_kvcache_ fail", KR(ret)); + } else if (OB_FAIL(init_sys_var_for_generate_column_schema_())) { + LOG_ERROR("init_sys_var_for_generate_column_schema_ fail", KR(ret)); + } else if (OB_FAIL(init_components_(start_tstamp_usec))) { + LOG_ERROR("init_components_ fail", KR(ret), K(start_tstamp_usec)); + } else { + stop_flag_ = true; + timer_tid_ = 0; + sql_tid_ = 0; + flow_control_tid_ = 0; + output_dml_br_count_ = 0; + output_ddl_br_count_ = 0; + last_heartbeat_timestamp_micro_sec_ = start_tstamp_usec; + log_clean_cycle_time_us_ = TCONF.log_clean_cycle_time_in_hours * _HOUR_; + part_trans_task_count_ = 0; + } + } + + if (OB_SUCC(ret)) { + LOG_INFO("init liboblog succ", K_(is_schema_split_mode), K_(start_tstamp), + "start_tstamp", TS_TO_STR(start_tstamp_), + "working_mode", print_working_mode(working_mode_), + K(err_cb)); + } + + if (OB_SUCC(ret)) { + // After startup, set the log level to prevent schema from printing INFO logs + OB_LOGGER.set_mod_log_levels(TCONF.log_level.str()); + } + + return ret; +} + +int ObLogInstance::dump_config_() +{ + int ret = OB_SUCCESS; + const char *config_fpath = TCONF.config_fpath.str(); + + // dump config to log + TCONF.print(); + + // Create the corresponding directory + char *p = strrchr(const_cast(config_fpath), '/'); + if (NULL != p) { + char dir_buffer[OB_MAX_FILE_NAME_LENGTH]; + snprintf(dir_buffer, OB_MAX_FILE_NAME_LENGTH, "%.*s", (int)(p - 
config_fpath), config_fpath); + common::FileDirectoryUtils::create_full_path(dir_buffer); + } + + if (OB_SUCC(ret)) { + // dump config to file + if (OB_FAIL(TCONF.dump2file(config_fpath))) { + LOG_ERROR("config dump2file fail", KR(ret), K(config_fpath)); + } else { + LOG_INFO("dump config to file succ", K(config_fpath)); + } + } + + return ret; +} + +int32_t ObLogInstance::get_pid_() +{ + return static_cast(getpid()); +} + +int ObLogInstance::init_self_addr_() +{ + int ret = OB_SUCCESS; + static const int64_t BUF_SIZE = 128; + char BUFFER[BUF_SIZE]; + int32_t self_pid = get_pid_(); + ObString local_ip(sizeof(BUFFER), 0, BUFFER); + + if (OB_FAIL(get_local_ip(local_ip))) { + LOG_ERROR("get_local_ip fail", KR(ret), K(local_ip)); + } else if (!get_self_addr().set_ip_addr(local_ip, self_pid)) { + LOG_ERROR("self addr set ip addr error", K(local_ip), K(self_pid)); + } else { + // succ + } + + return ret; +} + +int ObLogInstance::init_schema_split_mode_(const int64_t sys_schema_version) +{ + int ret = OB_SUCCESS; + bool bool_ret = false; + int64_t split_schema_version = 0; + int64_t timeout = GET_SCHEMA_TIMEOUT_ON_START_UP; + + if (OB_ISNULL(schema_getter_)) { + ret = OB_ERR_UNEXPECTED; + } + // After determining the starting schema version of the sys tenant, determine the split schema version + // Note: this order cannot be changed, otherwise liboblog will not see the dynamic change of split schema version + else if (OB_FAIL(schema_getter_->load_split_schema_version(split_schema_version, timeout))) { + LOG_ERROR("load_split_schema_version fail", KR(ret), K(timeout)); + } else { + // 1. split_schema_version is an invalid value, which means it must be a non-split schema + // 2. split_schema_version is a valid value, which determines whether the cluster is in split mode by comparing the sys tenant schema version + // 3. 
split_schema_version is 0, which means the cluster is started in schema split mode, so it must be split mode + if (split_schema_version < 0) { + bool_ret = false; + } else if (0 == split_schema_version) { + bool_ret = true; + } else if (sys_schema_version < split_schema_version) { + bool_ret = false; + } else { + bool_ret = true; + } + + ATOMIC_STORE(&is_schema_split_mode_, bool_ret); + } + + LOG_INFO("init schema split mode", KR(ret), K_(is_schema_split_mode), K(split_schema_version), + K(sys_schema_version)); + return ret; +} + +// init schema module +int ObLogInstance::init_schema_(const int64_t start_tstamp_us, int64_t &sys_start_schema_version) +{ + int ret = OB_SUCCESS; + const uint64_t sys_tenant_id = OB_SYS_TENANT_ID; + ObLogSchemaGuard sys_schema_guard; + + INIT(schema_getter_, ObLogSchemaGetter, mysql_proxy_.get_ob_mysql_proxy(), + &(TCONF.get_common_config()), TCONF.cached_schema_version_count, + TCONF.history_schema_version_count); + + if (OB_SUCC(ret)) { + // Get the SYS tenant startup schema version + // Note: SYS tenants do not need to handle tenant deletion scenarios + if (OB_FAIL(schema_getter_->get_schema_version_by_timestamp(sys_tenant_id, start_tstamp_us, + sys_start_schema_version, GET_SCHEMA_TIMEOUT_ON_START_UP))) { + LOG_ERROR("get_schema_version_by_timestamp fail", KR(ret), K(sys_tenant_id), K(start_tstamp_us)); + } + // init schema_split_mode + else if (OB_FAIL(init_schema_split_mode_(sys_start_schema_version))) { + LOG_ERROR("init schema split mode fail", KR(ret), K(sys_start_schema_version)); + } + } + return ret; +} + +int ObLogInstance::init_components_(const uint64_t start_tstamp_usec) +{ + int ret = OB_SUCCESS; + IObLogErrHandler *err_handler = this; + int64_t start_seq = DEFAULT_START_SEQUENCE_NUM; + const char *config_url = NULL; + bool skip_dirty_data = (TCONF.skip_dirty_data != 0); + bool skip_reversed_schema_verison = (TCONF.skip_reversed_schema_verison != 0); + bool enable_hbase_mode = (TCONF.enable_hbase_mode != 0); + bool enable_backup_mode = (TCONF.enable_backup_mode != 0); + bool skip_hbase_mode_put_column_count_not_consistency = (TCONF.skip_hbase_mode_put_column_count_not_consistency != 0); + bool enable_convert_timestamp_to_unix_timestamp = (TCONF.enable_convert_timestamp_to_unix_timestamp != 0); + bool enable_output_hidden_primary_key = (TCONF.enable_output_hidden_primary_key != 0); + bool enable_oracle_mode_match_case_sensitive = (TCONF.enable_oracle_mode_match_case_sensitive != 0); + const char *rs_list = TCONF.rootserver_list.str(); + const char *cluster_user = TCONF.cluster_user.str(); + const char *cluster_password = TCONF.cluster_password.str(); + const char *cluster_db_name = TCONF.cluster_db_name.str(); + const char *tb_white_list = TCONF.tb_white_list.str(); + const char *tb_black_list = TCONF.tb_black_list.str(); + const char *tg_white_list = TCONF.tablegroup_white_list.str(); + const char *tg_black_list = TCONF.tablegroup_black_list.str(); + int64_t max_cached_trans_ctx_count = MAX_CACHED_TRANS_CTX_COUNT; + int64_t sql_conn_timeout_us = TCONF.mysql_connect_timeout_sec * _SEC_; + int64_t sql_query_timeout_us = TCONF.mysql_query_timeout_sec * _SEC_; + const char *ob_trace_id_ptr = TCONF.ob_trace_id.str(); + const char *drc_message_factory_binlog_record_type_str = TCONF.drc_message_factory_binlog_record_type.str(); + // The starting schema version of the SYS tenant + int64_t sys_start_schema_version = OB_INVALID_VERSION; + const char *data_start_schema_version = TCONF.data_start_schema_version.str(); + const char *store_service_path = 
TCONF.store_service_path.str(); + const char *working_mode_str = TCONF.working_mode.str(); + WorkingMode working_mode = get_working_mode(working_mode_str); + const bool enable_ssl_client_authentication = (1 == TCONF.ssl_client_authentication); + + drc_message_factory_binlog_record_type_.assign(drc_message_factory_binlog_record_type_str, + strlen(drc_message_factory_binlog_record_type_str)); + + if (OB_UNLIKELY(! is_working_mode_valid(working_mode))) { + LOG_ERROR("working_mode is not valid", K(working_mode_str), "working_mode", print_working_mode(working_mode)); + ret = OB_INVALID_CONFIG; + } else { + working_mode_ = working_mode; + + LOG_INFO("set working mode", K(working_mode_str), K(working_mode_), "working_mode", print_working_mode(working_mode_)); + } + + // init ObTraceId + if (OB_FAIL(ret)) { + } else if (TCONF.need_verify_ob_trace_id) { + if (OB_FAIL(init_ob_trace_id_(ob_trace_id_ptr))) { + LOG_ERROR("init_ob_trace_id_ fail", KR(ret), K(ob_trace_id_ptr)); + } + } + + // init self addr + if (OB_FAIL(ret)) { + } else if (OB_FAIL(init_self_addr_())) { + LOG_ERROR("init self addr error", KR(ret)); + } + // format cluster_url + else if (OB_FAIL(TCONF.format_cluster_url())) { + LOG_ERROR("format config url fail", KR(ret)); + } else { + config_url = TCONF.cluster_url.str(); + } + + INIT(server_provider_, ObLogSQLServerProvider, config_url, rs_list); + + if (OB_SUCC(ret)) { + if (OB_FAIL(ObMemoryDump::get_instance().init())) { + LOG_ERROR("init memory dump fail", K(ret)); + } + } + + // init ObLogMysqlProxy + if (OB_SUCC(ret)) { + if (OB_FAIL(mysql_proxy_.init(server_provider_, cluster_user, cluster_password, + cluster_db_name, sql_conn_timeout_us, sql_query_timeout_us, enable_ssl_client_authentication))) { + LOG_ERROR("mysql_proxy_ init fail", KR(ret), K(server_provider_), + K(cluster_user), K(cluster_password), K(cluster_db_name), K(sql_conn_timeout_us), + K(sql_query_timeout_us), K(enable_ssl_client_authentication)); + } + } + + // init ObCompatModeGetter + if (OB_SUCC(ret)) { + if (OB_FAIL(share::ObCompatModeGetter::instance().init(&(mysql_proxy_.get_ob_mysql_proxy())))) { + LOG_ERROR("compat_mode_getter init fail", KR(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(common::ObClockGenerator::init())) { + LOG_ERROR("failed to init ob clock generator", KR(ret)); + } + } + + INIT(log_entry_task_pool_, ObLogEntryTaskPool, TCONF.log_entry_task_prealloc_count); + + INIT(store_service_, MockObLogStoreService, store_service_path); + + INIT(br_pool_, ObLogBRPool, TCONF.binlog_record_prealloc_count); + + INIT(trans_ctx_mgr_, ObLogTransCtxMgr, max_cached_trans_ctx_count, TCONF.sort_trans_participants); + + INIT(systable_helper_, ObLogSysTableHelper, *server_provider_, + TCONF.access_systable_helper_thread_num, TCONF.cluster_user, + TCONF.cluster_password, TCONF.cluster_db_name); + + INIT(meta_manager_, ObLogMetaManager, &obj2str_helper_, enable_output_hidden_primary_key); + + INIT(resource_collector_, ObLogResourceCollector, + TCONF.resource_collector_thread_num, TCONF.resource_collector_thread_num_for_br, DEFAULT_QUEUE_SIZE, + br_pool_, trans_ctx_mgr_, meta_manager_, store_service_); + + // init oblog version,e.g. 
2.2.1 + if (OB_SUCC(ret)) { + if (OB_FAIL(init_oblog_version_components_())) { + LOG_ERROR("init oblog version components fail", KR(ret)); + } + } + + // Initialize ObClusterVersion before initializing the schema module + if (OB_SUCC(ret)) { + if (OB_FAIL(init_ob_cluster_version_())) { + LOG_ERROR("init_ob_cluster_version_ fail", KR(ret)); + } + } + + // check oblog version is greater than or equal to ob version + if (OB_SUCC(ret)) { + if (OB_FAIL(check_observer_version_valid_())) { + LOG_ERROR("check_observer_version_valid_ fail", KR(ret)); + } + } + + if (OB_SUCC(ret)) { + // init GCTX + init_global_context_(); + } + + INIT(tenant_mgr_, ObLogTenantMgr, enable_oracle_mode_match_case_sensitive); + + INIT(timezone_info_getter_, ObLogTimeZoneInfoGetter, TCONF.timezone.str(), + mysql_proxy_.get_ob_mysql_proxy(), *systable_helper_, *tenant_mgr_, *err_handler); + + if (OB_SUCC(ret)) { + // init interface for getting tenant timezone map + OTTZ_MGR.init(ObLogTimeZoneInfoGetter::get_tenant_timezone_map); + } + + // The initialization of schema depends on the initialization of timezone_info_getter_, + // and the initialization of timezone_info_getter_ depends on the initialization of tenant_mgr_ + if (OB_SUCC(ret)) { + // Initialize schema-related modules, split patterns, and SYS tenant starting schema versions based on start-up timestamps + if (OB_FAIL(init_schema_(start_tstamp_usec, sys_start_schema_version))) { + LOG_ERROR("init schema fail", KR(ret), K(start_tstamp_usec)); + } + } + + INIT(tb_matcher_, ObLogTableMatcher, tb_white_list, tb_black_list, tg_white_list, tg_black_list); + + INIT(ss_matcher_, ObLogStartSchemaMatcher, data_start_schema_version); + + INIT(trans_stat_mgr_, ObLogTransStatMgr); + + // After initializing the timezone info getter successfully, initialize the obj2str_helper_ + if (OB_SUCC(ret)) { + if (OB_FAIL(obj2str_helper_.init(*timezone_info_getter_, hbase_util_, enable_hbase_mode, + enable_convert_timestamp_to_unix_timestamp, enable_backup_mode, *tenant_mgr_))) { + LOG_ERROR("init obj2str_helper fail", KR(ret), K(enable_hbase_mode), + K(enable_convert_timestamp_to_unix_timestamp), K(enable_backup_mode)); + } + } + + ObLogSysTableHelper::ClusterInfo cluster_info; + if (OB_SUCC(ret)) { + if (OB_FAIL(query_cluster_info_(cluster_info))) { + LOG_ERROR("query_cluster_info_ fail", KR(ret), K(cluster_info)); + } + } + + INIT(committer_, ObLogCommitter, start_seq, &br_queue_, resource_collector_, + br_pool_, trans_ctx_mgr_, trans_stat_mgr_, err_handler); + + INIT(storager_, ObLogStorager, TCONF.storager_thread_num, TCONF.storager_queue_length, *store_service_, *err_handler); + + INIT(data_processor_, ObLogDataProcessor, TCONF.data_processor_thread_num, TCONF.data_processor_queue_length, + working_mode_, *store_service_, *err_handler); + + INIT(formatter_, ObLogFormatter, TCONF.formatter_thread_num, DEFAULT_QUEUE_SIZE, working_mode_, + &obj2str_helper_, br_pool_, meta_manager_, schema_getter_, storager_, err_handler, + skip_dirty_data, enable_hbase_mode, hbase_util_, skip_hbase_mode_put_column_count_not_consistency, + enable_output_hidden_primary_key); + + INIT(sequencer_, ObLogSequencer, TCONF.sequencer_thread_num, TCONF.sequencer_queue_length, + *trans_ctx_mgr_, *trans_stat_mgr_, *committer_, *data_processor_, *err_handler); + + INIT(part_trans_parser_, ObLogPartTransParser, br_pool_, meta_manager_, cluster_info.cluster_id_); + INIT(dml_parser_, ObLogDmlParser, TCONF.dml_parser_thread_num, DEFAULT_QUEUE_SIZE, *formatter_, + *err_handler, *part_trans_parser_); + + 
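Each INIT(member_, Type, args...) call above expands, per the macro defined near the top of this file, into: allocate the component with new(std::nothrow), call its init() with the given arguments, delete it again if init fails, and publish it to the member pointer only on success. A hand-written sketch of that shape for a single component; the template, names and error codes here are placeholders rather than the literal macro expansion:

#include <new>
#include <utility>

static const int SKETCH_SUCCESS = 0;
static const int SKETCH_ALLOCATE_MEMORY_FAILED = -4013;

// Construct-then-init with rollback, mirroring what INIT(...) does for one member.
template <typename ComponentT, typename... Args>
int init_component(ComponentT *&member, int ret, Args &&... args)
{
  if (SKETCH_SUCCESS == ret) {
    ComponentT *tmp = new (std::nothrow) ComponentT();
    if (nullptr == tmp) {
      ret = SKETCH_ALLOCATE_MEMORY_FAILED;            // construction failed
    } else if (SKETCH_SUCCESS != (ret = tmp->init(std::forward<Args>(args)...))) {
      delete tmp;                                     // roll back on init failure
      tmp = nullptr;
    } else {
      member = tmp;                                   // publish only on success
    }
  }
  return ret;
}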
INIT(ddl_parser_, ObLogDdlParser, TCONF.ddl_parser_thread_num, DEFAULT_QUEUE_SIZE, *err_handler, + *part_trans_parser_); + + INIT(ddl_handler_, ObLogDDLHandler, ddl_parser_, sequencer_, err_handler, + schema_getter_, skip_reversed_schema_verison); + + INIT(fetcher_, ObLogFetcher, dml_parser_, ddl_handler_, err_handler, *systable_helper_, + &trans_task_pool_, log_entry_task_pool_, committer_, TCONF, start_seq); + + // config tenant mgr + if (OB_SUCC(ret)) { + if (OB_FAIL(config_tenant_mgr_(start_tstamp_usec, sys_start_schema_version))) { + LOG_ERROR("config_tenant_mgr_ fail", KR(ret), K(start_tstamp_usec), K(sys_start_schema_version)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(config_data_start_schema_version_(TCONF.global_data_start_schema_version))) { + LOG_ERROR("config_data_start_schema_version_ fail", KR(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(update_data_start_schema_version_on_split_mode_())) { + LOG_ERROR("update_data_start_schema_on_split_mode_ fail", KR(ret)); + } + } + + LOG_INFO("init all components done", KR(ret), K(start_tstamp_usec), K(sys_start_schema_version), + K(max_cached_trans_ctx_count), K_(is_schema_split_mode)); + + return ret; +} + +int ObLogInstance::update_data_start_schema_version_on_split_mode_() +{ + int ret = OB_SUCCESS; + + if (! is_schema_split_mode_ || NULL == tenant_mgr_) { + //do nothing + } else if (OB_FAIL(tenant_mgr_->set_data_start_schema_version_on_split_mode())) { + LOG_ERROR("set_data_start_schema_version_on_split_mode fail", KR(ret), + K(is_schema_split_mode_), K(tenant_mgr_)); + } else { + // succ + } + + return ret; +} + +int ObLogInstance::config_data_start_schema_version_(const int64_t global_data_start_schema_version) +{ + int ret = OB_SUCCESS; + // Currently only supports non-split mode configuration for all tenants, split mode to be supported TODO + if (! is_schema_split_mode_ && NULL != tenant_mgr_) { + LOG_INFO("config data start schema version", K(global_data_start_schema_version), + K_(is_schema_split_mode)); + + // Set a uniform starting schema version for all tenants in non-split mode + if (global_data_start_schema_version <= 0) { + LOG_INFO("global_data_start_schema_version is not configured under non schema split mode, " + "need not set data start schema version", + K(global_data_start_schema_version), K_(is_schema_split_mode)); + } else if (OB_FAIL(tenant_mgr_->set_data_start_schema_version_for_all_tenant( + global_data_start_schema_version))) { + LOG_ERROR("set_data_start_schema_version_for_all_tenant fail", KR(ret), + K(global_data_start_schema_version), K(is_schema_split_mode_)); + } + } + + return ret; +} + +int ObLogInstance::config_tenant_mgr_(const int64_t start_tstamp_usec, + const int64_t sys_schema_version) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(tenant_mgr_) || OB_ISNULL(fetcher_)) { + LOG_ERROR("invaild argument", K(tenant_mgr_), K(fetcher_)); + ret = OB_INVALID_ARGUMENT; + } + + // Register the "add partition" callback + // Called sequentially + // Committer, Sequencer, Fetcher all need to add partitions dynamically + if (OB_SUCC(ret)) { + if (OB_FAIL(tenant_mgr_->register_part_add_callback(fetcher_))) { + LOG_ERROR("fetcher register_part_add_callback fail", KR(ret), K(fetcher_)); + } else { + LOG_INFO("register add-partition-callback succ", K_(committer), K_(sequencer), K_(fetcher)); + } + } + + // Register the "Recycle Partition" callback + // Called sequentially + // Fetcher cannot delete immediately, it needs to wait for the partition to be reclaimed, i.e. 
safely deleted + if (OB_SUCC(ret)) { + if (OB_FAIL(tenant_mgr_->register_part_recycle_callback(fetcher_))) { + LOG_ERROR("fetcher register_part_recycle_callback fail", KR(ret), K(fetcher_)); + } else { + LOG_INFO("register recycle-partition-callback succ", K_(fetcher)); + } + } + + // Add all tables for all tenants + // Beforehand, make sure all callbacks are registered + if (OB_SUCC(ret)) { + if (OB_FAIL(tenant_mgr_->add_all_tenants(start_tstamp_usec, + sys_schema_version, + GET_SCHEMA_TIMEOUT_ON_START_UP))) { + LOG_ERROR("add_all_tenants fail", KR(ret), K(start_tstamp_usec), K(sys_schema_version)); + } + } + return ret; +} + +void ObLogInstance::destroy_components_() +{ + LOG_INFO("destroy all components begin"); + + // Destruction by reverse order + DESTROY(fetcher_, ObLogFetcher); + DESTROY(ddl_handler_, ObLogDDLHandler); + DESTROY(ddl_parser_, ObLogDdlParser); + DESTROY(dml_parser_, ObLogDmlParser); + DESTROY(part_trans_parser_, ObLogPartTransParser); + DESTROY(sequencer_, ObLogSequencer); + DESTROY(formatter_, ObLogFormatter); + DESTROY(committer_, ObLogCommitter); + DESTROY(systable_helper_, ObLogSysTableHelper); + DESTROY(ss_matcher_, ObLogStartSchemaMatcher); + DESTROY(tb_matcher_, ObLogTableMatcher); + DESTROY(schema_getter_, ObLogSchemaGetter); + DESTROY(server_provider_, ObLogSQLServerProvider); + DESTROY(resource_collector_, ObLogResourceCollector); + DESTROY(meta_manager_, ObLogMetaManager); + DESTROY(trans_ctx_mgr_, ObLogTransCtxMgr); + DESTROY(trans_stat_mgr_, ObLogTransStatMgr); + DESTROY(timezone_info_getter_, ObLogTimeZoneInfoGetter); + DESTROY(tenant_mgr_, ObLogTenantMgr); + DESTROY(log_entry_task_pool_, ObLogEntryTaskPool); + DESTROY(br_pool_, ObLogBRPool); + DESTROY(storager_, ObLogStorager); + DESTROY(data_processor_, ObLogDataProcessor); + DESTROY(store_service_, MockObLogStoreService); + + LOG_INFO("destroy all components end"); +} + +void ObLogInstance::destroy() +{ + stop(); + + inited_ = false; + + oblog_major_ = 0; + oblog_minor_ = 0; + oblog_patch_ = 0; + + destroy_components_(); + err_cb_ = NULL; + + TCONF.destroy(); + stop_flag_ = true; + last_heartbeat_timestamp_micro_sec_ = 0; + trans_stat_mgr_ = NULL; + tenant_mgr_ = NULL; + global_errno_ = 0; + handle_error_flag_ = 0; + disable_redirect_log_ = false; + log_clean_cycle_time_us_ = 0; + mysql_proxy_.destroy(); + hbase_util_.destroy(); + obj2str_helper_.destroy(); + br_queue_.destroy(); + timer_tid_ = 0; + sql_tid_ = 0; + flow_control_tid_ = 0; + + (void)trans_task_pool_.destroy(); + (void)trans_task_pool_alloc_.destroy(); + + output_dml_br_count_ = 0; + output_ddl_br_count_ = 0; + + ObKVGlobalCache::get_instance().destroy(); + ObTenantManager::get_instance().destroy(); + ObMemoryDump::get_instance().destroy(); + ObClockGenerator::destroy(); + + is_assign_log_dir_valid_ = false; + MEMSET(assign_log_dir_, 0, sizeof(assign_log_dir_)); + MEMSET(ob_trace_id_str_, 0, sizeof(ob_trace_id_str_)); + br_index_in_trans_ = 0; + part_trans_task_count_ = 0; + start_tstamp_ = 0; + is_schema_split_mode_ = false; +} + +int ObLogInstance::launch() +{ + int ret = OB_SUCCESS; + + LOG_INFO("launch all components begin"); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + // Reset global error codes at startup + global_errno_ = OB_SUCCESS; + stop_flag_ = false; + + if (OB_FAIL(resource_collector_->start())) { + LOG_ERROR("start resource collector fail", KR(ret)); + } else if (OB_FAIL(storager_->start())) { + LOG_ERROR("start storager_ fail", KR(ret)); + } else if (OB_FAIL(data_processor_->start())) { + LOG_ERROR("start data_processor_ fail", KR(ret)); + } else if (OB_FAIL(committer_->start())) { + LOG_ERROR("start committer fail", KR(ret)); + } else if (OB_FAIL(formatter_->start())) { + LOG_ERROR("start formatter fail", KR(ret)); + } else if (OB_FAIL(sequencer_->start())) { + LOG_ERROR("start sequencer fail", KR(ret)); + } else if (OB_FAIL(dml_parser_->start())) { + LOG_ERROR("start DML parser fail", KR(ret)); + } else if (OB_FAIL(ddl_parser_->start())) { + LOG_ERROR("start DDL parser fail", KR(ret)); + } else if (OB_FAIL(ddl_handler_->start())) { + LOG_ERROR("start fetcher fail", KR(ret)); + } else if (OB_FAIL(fetcher_->start())) { + LOG_ERROR("start fetcher fail", KR(ret)); + } else if (OB_FAIL(start_threads_())) { + LOG_ERROR("start_threads_ fail", KR(ret)); + } else if (OB_FAIL(timezone_info_getter_->start())) { + LOG_ERROR("start_timezone_info_thread_ fail", KR(ret)); + } else { + LOG_INFO("launch all components end success"); + } + } + + return ret; +} + +void ObLogInstance::stop() +{ + if (inited_) { + mark_stop_flag(); + + LOG_INFO("stop all components begin"); + + stop_flag_ = true; + + // stop thread + wait_threads_stop_(); + // stop timezon info getter + timezone_info_getter_->stop(); + + fetcher_->stop(); + ddl_handler_->stop(); + ddl_parser_->stop(); + dml_parser_->stop(); + sequencer_->stop(); + formatter_->stop(); + storager_->stop(); + data_processor_->stop(); + committer_->stop(); + resource_collector_->stop(); + + // set global error code + global_errno_ = (global_errno_ == OB_SUCCESS ? OB_IN_STOP_STATE : global_errno_); + + LOG_INFO("stop all components end"); + } +} + +int ObLogInstance::table_group_match(const char *pattern, + bool &is_matched, + const int fnmatch_flags) +{ + UNUSED(pattern); + UNUSED(is_matched); + UNUSED(fnmatch_flags); + // not support + return OB_NOT_SUPPORTED; +} + +int ObLogInstance::get_table_groups(std::vector &table_groups) +{ + UNUSED(table_groups); + // not support + return OB_NOT_SUPPORTED; +} + +int ObLogInstance::get_tenant_ids(std::vector &tenant_ids) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tenant_mgr_)) { + LOG_ERROR("tenant_mgr_ is null", K(tenant_mgr_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant_mgr_->get_all_tenant_ids(tenant_ids))) { + LOG_ERROR("get_all_tenant_ids fail", KR(ret)); + } else { + // succ + } + + return ret; +} + +void ObLogInstance::mark_stop_flag() +{ + if (inited_) { + LOG_INFO("mark_stop_flag begin"); + + fetcher_->mark_stop_flag(); + ddl_handler_->mark_stop_flag(); + ddl_parser_->mark_stop_flag(); + dml_parser_->mark_stop_flag(); + sequencer_->mark_stop_flag(); + formatter_->mark_stop_flag(); + storager_->mark_stop_flag(); + data_processor_->mark_stop_flag(); + committer_->mark_stop_flag(); + resource_collector_->mark_stop_flag(); + timezone_info_getter_->mark_stop_flag(); + + LOG_INFO("mark_stop_flag end"); + } +} + +int ObLogInstance::next_record(ILogRecord **record, const int64_t timeout_us) +{ + int ret = OB_SUCCESS; + int32_t major_version = 0; + uint64_t tenant_id = OB_INVALID_ID; + + if (OB_FAIL(next_record(record, major_version, tenant_id, timeout_us))) { + if (OB_TIMEOUT != ret && OB_IN_STOP_STATE != ret) { + LOG_ERROR("next record fail", KR(ret), K(record)); + } + } + + return ret; +} + +int ObLogInstance::next_record(ILogRecord **record, + int32_t &major_version, + uint64_t &tenant_id, + const int64_t timeout_us) +{ + int ret = OB_SUCCESS; + ILogRecord *pop_record = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(record)) { + LOG_ERROR("invalid argument", K(record)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(OB_SUCCESS != global_errno_)) { + // In case of global error, the corresponding error code is returned, except for OB_TIMEOUT + ret = (OB_TIMEOUT == global_errno_) ? 
OB_IN_STOP_STATE : global_errno_; + } else if (OB_FAIL(br_queue_.pop(pop_record, major_version, tenant_id, timeout_us))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("pop binlog record from br_queue fail", KR(ret)); + } + } else if (OB_ISNULL(pop_record)) { + LOG_ERROR("pop binlog record from br_queue fail", KR(ret), K(pop_record)); + ret = OB_ERR_UNEXPECTED; + } else { + *record = pop_record; + } + + if (OB_SUCC(ret)) { + ObLogBR *oblog_br = NULL; + + if (OB_ISNULL(record) || OB_ISNULL(*record)) { + LOG_ERROR("record is invalid", K(record)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(oblog_br = reinterpret_cast((*record)->getUserData()))) { + LOG_ERROR("get user data fail", "br", *record, K(oblog_br)); + ret = OB_ERR_UNEXPECTED; + } else { + int record_type = (*record)->recordType(); + int64_t timestamp_usec = (*record)->getTimestamp() * 1000000 + (*record)->getRecordUsec(); + + if (HEARTBEAT == record_type) { + last_heartbeat_timestamp_micro_sec_ = + std::max(timestamp_usec, last_heartbeat_timestamp_micro_sec_); + } + + // NOTE: Set the timestamp of the last heartbeat to Checkpoint1 of the data + (*record)->setCheckpoint(last_heartbeat_timestamp_micro_sec_ / 1000000, + last_heartbeat_timestamp_micro_sec_ % 1000000); + + if (EDDL == record_type) { + ATOMIC_INC(&output_ddl_br_count_); + } else if (EBEGIN == record_type) { + do_drc_consume_tps_stat_(); + } else if (HEARTBEAT != record_type && ECOMMIT != record_type) { + ATOMIC_INC(&output_dml_br_count_); + do_drc_consume_rps_stat_(); + } else { + // do nothing + } + + const int64_t part_trans_task_count = oblog_br->get_part_trans_task_count(); + bool need_accumulate_stat = true; + do_stat_for_part_trans_task_count_(record_type, part_trans_task_count, need_accumulate_stat); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(verify_ob_trace_id_(*record))) { + LOG_ERROR("verify_ob_trace_id_ fail", KR(ret), K(record), K(*record)); + } else { + // do nothing + } + } + + if (OB_SUCC(ret)) { + if (! TCONF.enable_verify_mode) { + // do nothing + } else { + if (OB_FAIL(verify_dml_unique_id_(*record))) { + LOG_ERROR("verify_dml_unique_id_ fail", KR(ret), K(record), K(*record)); + } else if (OB_FAIL(verify_ddl_schema_version_(*record))) { + LOG_ERROR("verify_ddl_schema_version_ fail", KR(ret), K(record), K(*record)); + } else { + } + } + } + + return ret; +} + +int ObLogInstance::verify_ob_trace_id_(ILogRecord *br) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("invalid arguments", K(br)); + ret = OB_INVALID_ARGUMENT; + } else if (! 
TCONF.need_verify_ob_trace_id) { + // do nothing + } else { + int record_type = br->recordType(); + + if (EINSERT == record_type || EUPDATE == record_type || EDELETE == record_type) { + // only verify insert\update\delete type + const ObString ob_trace_id_config(ob_trace_id_str_); + ObLogBR *oblog_br = NULL; + ObLogRowDataIndex *row_data_index = NULL; + PartTransTask *task = NULL; + + if (OB_ISNULL(oblog_br = reinterpret_cast(br->getUserData()))) { + LOG_ERROR("get user data fail", K(br), K(oblog_br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(row_data_index = static_cast(oblog_br->get_host()))) { + LOG_ERROR("row_data_index is NULL", KPC(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(task = static_cast(row_data_index->get_host()))) { + LOG_ERROR("part trans task is null", KPC(task)); + ret = OB_ERR_UNEXPECTED; + } else { + ObString trace_id; + const int64_t trace_id_idx = 2; + + if (OB_FAIL(get_br_filter_value_(*br, trace_id_idx, trace_id))) { + LOG_ERROR("get_br_filter_value_ fail", KR(ret), K(trace_id_idx), K(trace_id)); + } else { + if (0 == ob_trace_id_config.compare(trace_id)) { + // succ + } else { + LOG_ERROR("verify_ob_trace_id fail", K(trace_id), K(ob_trace_id_config), KPC(task)); + ret = OB_ITEM_NOT_MATCH; + } + } + } + } // record_type + } + + return ret; +} + +int ObLogInstance::verify_ddl_schema_version_(ILogRecord *br) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("invalid arguments", K(br)); + ret = OB_INVALID_ARGUMENT; + } else { + int record_type = br->recordType(); + + if (EDDL == record_type) { + ObLogBR *oblog_br = NULL; + PartTransTask *task = NULL; + int64_t new_cols_count = 0; + BinLogBuf *new_cols = br->newCols((unsigned int &)new_cols_count); + int64_t ddl_schema_version_index = 1; + + // Currently ddl br only synchronizes two columns, ddl_stmt_str and ddl_schema_version + if (OB_ISNULL(oblog_br = reinterpret_cast(br->getUserData()))) { + LOG_ERROR("get user data fail", K(br), K(oblog_br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(task = static_cast(oblog_br->get_host()))) { + LOG_ERROR("part trans task is null", KPC(task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(2 != new_cols_count)) { + LOG_ERROR("ddl br new cols count is not equal 2", K(new_cols_count)); + ret = OB_ERR_UNEXPECTED; + } else { + ObString br_ddl_schema_version(new_cols[ddl_schema_version_index].buf_used_size, + new_cols[ddl_schema_version_index].buf); + + int64_t ddl_schema_version = oblog_br->get_ddl_schema_version(); + const int64_t ddl_schema_version_str_len = DdlStmtTask::MAX_DDL_SCHEMA_VERSION_STR_LENGTH; + char ddl_schema_version_str[ddl_schema_version_str_len]; + int64_t pos = 0; + + if (OB_FAIL(databuff_printf(ddl_schema_version_str, ddl_schema_version_str_len, + pos, "%ld", ddl_schema_version))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(ddl_schema_version), + K(ddl_schema_version_str), K(pos)); + } else if (0 == br_ddl_schema_version.compare(ddl_schema_version_str)) { + // succ + } else { + LOG_ERROR("verify_ddl_schema_version_ fail", K(br_ddl_schema_version), + K(ddl_schema_version_str), KPC(task)); + ret = OB_ITEM_NOT_MATCH; + } + } + } + } + + return ret; +} + +int ObLogInstance::verify_dml_unique_id_(ILogRecord *br) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("invalid arguments", K(br)); + ret = OB_INVALID_ARGUMENT; + } else { + // Adding a self-checking adjacent record is a different scenario + static ObPartitionKey last_pkey; + static uint64_t last_prepare_log_id = OB_INVALID_ID; + static int32_t last_log_offset = 0; + static uint64_t last_row_index = OB_INVALID_ID; + + int record_type = br->recordType(); + + if (EINSERT == record_type || EUPDATE == record_type || EDELETE == record_type) { + // only verify insert\update\delete type + ObLogBR *oblog_br = NULL; + ObLogRowDataIndex *row_data_index = NULL; + PartTransTask *task = NULL; + + if (OB_ISNULL(oblog_br = reinterpret_cast(br->getUserData()))) { + LOG_ERROR("get user data fail", K(br), K(oblog_br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(row_data_index = static_cast(oblog_br->get_host()))) { + LOG_ERROR("row_data_index is NULL", KPC(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(task = static_cast(row_data_index->get_host()))) { + LOG_ERROR("part trans task is null", KPC(task)); + ret = OB_ERR_UNEXPECTED; + } else { + // binlog record set unique id + ObString br_unique_id; + const int64_t br_unique_id_idx = 1; + common::ObString dml_unique_id; + const ObString &pkey_and_log_id_str = task->get_pkey_and_log_id_str(); + const int32_t log_offset = row_data_index->get_log_offset(); + uint64_t row_index = row_data_index->get_row_no(); + DmlStmtUniqueID dml_stmt_unique_id(pkey_and_log_id_str, log_offset, row_index); + + if (OB_UNLIKELY(! dml_stmt_unique_id.is_valid())) { + LOG_ERROR("dml_stmt_unique_id is not valid", K(dml_stmt_unique_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_br_filter_value_(*br, br_unique_id_idx, br_unique_id))) { + LOG_ERROR("get_br_filter_value_ fail", KR(ret), K(br_unique_id_idx), K(br_unique_id)); + } else { + const int64_t buf_len = dml_stmt_unique_id.get_dml_unique_id_length() + 1; + // verify current br + char buf[buf_len]; + int64_t pos = 0; + + if (OB_FAIL(dml_stmt_unique_id.customized_to_string(buf, buf_len, pos))) { + LOG_ERROR("init_dml_unique_id_ fail", KR(ret), K(buf_len), K(pos)); + } else { + dml_unique_id.assign_ptr(buf, static_cast(pos)); + + if (0 == br_unique_id.compare(dml_unique_id)) { + // succ + } else { + LOG_ERROR("verify_dml_unique_id_ fail", K(br_unique_id), K(dml_unique_id), KPC(task)); + ret = OB_ITEM_NOT_MATCH; + } + } + + if (OB_SUCC(ret)) { + // Verify Adjacent br + if (OB_INVALID_ID == last_prepare_log_id) { + // won't verify for the first time + } else { + if (last_pkey == task->get_partition() + && last_prepare_log_id == task->get_prepare_log_id() + && last_log_offset == log_offset + && last_row_index == row_index) { + LOG_ERROR("current br_unique_id should not be equal to last_br_unique_id", + K(br_unique_id), KPC(task), K(row_index), + K(last_pkey), K(last_prepare_log_id), K(last_log_offset), K(last_row_index)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + } + } // OB_SUCC(ret) + + if (OB_SUCC(ret)) { + last_pkey = task->get_partition(); + last_prepare_log_id = task->get_prepare_log_id(); + last_log_offset = log_offset; + last_row_index = row_index; + } + } + } + } + } + + return ret; +} + +int ObLogInstance::get_br_filter_value_(ILogRecord &br, + const int64_t idx, + common::ObString &str) +{ + int ret = OB_SUCCESS; + LogRecordImpl *br_impl = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br_impl = dynamic_cast(&br))) { + LOG_ERROR("invalid arguments", K(br_impl)); + ret = OB_INVALID_ARGUMENT; + } else { + unsigned int filter_rv_count = 0; + const BinLogBuf *filter_value = br_impl->filterValues((unsigned int &) filter_rv_count); + const BinLogBuf *str_buf = filter_value + idx; + + if (OB_ISNULL(str_buf)) { + LOG_ERROR("str_buf is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + str.assign_ptr(str_buf->buf, str_buf->buf_used_size); + } + } + + return ret; +} + +void ObLogInstance::release_record(ILogRecord *record) +{ + int ret = OB_SUCCESS; + if (inited_ && NULL != record) { + int record_type = record->recordType(); + ObLogBR *br = reinterpret_cast(record->getUserData()); + + if (OB_ISNULL(br)) { + LOG_ERROR("binlog record user data is NULL", K(record)); + ret = OB_ERR_UNEXPECTED; + } else { + if (EDDL == record_type) { + ATOMIC_DEC(&output_ddl_br_count_); + } else if (EBEGIN == record_type) { + do_drc_release_tps_stat_(); + } else if (HEARTBEAT != record_type && ECOMMIT != record_type) { + ATOMIC_DEC(&output_dml_br_count_); + do_drc_release_rps_stat_(); + } else { + // do nothing + } + + const int64_t part_trans_task_count = br->get_part_trans_task_count(); + bool need_accumulate_stat = false; + do_stat_for_part_trans_task_count_(record_type, part_trans_task_count, need_accumulate_stat); + + if (OB_FAIL(resource_collector_->revert(record_type, br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert binlog record fail", KR(ret), K(br), + "record_type", print_record_type(record_type)); + } + } else { + br = NULL; + record = NULL; + } + } + } +} + +void ObLogInstance::handle_error(const int err_no, const char *fmt, ...) +{ + static const int64_t MAX_ERR_MSG_LEN = 1024; + static char err_msg[MAX_ERR_MSG_LEN]; + + if (inited_) { + // Call the error callback only once + if (0 == ATOMIC_CAS(&handle_error_flag_, 0, 1)) { + va_list ap; + va_start(ap, fmt); + vsnprintf(err_msg, sizeof(err_msg), fmt, ap); + va_end(ap); + + global_errno_ = (err_no == OB_SUCCESS ? 
OB_IN_STOP_STATE : err_no); + _LOG_INFO("HANDLE_ERROR: err_cb=%p, errno=%d, errmsg=\"%s\"", err_cb_, err_no, err_msg); + + if (NULL != err_cb_) { + ObLogError err; + err.level_ = ObLogError::ERR_ABORT; // FIXME: Support for other types of error levels + err.errno_ = err_no; + err.errmsg_ = err_msg; + + LOG_INFO("ERROR_CALLBACK begin", KP(err_cb_)); + err_cb_(err); + LOG_INFO("ERROR_CALLBACK end", KP(err_cb_)); + } else { + LOG_ERROR("No ERROR CALLBACK function available, abort now"); + } + + // notify other module to stop + mark_stop_flag(); + } + } +} + +int32_t ObLogInstance::get_log_level() const +{ + return OB_LOGGER.get_log_level(); +} + +const char *ObLogInstance::get_log_file() const +{ + return DEFAULT_LOG_FILE; +} + +void ObLogInstance::write_pid_file_() +{ + int pid_file_fd = -1; + const char *pid_file = DEFAULT_PID_FILE; + char pid_file_dir[32] = {}; + + (void)snprintf(pid_file_dir, sizeof(pid_file_dir), "%s", DEFAULT_PID_FILE_DIR); + common::FileDirectoryUtils::create_full_path(pid_file_dir); + + pid_file_fd = open(pid_file, O_RDWR | O_CREAT, 0600); + if (OB_UNLIKELY(pid_file_fd < 0)) { + LOG_ERROR("open pid file fail", K(pid_file), K(pid_file_fd), K(errno), KERRMSG); + } else { + char buf[32] = {}; + (void)snprintf(buf, sizeof(buf), "%d\n", getpid()); + (void)ftruncate(pid_file_fd, 0); + + ssize_t len = strlen(buf); + ssize_t nwrite = write(pid_file_fd, buf, len); + if (OB_UNLIKELY(len != nwrite)) { + LOG_ERROR("write pid file fail", K(pid_file), K(pid_file_fd), + K(buf), K(len), K(errno), KERRMSG); + } + + close(pid_file_fd); + pid_file_fd = -1; + } +} + +void *ObLogInstance::timer_thread_func_(void *args) +{ + if (NULL != args) { + ObLogInstance *instance = static_cast(args); + instance->timer_routine(); + } + + return NULL; +} + +void *ObLogInstance::sql_thread_func_(void *args) +{ + if (NULL != args) { + ObLogInstance *instance = static_cast(args); + instance->sql_thread_routine(); + } + + return NULL; +} + +void *ObLogInstance::flow_control_thread_func_(void *args) +{ + if (NULL != args) { + ObLogInstance *instance = static_cast(args); + instance->flow_control_thread_routine(); + } + + return NULL; +} + +void ObLogInstance::sql_thread_routine() +{ + int ret = OB_SUCCESS; + const static int64_t THREAD_INTERVAL = 1 * _SEC_; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else { + while (! 
stop_flag_ && OB_SUCCESS == ret) { + int64_t cluster_version_refresh_interval_sec = TCONF.cluster_version_refresh_interval_sec * _SEC_; + + // refresh SQL SERVER list + if (REACH_TIME_INTERVAL(REFRESH_SERVER_LIST_INTERVAL)) { + ObLogSQLServerProvider *server_provider = static_cast<ObLogSQLServerProvider *>(server_provider_); + if (OB_ISNULL(server_provider)) { + LOG_ERROR("server_provider is NULL", K(server_provider)); + ret = OB_ERR_UNEXPECTED; + } else { + server_provider->call_refresh_server_list(); + } + } + + // refresh cluster version + if (REACH_TIME_INTERVAL(cluster_version_refresh_interval_sec)) { + (void)update_cluster_version_(); + + // check observer version <= liboblog version + if (OB_FAIL(check_observer_version_valid_())) { + LOG_ERROR("check_observer_version_valid_ fail", KR(ret)); + } + } + + usleep(THREAD_INTERVAL); + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) { + handle_error(ret, "sql thread exits, err=%d", ret); + stop_flag_ = true; + } + } + + LOG_INFO("instance sql thread exits", KR(ret), K_(stop_flag)); +} + +void ObLogInstance::flow_control_thread_routine() +{ + int ret = OB_SUCCESS; + const static int64_t THREAD_INTERVAL = 100 * 1000; // Flow control takes 100ms at a time + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else { + while (! stop_flag_ && OB_SUCCESS == ret) { + // flow control + global_flow_control_(); + + usleep(THREAD_INTERVAL); + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) { + handle_error(ret, "flow control thread exits, err=%d", ret); + stop_flag_ = true; + } + } + + LOG_INFO("instance flow control thread exits", KR(ret), K_(stop_flag)); +} + +void ObLogInstance::timer_routine() +{ + int ret = OB_SUCCESS; + const static int64_t TIMER_INTERVAL = 1 * _SEC_; + const static int64_t PRINT_INTERVAL = 10 * _SEC_; + int64_t clean_log_interval = CLEAN_LOG_INTERVAL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else { + while (!
stop_flag_ && OB_SUCCESS == ret) { + // Periodic reload configuration + if (REACH_TIME_INTERVAL(RELOAD_CONFIG_INTERVAL)) { + reload_config_(); + } + + // Periodic printing of statistical information + if (REACH_TIME_INTERVAL(PRINT_INTERVAL)) { + print_tenant_memory_usage_(); + schema_getter_->print_stat_info(); + tenant_mgr_->print_stat_info(); + trans_task_pool_.print_stat_info(); + log_entry_task_pool_->print_stat_info(); + br_pool_->print_stat_info(); + trans_ctx_mgr_->print_stat_info(); + print_trans_stat_(); + resource_collector_->print_stat_info(); + data_processor_->print_stat_info(); + } + + // Periodic memory recycling + if (REACH_TIME_INTERVAL(ObLogSchemaGetter::RECYCLE_MEMORY_INTERVAL)) { + schema_getter_->try_recycle_memory(); + } + + // Cycle Cleanup Log + int64_t clean_cycle = ATOMIC_LOAD(&log_clean_cycle_time_us_); + clean_log_interval = std::min(clean_log_interval, clean_cycle); + if (clean_cycle > 0 && REACH_TIME_INTERVAL(clean_log_interval)) { + clean_log_(); + } + + usleep(TIMER_INTERVAL); + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) { + handle_error(ret, "timer thread exits, err=%d", ret); + stop_flag_ = true; + } + } + + LOG_INFO("instance timer thread exits", KR(ret), K_(stop_flag)); +} + +int ObLogInstance::start_threads_() +{ + int ret = OB_SUCCESS; + int pthread_ret = 0; + + if (OB_UNLIKELY(0 != timer_tid_)) { + LOG_ERROR("timer thread has been started", K(timer_tid_)); + ret = OB_NOT_SUPPORTED; + } else if (0 != (pthread_ret = pthread_create(&timer_tid_, NULL, timer_thread_func_, this))) { + LOG_ERROR("start timer thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(0 != sql_tid_)) { + LOG_ERROR("sql thread has been started", K(sql_tid_)); + ret = OB_NOT_SUPPORTED; + } else if (0 != (pthread_ret = pthread_create(&sql_tid_, NULL, sql_thread_func_, this))) { + LOG_ERROR("start sql thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(0 != flow_control_tid_)) { + LOG_ERROR("flow control thread has been started", K(flow_control_tid_)); + ret = OB_NOT_SUPPORTED; + } else if (0 != (pthread_ret = pthread_create(&flow_control_tid_, NULL, flow_control_thread_func_, this))) { + LOG_ERROR("start flow control thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("start instance threads succ", K(timer_tid_), K(sql_tid_), K(flow_control_tid_)); + } + + return ret; +} + +void ObLogInstance::wait_threads_stop_() +{ + if (0 != timer_tid_) { + int pthread_ret = pthread_join(timer_tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("join timer thread fail", K(timer_tid_), K(pthread_ret), + KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop timer thread succ", K(timer_tid_)); + } + + timer_tid_ = 0; + } + + if (0 != sql_tid_) { + int pthread_ret = pthread_join(sql_tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("join sql thread fail", K(sql_tid_), K(pthread_ret), + KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop sql thread succ", K(sql_tid_)); + } + + sql_tid_ = 0; + } + + if (0 != flow_control_tid_) { + int pthread_ret = pthread_join(flow_control_tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("join flow control thread fail", K(flow_control_tid_), K(pthread_ret), + KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop flow control thread succ", K(flow_control_tid_)); + } + + flow_control_tid_ = 0; + } +} + +void ObLogInstance::reload_config_() +{ + int 
ret = OB_SUCCESS; + ObLogConfig &config = TCONF; + const char *default_config_fpath = DEFAULT_CONFIG_FPATN; + + _LOG_INFO("====================reload config begin===================="); + + if (OB_FAIL(config.load_from_file(default_config_fpath))) { + LOG_ERROR("load_from_file fail", KR(ret), K(default_config_fpath)); + } else { + LOG_INFO("reset log level", "log_level", config.log_level.str()); + OB_LOGGER.set_mod_log_levels(config.log_level.str()); + + ATOMIC_STORE(&log_clean_cycle_time_us_, config.log_clean_cycle_time_in_hours * _HOUR_); + + if (0 != config.enable_dump_pending_trans_info) { + dump_pending_trans_info_(); + } + + // config fetcher + if (OB_NOT_NULL(fetcher_)) { + fetcher_->configure(config); + } + + // config sequencer + if (OB_NOT_NULL(sequencer_)) { + sequencer_->configure(config); + } + + // config committer_ + if (OB_NOT_NULL(committer_)) { + committer_->configure(config); + } + + // config server_provider_ + if (OB_NOT_NULL(server_provider_)) { + ObLogSQLServerProvider *oblog_server_provider = static_cast(server_provider_); + + if (OB_ISNULL(oblog_server_provider)) { + LOG_ERROR("oblog_server_provider is NULL", K(oblog_server_provider)); + ret = OB_ERR_UNEXPECTED; + } else { + oblog_server_provider->configure(config); + } + } + } + + _LOG_INFO("====================reload config end===================="); +} + +void ObLogInstance::print_tenant_memory_usage_() +{ + lib::ObMallocAllocator *mallocator = lib::ObMallocAllocator::get_instance(); + + if (OB_ISNULL(mallocator)) { + LOG_ERROR("mallocator is NULL, can not print_tenant_memory_usage"); + } else { + mallocator->print_tenant_memory_usage(OB_SYS_TENANT_ID); + mallocator->print_tenant_ctx_memory_usage(OB_SYS_TENANT_ID); + mallocator->print_tenant_memory_usage(OB_SERVER_TENANT_ID); + mallocator->print_tenant_ctx_memory_usage(OB_SERVER_TENANT_ID); + } +} + +/// Global traffic control +/// Principle: 1. Keep the total number of active Partition Transaction Tasks (PartTransTask) under control by referring to the number of +/// Match the production rate with the consumption rate to avoid OOM +/// 2. Consider liboblog memory usage, when the memory usage reaches a certain limit, flow control should be performed to avoid OOM +/// +/// Implementation: 1. Check the number of active partition transaction tasks periodically, and when it exceeds the upper bound, check whether there are enough reusable +/// Partitioned transaction tasks, if they exist, stop Fetcher; otherwise, turn on Fetcher. +// 2. periodically check the total memory occupied by liboblog, and when it exceeds the upper bound, check if there are enough reusable +// partitioned transaction tasks, if they exist, stop Fetcher; otherwise turn on Fetcher. 
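// Illustrative sketch of the rule described above, with invented parameter names
// (the real global_flow_control_() below derives these flags from module statistics):
static bool need_slow_down_fetcher_sketch(const bool pressure_over_limit,        // active tasks, memory hold, or low system memory
                                          const bool reclaimable_backlog_exists, // reusable / ready-to-sequence tasks piled up downstream
                                          const bool log_entry_tasks_over_limit)
{
  // Pause fetching only when high pressure coincides with a reclaimable backlog
  // (pausing then actually frees memory), or when the number of active log-entry
  // tasks alone exceeds its bound.
  return (pressure_over_limit && reclaimable_backlog_exists) || log_entry_tasks_over_limit;
}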
+void ObLogInstance::global_flow_control_() +{ + int ret = OB_SUCCESS; + + if (inited_) { + if (OB_ISNULL(fetcher_) || OB_ISNULL(dml_parser_) + || OB_ISNULL(formatter_) + || OB_ISNULL(ddl_handler_) || OB_ISNULL(resource_collector_)) { + LOG_ERROR("invalid arguments", K(fetcher_), K(dml_parser_), + K(formatter_), K(ddl_handler_), K(resource_collector_)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t part_trans_task_active_count_upper_bound = + TCONF.part_trans_task_active_count_upper_bound; + int64_t part_trans_task_reusable_count_upper_bound = + TCONF.part_trans_task_reusable_count_upper_bound; + int64_t ready_to_seq_task_upper_bound = + TCONF.ready_to_seq_task_upper_bound; + int64_t log_entry_task_active_count_upper_bound = + TCONF.log_entry_task_active_count_upper_bound; + double system_memory_avail_percentage_lower_bound = + static_cast(TCONF.system_memory_avail_percentage_lower_bound) / 100; + int64_t memory_limit = TCONF.memory_limit.get(); + + int64_t total_part_trans_task_count = trans_task_pool_.get_total_count(); + int64_t active_part_trans_task_count = trans_task_pool_.get_alloc_count(); + int64_t active_log_entry_task_count = log_entry_task_pool_->get_alloc_count(); + int64_t reusable_part_trans_task_count = 0; + int64_t ready_to_seq_task_count = 0; + + int64_t fetcher_part_trans_task_count = fetcher_->get_part_trans_task_count(); + int64_t dml_parser_part_trans_task_count = 0; + int64_t br_queue_part_trans_task_count = br_queue_.get_part_trans_task_count(); + int64_t out_part_trans_task_count = get_out_part_trans_task_count_(); + int64_t resource_collector_part_trans_task_count = resource_collector_->get_part_trans_task_count(); + int64_t committer_ddl_part_trans_task_count = 0; + int64_t committer_dml_part_trans_task_count = 0; + int64_t committer_br_count = 0; + committer_->get_part_trans_task_count(committer_ddl_part_trans_task_count, + committer_dml_part_trans_task_count, + committer_br_count); + + int64_t memory_hold = get_memory_hold_(); + int64_t system_memory_avail = get_memory_avail_(); + int64_t system_memory_limit = get_memory_limit_(); + int64_t system_memory_avail_lower_bound = + static_cast(static_cast(system_memory_limit) * system_memory_avail_percentage_lower_bound); + bool need_slow_down_fetcher = false; + + if (OB_FAIL(get_task_count_(ready_to_seq_task_count, reusable_part_trans_task_count))) { + LOG_ERROR("get_task_count fail", KR(ret), K(ready_to_seq_task_count), + K(reusable_part_trans_task_count)); + } else if (OB_FAIL(dml_parser_->get_log_entry_task_count(dml_parser_part_trans_task_count))) { + LOG_ERROR("DML parser get_log_entry_task_count fail", KR(ret), K(dml_parser_part_trans_task_count)); + } else { + // Use the following policy for global traffic control: + // need_slow_down = (active partitioned transaction tasks exceed the upper limit || liboblog takes up more memory than the upper limit || system free memory is less than a certain percentage) + // && (reusable transaction tasks exceeds limit || Parser and Sequencer module cache tasks exceeds limit) + bool condition1 = (active_part_trans_task_count >= part_trans_task_active_count_upper_bound) + || (memory_hold >= memory_limit) + || (system_memory_avail < system_memory_avail_lower_bound); + bool condition2 = (reusable_part_trans_task_count >= part_trans_task_reusable_count_upper_bound) + || (ready_to_seq_task_count > ready_to_seq_task_upper_bound); + bool condition3 = (active_log_entry_task_count >= log_entry_task_active_count_upper_bound); + + need_slow_down_fetcher = (condition1 && condition2) 
|| (condition3); + + // Get the number of active distributed transactions after sequencing, including sequenced, formatted, and committed + int64_t seq_trans_count = + trans_ctx_mgr_->get_trans_count(TransCtx::TRANS_CTX_STATE_SEQUENCED); + int64_t committed_trans_count = + trans_ctx_mgr_->get_trans_count(TransCtx::TRANS_CTX_STATE_COMMITTED); + + bool current_fetcher_is_paused = fetcher_->is_paused(); + + // Print logs: 1. on status changes; 2. cyclical printing + bool need_print_state = (current_fetcher_is_paused != need_slow_down_fetcher); + + if (need_print_state || REACH_TIME_INTERVAL(PRINT_GLOBAL_FLOW_CONTROL_INTERVAL)) { + _LOG_INFO("[STAT] [FLOW_CONTROL] NEED_SLOW_DOWN=%d " + "PAUSED=%d MEM=%s/%s " + "AVAIL_MEM=%s/%s " + "READY_TO_SEQ=%ld/%ld " + "PART_TRANS(TOTAL=%ld,ACTIVE=%ld/%ld,REUSABLE=%ld/%ld) " + "LOG_TASK(ACTIVE=%ld/%ld) " + "[FETCHER=%ld DML_PARSER=%ld " + "COMMITER=%ld USER_QUEUE=%ld OUT=%ld RC=%ld] " + "DIST_TRANS(SEQ=%ld,COMMITTED=%ld)", + need_slow_down_fetcher, current_fetcher_is_paused, + SIZE_TO_STR(memory_hold), SIZE_TO_STR(memory_limit), + SIZE_TO_STR(system_memory_avail), SIZE_TO_STR(system_memory_avail_lower_bound), + ready_to_seq_task_count, ready_to_seq_task_upper_bound, + total_part_trans_task_count, + active_part_trans_task_count, part_trans_task_active_count_upper_bound, + reusable_part_trans_task_count, part_trans_task_reusable_count_upper_bound, + active_log_entry_task_count, log_entry_task_active_count_upper_bound, + fetcher_part_trans_task_count, dml_parser_part_trans_task_count, + committer_ddl_part_trans_task_count + committer_dml_part_trans_task_count, + br_queue_part_trans_task_count, out_part_trans_task_count, + resource_collector_part_trans_task_count, + seq_trans_count, committed_trans_count); + } + } + + if (OB_SUCC(ret)) { + // 1. Traffic control requires fetcher to be suspended + // 2. The configuration item forces the fetcher to be suspended + int64_t config_pause_fetcher = TCONF.pause_fetcher; + if (need_slow_down_fetcher || 0 != config_pause_fetcher) { + LOG_INFO("[STAT] [FLOW_CONTROL] [CONFIG] [PAUSE_FETCHER]", + K(need_slow_down_fetcher), K(config_pause_fetcher)); + fetcher_->pause(); + } + // 3. 
Recovery fetcher in other cases + else { + if (fetcher_->is_paused()) { + LOG_INFO("[STAT] [FLOW_CONTROL] [RESUME_FETCHER]"); + fetcher_->resume(); + } + } + } + } // else + } // inited +} + +void ObLogInstance::dump_pending_trans_info_() +{ + int ret = OB_SUCCESS; + + if (NULL != trans_ctx_mgr_) { + const char *file = DEFAULT_PENDING_TRANS_INFO_FILE; + int fd = open(file, O_WRONLY | O_APPEND | O_CREAT, 0600); + static const int64_t BUFFER_SIZE = 2 << 26; + char *buffer = static_cast(ob_log_malloc(BUFFER_SIZE)); + + if (OB_UNLIKELY(fd < 0)) { + LOG_ERROR("open pending trans info file fail", K(file), K(fd), + K(errno), KERRMSG); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(buffer)) { + LOG_ERROR("allocate memory for pending trans info fail", K(BUFFER_SIZE)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + int64_t pos = 0; + + (void)databuff_printf(buffer, BUFFER_SIZE, pos, "============================== BEGIN "); + (void)ObTimeUtility2::usec_to_str(get_timestamp(), buffer, BUFFER_SIZE, pos); + (void)databuff_printf(buffer, BUFFER_SIZE, pos, " ==============================\n"); + + if (OB_FAIL(trans_ctx_mgr_->dump_pending_trans_info(buffer, BUFFER_SIZE, pos))) { + LOG_ERROR("dump pending trans info fail", KR(ret), K(buffer), K(BUFFER_SIZE), K(pos)); + } else { + (void)databuff_printf(buffer, BUFFER_SIZE, pos, + "============================== END ==============================;\n"); + } + + if (OB_SUCC(ret)) { + int64_t total_size = pos; + char *ptr = buffer; + + while (OB_SUCC(ret) && total_size > 0) { + ssize_t nwrite = 0; + nwrite = write(fd, ptr, static_cast(total_size)); + + if (nwrite >= 0) { + total_size -= nwrite; + ptr += nwrite; + } else { + LOG_ERROR("write pending trans info file fail", + K(nwrite), K(total_size), K(file), K(fd), K(errno), KERRMSG, KP(ptr)); + ret = OB_IO_ERROR; + break; + } + } + } + } + + if (fd >= 0) { + if (OB_FAIL(fsync(fd))) { + LOG_ERROR("failed to execute fsync file", K(ret), K(fd)); + } else if (OB_FAIL(close(fd))) { + LOG_ERROR("failed to execute close file", K(ret), K(fd)); + } else { + fd = -1; + } + } + + if (NULL != buffer) { + ob_log_free(buffer); + buffer = NULL; + } + } +} + +void ObLogInstance::clean_log_() +{ + int64_t cycle_time = log_clean_cycle_time_us_; + const static int64_t PRINT_TIME_BUF_SIZE = 64; + const static int64_t CMD_BUF_SIZE = 1024; + static char print_time_buf[PRINT_TIME_BUF_SIZE]; + static char cmd_buf[CMD_BUF_SIZE]; + static const char *print_time_format = "%Y-%m-%d %H:%i:%s"; + static const char *cmd_time_format = "%Y%m%d%H%i%s"; + static const char *log_file = "removed_log_files"; + + if (cycle_time > 0) { + int64_t print_time_pos = 0; + int64_t cmd_pos = 0; + int64_t base_time = get_timestamp() - cycle_time; + int64_t begin_time = get_timestamp(); + + (void)ObTimeUtility2::usec_format_to_str(base_time, print_time_format, + print_time_buf, PRINT_TIME_BUF_SIZE, print_time_pos); + + (void)databuff_printf(cmd_buf, CMD_BUF_SIZE, cmd_pos, + "echo `date` > log/%s; base_time=", log_file); + + (void)ObTimeUtility2::usec_format_to_str(base_time, cmd_time_format, + cmd_buf, CMD_BUF_SIZE, cmd_pos); + + (void)databuff_printf(cmd_buf, CMD_BUF_SIZE, cmd_pos, "; " + "for file in `find log/ | grep \"liboblog.log.\" | grep -v err`; " + "do " + "num=`echo $file | cut -d '.' 
-f 3`; " + "if [ $num -lt $base_time ]; " + "then " + "echo $file >> log/%s; " + "rm $file -f; " + "fi " + "done", log_file); + + (void)system(cmd_buf); + + _LOG_INFO("[STAT] [CLEAN_LOG] BASE_TIME='%.*s' EXE_TIME=%ld CYCLE_TIME=%ld CMD=%s", + (int)print_time_pos, print_time_buf, get_timestamp() - begin_time, + cycle_time, cmd_buf); + } +} + +int64_t ObLogInstance::get_memory_hold_() +{ + return lib::get_memory_used(); +} + +int64_t ObLogInstance::get_memory_avail_() +{ + return lib::get_memory_avail(); +} + +int64_t ObLogInstance::get_memory_limit_() +{ + return lib::get_memory_limit(); +} + +int ObLogInstance::get_task_count_(int64_t &ready_to_seq_task_count, + int64_t &part_trans_task_resuable_count) +{ + int ret = OB_SUCCESS; + ready_to_seq_task_count = 0; + part_trans_task_resuable_count = 0; + + if (OB_ISNULL(fetcher_) || OB_ISNULL(dml_parser_) || OB_ISNULL(formatter_) + || OB_ISNULL(storager_) + || OB_ISNULL(sequencer_) || OB_ISNULL(data_processor_) || OB_ISNULL(committer_) + || OB_ISNULL(ddl_handler_) || OB_ISNULL(resource_collector_)) { + LOG_ERROR("invalid arguments", K(fetcher_), K(dml_parser_), K(formatter_), K(storager_), + K(sequencer_), K(data_processor_), K(committer_), K(ddl_handler_), K(resource_collector_)); + ret = OB_ERR_UNEXPECTED; + } else { + // I. Get the number of tasks to be processed by each module + int64_t dml_parser_log_count = 0; + int64_t formatter_br_count = 0; + int64_t formatter_log_count = 0; + int64_t storager_task_count = 0; + struct IObLogSequencer::SeqStatInfo seq_stat_info; + int64_t data_processor_task_count = 0; + int64_t committer_pending_dml_trans_count = committer_->get_dml_trans_count(); + + if (OB_FAIL(dml_parser_->get_log_entry_task_count(dml_parser_log_count))) { + LOG_ERROR("parser get_log_entry_task_count fail", KR(ret), K(dml_parser_log_count)); + } else if (OB_FAIL(formatter_->get_task_count(formatter_br_count, formatter_log_count))) { + LOG_ERROR("formatter get_task_count fail", KR(ret), K(formatter_br_count), K(formatter_log_count)); + } else { + storager_->get_task_count(storager_task_count); + sequencer_->get_task_count(seq_stat_info); + data_processor_->get_task_count(data_processor_task_count); + + // Count the number of partitioned tasks to be ordered + ready_to_seq_task_count = dml_parser_log_count + formatter_log_count + storager_task_count; + } + + // II. Get the number of reusable tasks for each module + // 1. count by module + // 2. 
The number of reusable tasks in the overall partition includes the following components: + // (1) tasks held by committer + // (2) Tasks held by br_queue + // (3) Tasks held by users that have not been returned + // (4) tasks held by resource_collector + if (OB_SUCC(ret)) { + int64_t committer_ddl_part_trans_task_count = 0; + int64_t committer_dml_part_trans_task_count = 0; + int64_t committer_br_count = 0; + + int64_t fetcher_part_trans_task_count = fetcher_->get_part_trans_task_count(); + committer_->get_part_trans_task_count(committer_ddl_part_trans_task_count, + committer_dml_part_trans_task_count, + committer_br_count); + int64_t ddl_handle_part_trans_task_count = ddl_handler_->get_part_trans_task_count(); + int64_t br_queue_part_trans_task_count = br_queue_.get_part_trans_task_count(); + int64_t out_part_trans_task_count = get_out_part_trans_task_count_(); + int64_t resource_collector_part_trans_task_count = resource_collector_->get_part_trans_task_count(); + int64_t dml_br_count_in_user_queue = br_queue_.get_dml_br_count(); + int64_t dml_br_count_output = output_dml_br_count_; + + // Get the number of DDL Binlog Records in the user queue + int64_t ddl_br_count_in_user_queue = br_queue_.get_ddl_br_count(); + int64_t ddl_br_count_output = output_ddl_br_count_; + + part_trans_task_resuable_count = committer_ddl_part_trans_task_count + + committer_dml_part_trans_task_count + + br_queue_part_trans_task_count + + out_part_trans_task_count + + resource_collector_part_trans_task_count; + + // Print monitoring items + if (REACH_TIME_INTERVAL(PRINT_GLOBAL_FLOW_CONTROL_INTERVAL)) { + _LOG_INFO("------------------------------------------------------------"); + _LOG_INFO("[TASK_COUNT_STAT] [FETCHER] [PART_TRANS_TASK=%ld]", fetcher_part_trans_task_count); + _LOG_INFO("[TASK_COUNT_STAT] [DML_PARSER] [LOG_TASK=%ld]", dml_parser_log_count); + _LOG_INFO("[TASK_COUNT_STAT] [DDL_HANDLE] [PART_TRANS_TASK=%ld]", ddl_handle_part_trans_task_count); + _LOG_INFO("[TASK_COUNT_STAT] [FORMATTER] [BR=%ld LOG_TASK=%ld]", formatter_br_count, formatter_log_count); + _LOG_INFO("[TASK_COUNT_STAT] [STORAGER] [LOG_TASK=%ld]", storager_task_count); + _LOG_INFO("[TASK_COUNT_STAT] [SEQUENCER] [PART_TRANS_TASK(QUEUE=%ld TOTAL=[%ld][DDL=%ld DML=%ld HB=%ld])]", + seq_stat_info.queue_part_trans_task_count_, seq_stat_info.total_part_trans_task_count_, + seq_stat_info.ddl_part_trans_task_count_, seq_stat_info.dml_part_trans_task_count_, seq_stat_info.hb_part_trans_task_count_); + _LOG_INFO("[TASK_COUNT_STAT] [DATA_PROCESSIR] [ROW_TASK=%ld]", data_processor_task_count); + _LOG_INFO("[TASK_COUNT_STAT] [COMMITER] [DML_TRANS=%ld DDL_PART_TRANS_TASK=%ld DML_PART_TRANS_TASK=%ld] BR_COUNT=%ld", + committer_pending_dml_trans_count, + committer_ddl_part_trans_task_count, + committer_dml_part_trans_task_count, + committer_br_count); + _LOG_INFO("[TASK_COUNT_STAT] [USER_QUEQUE] [PART_TRANS_TASK=%ld] [DDL_BR=%ld] [DML_BR=%ld]", + br_queue_part_trans_task_count, + ddl_br_count_in_user_queue, + dml_br_count_in_user_queue); + _LOG_INFO("[TASK_COUNT_STAT] [OUT] [PART_TRANS_TASK=%ld] [DDL_BR=%ld] [DML_BR=%ld]", out_part_trans_task_count, + ddl_br_count_output, dml_br_count_output); + _LOG_INFO("[TASK_COUNT_STAT] [RESOURCE_COLLECTOR] [PART_TRANS_TASK=%ld]", + resource_collector_part_trans_task_count); + } + } + } + + return ret; +} + +void ObLogInstance::do_drc_consume_tps_stat_() +{ + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr_)); + } else { + trans_stat_mgr_->do_drc_consume_tps_stat(); + } +} + +void 
ObLogInstance::do_drc_consume_rps_stat_() +{ + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr_)); + } else { + trans_stat_mgr_->do_drc_consume_rps_stat(); + } +} + +void ObLogInstance::do_drc_release_tps_stat_() +{ + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr_)); + } else { + trans_stat_mgr_->do_drc_release_tps_stat(); + } +} + +void ObLogInstance::do_drc_release_rps_stat_() +{ + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr_)); + } else { + trans_stat_mgr_->do_drc_release_rps_stat(); + } +} + +void ObLogInstance::do_stat_for_part_trans_task_count_(int record_type, + int64_t part_trans_task_count, + bool need_accumulate_stat) +{ + if ((EDDL == record_type) || (EBEGIN == record_type)) { + if (need_accumulate_stat) { + // hold by user + (void)ATOMIC_AAF(&part_trans_task_count_, part_trans_task_count); + } else { + // return by user + (void)ATOMIC_AAF(&part_trans_task_count_, -part_trans_task_count); + } + } else { + // do nothing + } +} + +void ObLogInstance::print_trans_stat_() +{ + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr_)); + } else { + trans_stat_mgr_->print_stat_info(); + } +} + +int ObLogInstance::init_ob_trace_id_(const char *ob_trace_id_ptr) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + + if (OB_ISNULL(ob_trace_id_ptr)) { + LOG_ERROR("ob_trace_id_ptr is null", K(ob_trace_id_ptr)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(databuff_printf(ob_trace_id_str_, sizeof(ob_trace_id_str_), pos, "%s", + ob_trace_id_ptr))) { + LOG_ERROR("databuff_printf ob_trace_id_str_ fail", K(ob_trace_id_str_), K(pos), K(ob_trace_id_ptr)); + } else { + LOG_INFO("init_ob_trace_id_ succ", K(ob_trace_id_str_), K(ob_trace_id_ptr)); + } + + return ret; +} + +int ObLogInstance::query_cluster_info_(ObLogSysTableHelper::ClusterInfo &cluster_info) +{ + int ret = OB_SUCCESS; + cluster_info.reset(); + bool done = false; + + if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("systable_helper_ is null", K(systable_helper_)); + ret = OB_ERR_UNEXPECTED; + } else { + while (! done && OB_SUCCESS == ret) { + if (OB_FAIL(systable_helper_->query_cluster_info(cluster_info))) { + LOG_WARN("systable_helper_ query_cluster_info fail", KR(ret), K(cluster_info)); + } else { + done = true; + } + + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + usleep(100L * 1000L); + } + } + } + + return ret; +} + +// init cluster version +int ObLogInstance::init_ob_cluster_version_() +{ + int ret = OB_SUCCESS; + uint64_t min_observer_version = OB_INVALID_ID; + + if (OB_FAIL(query_cluster_min_observer_version_(min_observer_version))) { + LOG_ERROR("query_cluster_min_observer_version_ fail", KR(ret), K(min_observer_version)); + } else if (OB_FAIL(ObClusterVersion::get_instance().init(min_observer_version))) { + LOG_ERROR("ObClusterVersion init fail", KR(ret), K(min_observer_version)); + } else { + LOG_INFO("OceanBase cluster version init succ", "cluster_version", ObClusterVersion::get_instance()); + } + + return ret; +} + +// Query the smallest version of the cluster until it succeeds +int ObLogInstance::query_cluster_min_observer_version_(uint64_t &min_observer_version) +{ + int ret = OB_SUCCESS; + bool done = false; + + if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("systable_helper_ is null", K(systable_helper_)); + ret = OB_ERR_UNEXPECTED; + } else { + while (! 
done && OB_SUCCESS == ret) { + if (OB_FAIL(systable_helper_->query_cluster_min_observer_version(min_observer_version))) { + LOG_WARN("systable_helper_ query_cluster_min_observer_version fail", KR(ret), K(min_observer_version)); + } else { + done = true; + } + + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + usleep(100L * 1000L); + } + } + } + + return ret; +} + +// update cluster version +void ObLogInstance::update_cluster_version_() +{ + int ret = OB_SUCCESS; + uint64_t min_observer_version = OB_INVALID_ID; + + if (OB_NOT_NULL(systable_helper_)) { + if (OB_FAIL(systable_helper_->query_cluster_min_observer_version(min_observer_version))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("systable_helper_ query_cluster_min_observer_version fail", KR(ret), K(min_observer_version)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("systable_helper_ query_cluster_min_observer_version fail", KR(ret), K(min_observer_version)); + } + } else { + ObClusterVersion::get_instance().update_cluster_version(min_observer_version); + LOG_INFO("OceanBase cluster version update succ", "cluster_version", ObClusterVersion::get_instance()); + } + } +} + +int ObLogInstance::check_ob_version_legal_(const uint64_t ob_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == ob_version)) { + ret = OB_INVALID_DATA; + LOG_WARN("ob_version is invalid", KR(ret), K(ob_version)); + } else { + // do nothing + } + return ret; +} + +int ObLogInstance::check_observer_version_valid_() +{ + int ret = OB_SUCCESS; + + const uint64_t ob_version = GET_MIN_CLUSTER_VERSION(); + uint32_t ob_major = 0; + uint16_t ob_minor = 0; + uint16_t ob_patch = 0; + cal_version_components_(ob_version, ob_major, ob_minor, ob_patch); + if (0 != TCONF.skip_ob_version_compat_check) { + _LOG_INFO("skip_ob_version_compat_check is true, skip check, observer_version(%u.%hu.%hu)", + ob_major, ob_minor, ob_patch); + } else if (OB_FAIL(check_ob_version_legal_(ob_version))) { + _LOG_WARN("check ob version illegal, observer_version(%u.%hu.%hu), skip it", + ob_major, ob_minor, ob_patch); + ret = OB_SUCCESS; + } else if ((oblog_major_ >=3 && ob_major == 1)) { + ret = OB_VERSION_NOT_MATCH; + _LOG_ERROR("oblog_version(%u.%hu.%hu) don't support observer_version(%u.%hu.%hu)", + oblog_major_, oblog_minor_, oblog_patch_, ob_major, ob_minor, ob_patch); + } else if (oblog_major_ > ob_major + || (oblog_major_ == ob_major && oblog_minor_ >= ob_minor)) { + _LOG_INFO("oblog_version(%u.%hu.%hu) compatible with observer_version(%u.%hu.%hu)", + oblog_major_, oblog_minor_, oblog_patch_, ob_major, ob_minor, ob_patch); + } else { + ret = OB_VERSION_NOT_MATCH; + _LOG_ERROR("oblog_version(%u.%hu.%hu) not compatible with observer_version(%u.%hu.%hu), " + "oblog_version is too old, need upgrade", + oblog_major_, oblog_minor_, oblog_patch_, ob_major, ob_minor, ob_patch); + } + + return ret; +} + +void ObLogInstance::init_global_context_() +{ + // The schema module relies on this value to determine the working mode, setting it to invalid means old mode is used. + // For versions below 2.2, liboblog should start in old mode, and switch schema working mode when schema splitting is detected. + // For version 2.2 or higher, liboblog can start in new mode. + (void)GCTX.set_split_schema_version(OB_INVALID_VERSION); + (void)GCTX.set_split_schema_version_v2(OB_INVALID_VERSION); + + // There is no need for liboblog to connect to a backup library, if there is, the value should be maintained. 
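// Note (assumption): the locals below only sketch a primary-cluster configuration
// (PRIMARY_CLUSTER, cluster_id_ = 1, switch timestamp 1, version 1). Within this hunk
// they are initialized but never registered with any global context, consistent with
// the comment above that liboblog does not maintain a backup-cluster connection.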
+ share::ObClusterInfo cluster_info; + share::ObRedoTransportOption redo_option; + ObSEArray sync_cluster_id; + cluster_info.cluster_type_ = PRIMARY_CLUSTER; + cluster_info.cluster_id_ = 1; + cluster_info.set_switch_timestamp(1); + cluster_info.version_ = 1; +} + +int ObLogInstance::init_oblog_version_components_() +{ + int ret = OB_SUCCESS; + uint64_t oblog_version = 0; + if (OB_FAIL(ObClusterVersion::get_version(PACKAGE_VERSION, oblog_version))) { + LOG_ERROR("get_version fail", KR(ret), K(PACKAGE_VERSION), K(oblog_version)); + } else { + cal_version_components_(oblog_version, oblog_major_, oblog_minor_, oblog_patch_); + } + + if (OB_SUCC(ret)) { + LOG_INFO("init oblog_version components succ", K(PACKAGE_VERSION), K(oblog_version), + K(oblog_major_), K(oblog_minor_), K(oblog_patch_)); + } + return ret; +} + +void ObLogInstance::cal_version_components_(const uint64_t version, + uint32_t &major, + uint16_t &minor, + uint16_t &patch) +{ + major = OB_VSN_MAJOR(version); + minor = OB_VSN_MINOR(version); + patch = OB_VSN_PATCH(version); +} + +int ObLogInstance::get_tenant_guard(const uint64_t tenant_id, ObLogTenantGuard &guard) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(tenant_mgr_)) { + LOG_ERROR("tenant_mgr is NULL", K(tenant_mgr_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant_mgr_->get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id), K(guard)); + } + } else { + // success + } + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_instance.h b/src/liboblog/src/ob_log_instance.h new file mode 100644 index 0000000000000000000000000000000000000000..480fa0795fc8eb936e732dfa85212df933d3e200 --- /dev/null +++ b/src/liboblog/src/ob_log_instance.h @@ -0,0 +1,353 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_INSTANCE_H__ +#define OCEANBASE_LIBOBLOG_INSTANCE_H__ + +#include "liboblog.h" + +#include "lib/allocator/ob_concurrent_fifo_allocator.h" // ObConcurrentFIFOAllocator +#include "lib/alloc/memory_dump.h" // memory_meta_dump + +#include "ob_log_binlog_record.h" // ObLogBR +#include "ob_obj2str_helper.h" // ObObj2strHelper +#include "ob_log_task_pool.h" // ObLogTransTaskPool +#include "ob_log_entry_task_pool.h" // ObLogEntryTaskPool +#include "ob_log_binlog_record_queue.h" // BRQueue +#include "ob_log_trans_stat_mgr.h" // TransTpsRpsStatInfo, IObLogTransStatMgr +#include "ob_log_systable_helper.h" // ObLogSysTableHelper +#include "ob_log_hbase_mode.h" // ObLogHbaseUtil +#include "ob_log_mysql_proxy.h" // ObLogMysqlProxy +#include "ob_log_work_mode.h" // WorkingMode + +using namespace oceanbase::logmessage; + +namespace oceanbase +{ + +namespace common +{ +namespace sqlclient +{ +class ObMySQLServerProvider; +} // namespace sqlclient +} // namespace common + +namespace liboblog +{ +class IObLogMetaManager; +class IObLogSchemaGetter; +class IObLogTimeZoneInfoGetter; +class IObLogFetcher; +class IObLogDDLHandler; +class IObLogDmlParser; +class IObLogDdlParser; +class IObLogPartTransParser; +class IObLogSequencer; +class IObLogFormatter; +class IObLogStorager; +class IObLogDataProcessor; +class IObLogCommitter; +class PartTransTask; +class IObLogStartSchemaMatcher; +class IObLogTableMatcher; +class IObLogTransCtxMgr; +class IObStoreService; +class IObLogBRPool; +class IObLogResourceCollector; +class IObLogTenantMgr; +class ObLogTenantGuard; + +typedef ObLogTransTaskPool PartTransTaskPool; + +// interface for error handler +class IObLogErrHandler +{ +public: + virtual ~IObLogErrHandler() {} + +public: + virtual void handle_error(const int err_no, const char *fmt, ...) 
= 0; +}; + +typedef common::sqlclient::ObMySQLServerProvider ServerProviderType; + +class ObLogInstance : public IObLog, public IObLogErrHandler +{ +public: + virtual ~ObLogInstance(); + + static const int64_t TASK_POOL_ALLOCATOR_PAGE_SIZE = common::OB_MALLOC_BIG_BLOCK_SIZE; + static const int64_t TASK_POOL_ALLOCATOR_TOTAL_LIMIT = (1LL << 37); // 128G + static const int64_t TASK_POOL_ALLOCATOR_HOLD_LIMIT = TASK_POOL_ALLOCATOR_PAGE_SIZE; + static const int64_t DATA_OP_TIMEOUT = 10L * 1000L * 1000L; + static const int64_t CLEAN_LOG_INTERVAL = 60L * 1000L * 1000L; + static const int64_t REFRESH_SERVER_LIST_INTERVAL = 10L * 1000L * 1000L; + +protected: + ObLogInstance(); + +public: + static ObLogInstance *get_instance(); + static ObLogInstance &get_ref_instance(); + static void destroy_instance(); + +public: + virtual int init(const char *config_file, + const uint64_t start_timestamp, + ERROR_CALLBACK err_cb = NULL); + + // Start-up timestamps are in microseconds + int init_with_start_tstamp_usec(const char *config_file, + const uint64_t start_timestamp_usec, + ERROR_CALLBACK err_cb = NULL); + + virtual int init(const std::map& configs, + const uint64_t start_timestamp, + ERROR_CALLBACK err_cb = NULL); + + virtual int init_with_start_tstamp_usec(const std::map& configs, + const uint64_t start_timestamp_usec, + ERROR_CALLBACK err_cb = NULL); + + virtual void destroy(); + + virtual int next_record(ILogRecord **record, const int64_t timeout_us); + virtual int next_record(ILogRecord **record, + int32_t &major_version, + uint64_t &tenant_id, + const int64_t timeout_us); + virtual void release_record(ILogRecord *record); + virtual int launch(); + virtual void stop(); + virtual int table_group_match(const char *pattern, + bool &is_matched, + const int fnmatch_flags = FNM_CASEFOLD); + virtual int get_table_groups(std::vector &table_groups); + virtual int get_tenant_ids(std::vector &tenant_ids); + +public: + void mark_stop_flag(); + void handle_error(const int err_no, const char *fmt, ...); + void timer_routine(); + void sql_thread_routine(); + void flow_control_thread_routine(); + +public: + int32_t get_log_level() const; + const char *get_log_file() const; + void set_disable_redirect_log(const bool flag) { disable_redirect_log_ = flag; } + static void print_version(); + int set_assign_log_dir(const char *log_dir, const int64_t log_dir_len); + void enable_schema_split_mode() + { + ATOMIC_STORE(&is_schema_split_mode_, true); + } + +public: + // Backup using the interface: + // 1. set ddl schema version + // For schema non-split mode. + // Pass in the maximum ddl_schema_version for all backup tenants, sequencer takes the maximum of start_schema_version and that value + // as the initial value of the global Schema version number, to ensure that the schema version of each tenant's data stream is not rolled back + // For schema splitting mode, it needs to be set based on tenant_id + // + // @retval OB_SUCCESS Success + // @retval OB_ENTRY_NOT_EXIST tenant does not exist, tenant has been deleted + // @retval Other error codes Fail + int set_data_start_ddl_schema_version(const uint64_t tenant_id, + const int64_t ddl_schema_version); + // 2. 
Get the starting schema version by tenant ID + // No need to check suspension status for new tenants in the middle + // + // @retval OB_SUCCESS Success + // @retval OB_ENTRY_NOT_EXIST tenant does not exist, tenant has been deleted + // @retval Other error codes Fail + int get_start_schema_version(const uint64_t tenant_id, + const bool is_create_tenant_when_backup, + int64_t &start_schema_version); + // 3. set start global trans version + int set_start_global_trans_version(const int64_t start_global_trans_version); + +public: + friend class ObLogGlobalContext; + +private: + int init_logger_(); + int dump_config_(); + int init_sys_var_for_generate_column_schema_(); + int init_common_(const uint64_t start_timestamp, ERROR_CALLBACK err_cb); + int get_pid_(); + int init_self_addr_(); + int init_schema_split_mode_(const int64_t sys_schema_version); + int init_schema_(const int64_t start_tstamp_us, int64_t &sys_start_schema_version); + int init_components_(const uint64_t start_timestamp); + int config_tenant_mgr_(const int64_t start_tstamp_us, const int64_t sys_schema_version); + void destroy_components_(); + void write_pid_file_(); + static void *timer_thread_func_(void *args); + static void *sql_thread_func_(void *args); + static void *flow_control_thread_func_(void *args); + int start_threads_(); + void wait_threads_stop_(); + void reload_config_(); + void print_tenant_memory_usage_(); + void global_flow_control_(); + void dump_pending_trans_info_(); + int revert_participants_(PartTransTask *participants); + int revert_trans_task_(PartTransTask *task); + void clean_log_(); + int init_global_tenant_manager_(); + int init_global_kvcache_(); + // Get the total amount of memory occupied by liboblog + int64_t get_memory_hold_(); + // Get system free memory + int64_t get_memory_avail_(); + // Get system memory limit + int64_t get_memory_limit_(); + // Get the number of tasks to be processed + int get_task_count_(int64_t &ready_to_seq_task_count, + int64_t &part_trans_task_resuable_count); + + // next record + void do_drc_consume_tps_stat_(); + void do_drc_consume_rps_stat_(); + // release record + void do_drc_release_tps_stat_(); + void do_drc_release_rps_stat_(); + // statistical number of tasks + void do_stat_for_part_trans_task_count_(int record_type, + int64_t part_trans_task_count, + bool need_accumulate_stat); + int64_t get_out_part_trans_task_count_() const { return ATOMIC_LOAD(&part_trans_task_count_); } + + // Print transaction statistics + void print_trans_stat_(); + // verify ObTraceId + int init_ob_trace_id_(const char *ob_trace_id_ptr); + int verify_ob_trace_id_(ILogRecord *record); + // verify ddl schema version + int verify_ddl_schema_version_(ILogRecord *br); + // verify dml unique id + int verify_dml_unique_id_(ILogRecord *br); + int get_br_filter_value_(ILogRecord &br, + const int64_t idx, + common::ObString &str); + int query_cluster_info_(ObLogSysTableHelper::ClusterInfo &cluser_info); + void update_cluster_version_(); + int check_ob_version_legal_(const uint64_t ob_version); + int check_observer_version_valid_(); + int init_ob_cluster_version_(); + int init_oblog_version_components_(); + void cal_version_components_(const uint64_t version, uint32_t &major, uint16_t &minor, uint16_t &patch); + int query_cluster_min_observer_version_(uint64_t &min_observer_version); + // Initialize global variables for compatibility with GCTX dependencies + void init_global_context_(); + int config_data_start_schema_version_(const int64_t global_data_start_schema_version); + int 
update_data_start_schema_version_on_split_mode_(); + +private: + static ObLogInstance *instance_; + +private: + bool inited_; + uint32_t oblog_major_; + uint16_t oblog_minor_; + uint16_t oblog_patch_; + pthread_t timer_tid_; // Thread that perform light-weight tasks + pthread_t sql_tid_; // Thread that perform SQL-related tasks + pthread_t flow_control_tid_; // Thread that perform flow control + ERROR_CALLBACK err_cb_; + int global_errno_; + int8_t handle_error_flag_; + bool disable_redirect_log_; + int64_t log_clean_cycle_time_us_; + + int64_t output_dml_br_count_ CACHE_ALIGNED; + int64_t output_ddl_br_count_ CACHE_ALIGNED; + + volatile bool stop_flag_ CACHE_ALIGNED; + // Record microsecond timestamps + int64_t last_heartbeat_timestamp_micro_sec_ CACHE_ALIGNED; + + // Specify the OB_LOGGER directory path + char assign_log_dir_[common::OB_MAX_FILE_NAME_LENGTH]; + bool is_assign_log_dir_valid_; + + // ob_trace_id + char ob_trace_id_str_[common::OB_MAX_TRACE_ID_BUFFER_SIZE + 1]; + uint64_t br_index_in_trans_; + + // Count the number of partitioned transaction tasks + // Users holding unreturned + int64_t part_trans_task_count_ CACHE_ALIGNED; + + // Partitioned Task Pool allocator + common::ObConcurrentFIFOAllocator trans_task_pool_alloc_; + + // External global exposure of variables via TCTX +public: + int64_t start_tstamp_; + bool is_schema_split_mode_; + std::string drc_message_factory_binlog_record_type_; + WorkingMode working_mode_; + + // compoments + ObLogMysqlProxy mysql_proxy_; + IObLogTimeZoneInfoGetter *timezone_info_getter_; + ObLogHbaseUtil hbase_util_; + ObObj2strHelper obj2str_helper_; + BRQueue br_queue_; + PartTransTaskPool trans_task_pool_; + IObLogEntryTaskPool *log_entry_task_pool_; + IObStoreService *store_service_; + IObLogBRPool *br_pool_; + IObLogTransCtxMgr *trans_ctx_mgr_; + IObLogMetaManager *meta_manager_; + IObLogResourceCollector *resource_collector_; + ServerProviderType *server_provider_; + IObLogSchemaGetter *schema_getter_; + IObLogTableMatcher *tb_matcher_; + IObLogStartSchemaMatcher *ss_matcher_; + ObLogSysTableHelper *systable_helper_; + IObLogCommitter *committer_; + IObLogStorager *storager_; + IObLogDataProcessor *data_processor_; + IObLogFormatter *formatter_; + IObLogSequencer *sequencer_; + IObLogPartTransParser *part_trans_parser_; + IObLogDmlParser *dml_parser_; + IObLogDdlParser *ddl_parser_; + IObLogDDLHandler *ddl_handler_; + IObLogFetcher *fetcher_; + IObLogTransStatMgr *trans_stat_mgr_; // Transaction Statistics Management + IObLogTenantMgr *tenant_mgr_; + // The tz information of the sys tenant is placed in instance because of the refresh schema dependency + ObTZInfoMap tz_info_map_; + ObTimeZoneInfoWrap tz_info_wrap_; + + // Functions exposed to the outside via TCTX +public: + // @retval OB_SUCCESS success + // @retval OB_ENTRY_NOT_EXIST tenant not exist + // @retval other error code fail + int get_tenant_guard(const uint64_t tenant_id, ObLogTenantGuard &guard); + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogInstance); +}; + +#define TCTX (ObLogInstance::get_ref_instance()) + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_INSTANCE_H__ */ diff --git a/src/liboblog/src/ob_log_lighty_list.h b/src/liboblog/src/ob_log_lighty_list.h new file mode 100644 index 0000000000000000000000000000000000000000..0f7de838f806eb06ee6d0ebba098f142657f04fd --- /dev/null +++ b/src/liboblog/src/ob_log_lighty_list.h @@ -0,0 +1,66 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_LIGHTY_LIST_H__ +#define OCEANBASE_LIBOBLOG_LIGHTY_LIST_H__ + +#include "share/ob_define.h" + +namespace oceanbase +{ +namespace liboblog +{ + +template <typename Type> +struct LightyList +{ + Type *head_; + Type *tail_; + int64_t num_; + + LightyList() : head_(NULL), tail_(NULL), num_(0) + {} + + ~LightyList() { reset(); } + + void reset() + { + head_ = NULL; + tail_ = NULL; + num_ = 0; + } + + int add(Type *node) + { + int ret = common::OB_SUCCESS; + + if (NULL == node) { + ret = common::OB_INVALID_ARGUMENT; + } else if (NULL == head_) { + head_ = node; + tail_ = node; + node->set_next(NULL); + num_ = 1; + } else { + tail_->set_next(node); + tail_ = node; + node->set_next(NULL); + num_++; + } + return ret; + } + + TO_STRING_KV(KP_(head), KP_(tail), K_(num)); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_LIGHTY_LIST_H__ */ diff --git a/src/liboblog/src/ob_log_main.c b/src/liboblog/src/ob_log_main.c new file mode 100644 index 0000000000000000000000000000000000000000..fb2c5a72e1bcd07851620c9e19dac0f913f93aa5 --- /dev/null +++ b/src/liboblog/src/ob_log_main.c @@ -0,0 +1,39 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include <stdio.h> +#include <stdlib.h> + +const char my_interp[] __attribute__((section(".interp"))) + = "/lib64/ld-linux-x86-64.so.2"; + +const char* build_version(); +const char* build_date(); +const char* build_time(); +const char* build_flags(); + +int so_main() +{ + fprintf(stdout, "\n"); + + fprintf(stdout, "liboblog (%s %s)\n", PACKAGE_STRING, RELEASEID); + fprintf(stdout, "\n"); + + fprintf(stdout, "BUILD_VERSION: %s\n", build_version()); + fprintf(stdout, "BUILD_TIME: %s %s\n", build_date(), build_time()); + fprintf(stdout, "BUILD_FLAGS: %s\n", build_flags()); + exit(0); +} + +void __attribute__((constructor)) ob_log_init() +{ +} diff --git a/src/liboblog/src/ob_log_meta_manager.cpp b/src/liboblog/src/ob_log_meta_manager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a7b15a4b0c66d8dd8cda13aae67b847b88bd4e30 --- /dev/null +++ b/src/liboblog/src/ob_log_meta_manager.cpp @@ -0,0 +1,1560 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_meta_manager.h" + +#include // LogMsgFactory + +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "observer/mysql/obsm_utils.h" // ObSMUtils +#include "rpc/obmysql/ob_mysql_global.h" // obmysql +#include "share/schema/ob_table_schema.h" // ObTableSchema, ObSimpleTableSchemaV2 +#include "share/schema/ob_column_schema.h" // ObColumnSchemaV2 + +#include "ob_log_schema_getter.h" // ObLogSchemaGuard, DBSchemaInfo, TenantSchemaInfo +#include "ob_log_utils.h" // print_mysql_type, ob_log_malloc +#include "ob_obj2str_helper.h" // ObObj2strHelper +#include "ob_log_adapt_string.h" // ObLogAdaptString +#include "ob_log_config.h" // TCONF + +#define META_STAT_INFO(fmt, args...) LOG_INFO("[META_STAT] " fmt, args) +#define META_STAT_DEBUG(fmt, args...) LOG_DEBUG("[META_STAT] " fmt, args) + +#define SET_ENCODING(meta, charset) \ + do {\ + meta->setEncoding(ObCharset::charset_name(charset)); \ + } while (0) + +using namespace oceanbase::common; +using namespace oceanbase::obmysql; +using namespace oceanbase::share::schema; +namespace oceanbase +{ +namespace liboblog +{ +ObLogMetaManager::ObLogMetaManager() : inited_(false), + enable_output_hidden_primary_key_(false), + obj2str_helper_(NULL), + ddl_table_meta_(NULL), + db_meta_map_(), + tb_meta_map_(), + tb_schema_info_map_(), + allocator_() +{ +} + +ObLogMetaManager::~ObLogMetaManager() +{ + destroy(); +} + +int ObLogMetaManager::init(ObObj2strHelper *obj2str_helper, + const bool enable_output_hidden_primary_key) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("meta manager has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(obj2str_helper)) { + LOG_ERROR("invalid argument", K(obj2str_helper)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(db_meta_map_.init(ObModIds::OB_LOG_DATABASE_META_MAP))) { + LOG_ERROR("init db_meta_map fail", KR(ret)); + } else if (OB_FAIL(tb_meta_map_.init(ObModIds::OB_LOG_TABLE_META_MAP))) { + LOG_ERROR("init tb_meta_map fail", KR(ret)); + } else if (OB_FAIL(tb_schema_info_map_.init(ObModIds::OB_LOG_TABLE_SCHEMA_META_MAP))) { + LOG_ERROR("init tb_schema_info_map_ fail", KR(ret)); + } else if (OB_FAIL(allocator_.init(ALLOCATOR_TOTAL_LIMIT, + ALLOCATOR_HOLD_LIMIT, + ALLOCATOR_PAGE_SIZE))) { + LOG_ERROR("init allocator fail", KR(ret)); + } else if (OB_FAIL(build_ddl_meta_())) { + LOG_ERROR("build ddl meta fail", KR(ret)); + } else { + enable_output_hidden_primary_key_ = enable_output_hidden_primary_key; + obj2str_helper_ = obj2str_helper; + inited_ = true; + } + + return ret; +} + +void ObLogMetaManager::destroy() +{ + destroy_ddl_meta_(); + + inited_ = false; + enable_output_hidden_primary_key_ = false; + obj2str_helper_ = NULL; + + // note: destroy tb_schema_info_map first, then destroy allocator + tb_schema_info_map_.destroy(); + allocator_.destroy(); + db_meta_map_.destroy(); + tb_meta_map_.destroy(); + + ddl_table_meta_ = NULL; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::get_table_meta(const share::schema::ObSimpleTableSchemaV2 *simple_table_schema, + IObLogSchemaGetter &schema_getter, + ITableMeta *&table_meta, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + TableMetaInfo *meta_info = NULL; + + if (OB_ISNULL(simple_table_schema)) { + LOG_ERROR("invalid argument", K(simple_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + MetaKey key; + key.id_ = 
simple_table_schema->get_table_id(); + + if (OB_FAIL((get_meta_info_(tb_meta_map_, key, meta_info)))) { + LOG_ERROR("get table meta info fail", KR(ret), K(key)); + } else { + int64_t version = simple_table_schema->get_schema_version(); + ret = get_meta_from_meta_info_(meta_info, version, table_meta); + + if (OB_SUCCESS != ret && OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get_meta_from_meta_info_ fail", KR(ret), K(version)); + } else if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + // refresh ObTableSchema when build meta for the first time + const int64_t table_id = simple_table_schema->get_table_id(); + const int64_t schema_version = simple_table_schema->get_schema_version(); + const share::schema::ObTableSchema *table_schema = NULL; + ObLogSchemaGuard schema_mgr; + + RETRY_FUNC(stop_flag, schema_getter, get_schema_guard_and_full_table_schema, table_id, schema_version, GET_SCHEMA_TIMEOUT, + schema_mgr, table_schema); + + if (OB_FAIL(ret)) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_schema_guard_and_full_table_schema fail", KR(ret), + "schema_version", simple_table_schema->get_schema_version(), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), KPC(table_schema)); + } + } else if (OB_ISNULL(table_schema)) { + // tenant has been dropped + LOG_WARN("table_schema is null, tenant may be dropped", K(table_schema), + "schema_version", simple_table_schema->get_schema_version(), + "tenant_id", simple_table_schema->get_tenant_id(), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), KPC(simple_table_schema)); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else if (OB_FAIL(add_and_get_table_meta_(meta_info, table_schema, schema_mgr, table_meta, + stop_flag))) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("add_and_get_table_meta_ fail", KR(ret), + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id()); + } + } else { + // succ + } + } else { /* OB_SUCCESS == ret*/ } + } + } + + return ret; +} + +void ObLogMetaManager::revert_table_meta(ITableMeta *table_meta) +{ + int ret = OB_SUCCESS; + + if (NULL != table_meta && ddl_table_meta_ != table_meta) { + int64_t ref_cnt = 0; + + if (OB_FAIL(dec_meta_ref_(table_meta, ref_cnt))) { + LOG_ERROR("dec_meta_ref_ fail", KR(ret), K(table_meta)); + } else if (0 == ref_cnt) { + // destroy all colMeta by default + LogMsgFactory::destroy(table_meta); + table_meta = NULL; + } + } +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::get_db_meta( + const DBSchemaInfo &db_schema_info, + ObLogSchemaGuard &schema_mgr, + IDBMeta *&db_meta, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + DBMetaInfo *meta_info = NULL; + const int64_t db_schema_version = db_schema_info.version_; + uint64_t db_id = db_schema_info.db_id_; + + if (OB_UNLIKELY(! 
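/*
 * [Editor's note: illustrative usage sketch, not part of the patch.]
 * get_table_meta() hands out a reference-counted ITableMeta, so every
 * successful call must be paired with revert_table_meta(); the meta is
 * destroyed only when its count falls back to zero, and revert_table_meta()
 * deliberately skips the shared ddl_table_meta_. Hypothetical caller
 * (meta_manager / simple_schema / schema_getter / stop are assumed names):
 *
 *   ITableMeta *meta = NULL;
 *   if (OB_SUCC(meta_manager.get_table_meta(simple_schema, schema_getter, meta, stop))) {
 *     // ... use meta: meta->getPKs(), meta->getCol(name), etc. ...
 *     meta_manager.revert_table_meta(meta);  // drop the reference taken above
 *   }
 */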
db_schema_info.is_valid())) { + LOG_ERROR("invalid argument", K(db_schema_info)); + ret = OB_INVALID_ARGUMENT; + } else { + MetaKey key; + key.id_ = db_id; + + if (OB_FAIL((get_meta_info_(db_meta_map_, key, meta_info)))) { + LOG_ERROR("get database meta info fail", KR(ret), K(key)); + } else { + ret = get_meta_from_meta_info_(meta_info, db_schema_version, db_meta); + + if (OB_SUCCESS != ret && OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get_meta_from_meta_info_ fail", KR(ret), K(db_schema_version)); + } else if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + + // get db name and tenant name when first build db meta + uint64_t tenant_id = extract_tenant_id(db_id); + TenantSchemaInfo tenant_schema_info; + + // get tenant name + RETRY_FUNC(stop_flag, schema_mgr, get_tenant_schema_info, tenant_id, tenant_schema_info, + GET_SCHEMA_TIMEOUT); + + if (OB_FAIL(ret)) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get_tenant_schema_info fail", KR(ret), K(tenant_id), K(db_id)); + } + } else if (OB_FAIL(add_and_get_db_meta_(meta_info, db_schema_info, tenant_schema_info, db_meta))) { + LOG_ERROR("add_and_get_db_meta_ fail", KR(ret), KP(meta_info), K(db_schema_info), + K(tenant_schema_info)); + } + } else { /* OB_SUCCESS == ret*/ } + } + } + + return ret; +} + +void ObLogMetaManager::revert_db_meta(IDBMeta *db_meta) +{ + int ret = OB_SUCCESS; + + if (NULL != db_meta) { + int64_t ref_cnt = 0; + + if (OB_FAIL(dec_meta_ref_(db_meta, ref_cnt))) { + LOG_ERROR("dec_meta_ref_ fail", KR(ret), K(db_meta)); + } else if (0 == ref_cnt) { + LogMsgFactory::destroy(db_meta); + db_meta = NULL; + } + } +} + +int ObLogMetaManager::drop_table(const int64_t table_id) +{ + UNUSED(table_id); + int ret = OB_SUCCESS; + // TODO + return ret; +} + +int ObLogMetaManager::drop_database(const int64_t database_id) +{ + UNUSED(database_id); + int ret = OB_SUCCESS; + // TODO + return ret; +} + +template +int ObLogMetaManager::get_meta_info_(MetaMapType &meta_map, + const MetaKey &key, + MetaInfoType *&meta_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
key.is_valid())) { + LOG_ERROR("invalid argument", K(key)); + ret = OB_INVALID_ARGUMENT; + } else { + ret = meta_map.get(key, meta_info); + + if (OB_SUCCESS == ret && OB_ISNULL(meta_info)) { + LOG_ERROR("get meta info from meta_map fail", KR(ret), K(meta_info)); + ret = OB_ERR_UNEXPECTED; + } + + // insert new MetaInfo + if (OB_ENTRY_NOT_EXIST == ret) { + MetaInfoType *tmp_meta_info = + static_cast(allocator_.alloc(sizeof(MetaInfoType))); + + if (OB_ISNULL(tmp_meta_info)) { + LOG_ERROR("allocate memory for MetaInfo fail", "size", sizeof(MetaInfoType), K(key)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + new (tmp_meta_info) MetaInfoType(); + + ret = meta_map.insert(key, tmp_meta_info); + + if (OB_SUCC(ret)) { + meta_info = tmp_meta_info; + } else { + tmp_meta_info->~MetaInfoType(); + allocator_.free(static_cast(tmp_meta_info)); + tmp_meta_info = NULL; + + if (OB_ENTRY_EXIST == ret) { + if (OB_FAIL(meta_map.get(key, meta_info))) { + LOG_ERROR("get meta info from map fail", KR(ret), K(key)); + } else if (OB_ISNULL(meta_info)) { + LOG_ERROR("get meta info from meta_map fail", KR(ret), K(meta_info)); + ret = OB_ERR_UNEXPECTED; + } + } else { + LOG_ERROR("insert meta info into map fail", KR(ret), K(key)); + } + } + } + } else if (OB_FAIL(ret)) { + LOG_ERROR("get meta info from map fail", KR(ret), K(key)); + } else { + // OB_SUCCESS == ret + } + } + + return ret; +} + +template +int ObLogMetaManager::get_meta_from_meta_info_(MetaInfoType *meta_info, + const int64_t version, + MetaType *&meta) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta_info)) { + LOG_ERROR("invalid argument", K(meta_info)); + ret = OB_INVALID_ARGUMENT; + } else { + meta = NULL; + + // add read lock + RLockGuard guard(meta_info->lock_); + + ret = meta_info->get(version, meta); + + if (OB_SUCC(ret)) { + // increase ref count of meta + if (OB_FAIL(inc_meta_ref_(meta))) { + LOG_ERROR("inc_meta_ref_ fail", KR(ret), K(meta)); + } + } + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::add_and_get_table_meta_(TableMetaInfo *meta_info, + const share::schema::ObTableSchema *table_schema, + ObLogSchemaGuard &schema_mgr, + ITableMeta *&table_meta, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta_info) || OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(meta_info), K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t version = table_schema->get_schema_version(); + + table_meta = NULL; + + // add write lock + WLockGuard guard(meta_info->lock_); + + // First check if there is already a corresponding version of Meta, and if not, create a new one directly + ret = meta_info->get(version, table_meta); + + if (OB_ENTRY_NOT_EXIST == ret) { // not exist + ret = OB_SUCCESS; + + // Create a new Table Meta and insert the Meta into the Meta Info chain(linked list) + if (OB_FAIL(build_table_meta_(table_schema, schema_mgr, table_meta, stop_flag))) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("build_table_meta_ fail", KR(ret), KP(table_schema)); + } + } else if (OB_FAIL(meta_info->set(version, table_meta))) { + LOG_ERROR("set meta info meta info fail", KR(ret), K(version), KP(table_meta)); + } + } else if (OB_FAIL(ret)) { + LOG_ERROR("get meta from meta info fail", KR(ret), K(version)); + } else { + // succ + } + + // increase ref count after get meta + if (OB_SUCC(ret)) { + if 
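/*
 * [Editor's note: illustrative sketch, not part of the patch; map/alloc/key/info
 * are shorthand for the members used above, and error handling is elided.]
 * get_meta_info_() is a get-or-insert on a concurrent hash map: a miss
 * allocates a MetaInfo from allocator_, placement-news it and tries to insert;
 * a thread that loses the race (insert() returns OB_ENTRY_EXIST) destroys its
 * own copy and re-reads the winner's entry. Condensed:
 *
 *   if (OB_ENTRY_NOT_EXIST == (ret = map.get(key, info))) {
 *     MetaInfoType *mine = new (alloc.alloc(sizeof(MetaInfoType))) MetaInfoType();
 *     if (OB_ENTRY_EXIST == (ret = map.insert(key, mine))) {
 *       mine->~MetaInfoType();
 *       alloc.free(mine);              // another thread inserted first
 *       ret = map.get(key, info);      // use the winner's entry
 *     } else if (OB_SUCC(ret)) {
 *       info = mine;
 *     }
 *   }
 */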
(OB_FAIL(inc_meta_ref_(table_meta))) { + LOG_ERROR("inc_meta_ref_ fail", KR(ret), K(table_meta)); + } + } + } + + return ret; +} + +int ObLogMetaManager::add_and_get_db_meta_(DBMetaInfo *meta_info, + const DBSchemaInfo &db_schema_info, + const TenantSchemaInfo &tenant_schema_info, + IDBMeta *&db_meta) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta_info)) { + LOG_ERROR("invalid argument", K(meta_info)); + ret = OB_INVALID_ARGUMENT; + } else { + db_meta = NULL; + int64_t db_schema_version = db_schema_info.version_; + + // add write lock + WLockGuard guard(meta_info->lock_); + + // First check if there is already a corresponding version of Meta, and if not, create a new one directly + ret = meta_info->get(db_schema_version, db_meta); + + if (OB_ENTRY_NOT_EXIST == ret) { // not exist + ret = OB_SUCCESS; + + // Create a new DB Meta and insert the Meta into the Meta Info chain + if (OB_FAIL(build_db_meta_(db_schema_info, tenant_schema_info, db_meta))) { + LOG_ERROR("build_db_meta_ fail", KR(ret), K(db_schema_info), K(tenant_schema_info)); + } else if (OB_FAIL(meta_info->set(db_schema_version, db_meta))) { + LOG_ERROR("set meta info meta info fail", KR(ret), K(db_schema_version), KP(db_meta)); + } + } else if (OB_FAIL(ret)) { + LOG_ERROR("get meta from meta info fail", KR(ret), K(db_schema_version)); + } else { + // succ + } + + // increase ref count after get Meta + if (OB_SUCC(ret)) { + if (OB_FAIL(inc_meta_ref_(db_meta))) { + LOG_ERROR("inc_meta_ref_ fail", KR(ret), K(db_meta)); + } + } + } + + return ret; +} + +template +int ObLogMetaManager::inc_meta_ref_(MetaType *meta) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta)) { + LOG_ERROR("invalid argument", K(meta)); + ret = OB_INVALID_ARGUMENT; + } else { + (void)ATOMIC_AAF(reinterpret_cast(meta->getUserDataPtr()), 1); + } + + return ret; +} + +template +int ObLogMetaManager::dec_meta_ref_(MetaType *meta, int64_t &ref_cnt) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta)) { + LOG_ERROR("invalid argument", K(meta)); + ret = OB_INVALID_ARGUMENT; + } else { + ref_cnt = ATOMIC_AAF(reinterpret_cast(meta->getUserDataPtr()), -1); + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::build_table_meta_(const share::schema::ObTableSchema *table_schema, + ObLogSchemaGuard &schema_mgr, + ITableMeta *&table_meta, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + ITableMeta *tmp_table_meta = LogMsgFactory::createTableMeta(); + TableSchemaInfo *tb_schema_info = NULL; + + if (OB_FAIL(alloc_table_schema_info_(tb_schema_info))) { + LOG_ERROR("alloc_table_schema_info_ fail", KR(ret), KPC(tb_schema_info)); + } else if (OB_ISNULL(tb_schema_info)) { + LOG_ERROR("tb_schema_info is null"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tb_schema_info->init(table_schema))) { + LOG_ERROR("tb_schema_info init fail", KR(ret), K(table_schema), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name()); + } else if (OB_ISNULL(tmp_table_meta)) { + LOG_ERROR("createTableMeta fail, return NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(build_column_metas_(tmp_table_meta, table_schema, *tb_schema_info, + schema_mgr, stop_flag))) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + if (OB_IN_FATAL_STATE != ret) { + LOG_ERROR("build_column_metas_ fail", KR(ret), 
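/*
 * [Editor's note: minimal sketch of the pattern, not part of the patch; types
 * are as I read them from the surrounding code.]
 * There is no separate ref-count field: the user-data slot of ITableMeta/IDBMeta
 * doubles as an int64_t counter. build_*_meta_() seeds it to 1 through
 * setUserData(), inc_/dec_meta_ref_() adjust it with ATOMIC_AAF on the address
 * returned by getUserDataPtr(), and revert_*_meta() destroys the meta once the
 * decremented value reaches zero:
 *
 *   meta->setUserData(reinterpret_cast<void *>(1));                          // on build
 *   ATOMIC_AAF(reinterpret_cast<int64_t *>(meta->getUserDataPtr()), 1);      // on get
 *   int64_t cnt = ATOMIC_AAF(reinterpret_cast<int64_t *>(meta->getUserDataPtr()), -1);
 *   if (0 == cnt) { LogMsgFactory::destroy(meta); }                          // on revert
 */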
KP(tmp_table_meta), KPC(tb_schema_info)); + } + } else { + tmp_table_meta->setName(table_schema->get_table_name()); + tmp_table_meta->setDBMeta(NULL); // NOTE: default to NULL + // The encoding of DB is set to empty, because there is currently an ambiguity, it can be either database or tenant + // to avoid ambiguity, it is set to empty here + SET_ENCODING(tmp_table_meta, table_schema->get_charset_type()); + tmp_table_meta->setUserData(reinterpret_cast(1)); + } + + if (OB_SUCC(ret)) { + table_meta = tmp_table_meta; + } else { + int tmp_ret = OB_SUCCESS; + + if (NULL != tmp_table_meta) { + LogMsgFactory::destroy(tmp_table_meta); + } + + if (NULL != tb_schema_info) { + if (OB_SUCCESS != (tmp_ret = free_table_schema_info_(tb_schema_info))) { + LOG_ERROR("free_table_schema_info_ fail", K(tmp_ret), K(tb_schema_info)); + } + } + } + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::build_column_metas_(ITableMeta *table_meta, + const share::schema::ObTableSchema *table_schema, + TableSchemaInfo &tb_schema_info, + ObLogSchemaGuard &schema_mgr, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(table_meta) || OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(table_meta), K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t version = table_schema->get_schema_version(); + uint64_t table_id = table_schema->get_table_id(); + const bool is_hidden_pk_table = table_schema->is_no_pk_table(); + // index of column, numbering staarts from 0 + // note: hidden column won't task into numbering + int64_t column_index = 0; + ObColumnIterByPrevNextID pre_next_id_iter(*table_schema); + + // build Meata for each column + // ignore hidden column + while (OB_SUCCESS == ret && ! 
stop_flag) { + const share::schema::ObColumnSchemaV2 *column_table_schema = NULL; + IColMeta *col_meta = NULL; + int append_ret = 2; + bool is_column_filter = false; + bool is_hidden_pk_table_pk_increment_column = false; + + if (OB_FAIL(pre_next_id_iter.next(column_table_schema))) { + if (OB_ITER_END != ret) { + LOG_ERROR("pre_next_id_iter next fail", KR(ret), KPC(column_table_schema)); + } + } else if (OB_ISNULL(column_table_schema)) { + LOG_ERROR("column_table_schema is null", KPC(column_table_schema)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(filter_column_(*table_schema, is_hidden_pk_table, *column_table_schema, is_column_filter, + is_hidden_pk_table_pk_increment_column))) { + LOG_ERROR("filter_column_ fail", KR(ret), K(is_column_filter), + K(is_hidden_pk_table_pk_increment_column), + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "column", column_table_schema->get_column_name(), + "column_id", column_table_schema->get_column_id()); + } else if (is_column_filter) { + // do nothing + } else if (NULL != (col_meta = table_meta->getCol(column_table_schema->get_column_name()))) { + // LOG WARN and won't treate it as ERROR + LOG_WARN("col_meta is added into table_meta multiple times", + "table", table_schema->get_table_name(), + "column", column_table_schema->get_column_name()); + } else if (OB_ISNULL(col_meta = LogMsgFactory::createColMeta())) { + LOG_ERROR("createColMeta fails", "col_name", column_table_schema->get_column_name()); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(set_column_meta_(col_meta, *column_table_schema, *table_schema))) { + LOG_ERROR("set_column_meta_ fail", KR(ret), KP(col_meta)); + } else if (0 != + (append_ret = table_meta->append(column_table_schema->get_column_name(), col_meta))) { + LOG_ERROR("append col_meta to table_meta fail", K(append_ret), + "table_name", table_schema->get_table_name(), + "column_name", column_table_schema->get_column_name()); + ret = OB_ERR_UNEXPECTED; + } else { + // success + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(set_column_schema_info_(*table_schema, tb_schema_info, + column_index, *column_table_schema))) { + LOG_ERROR("set_column_schema_info_ fail", KR(ret), KPC(table_schema), K(tb_schema_info), + K(column_index), KPC(column_table_schema)); + } + } + + if (OB_SUCC(ret)) { + if (! 
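/*
 * [Editor's note: clarifying sketch with a hypothetical table, not part of the patch.]
 * Each column that survives filter_column_() is recorded twice: in the
 * LogMsgFactory meta (createColMeta + set_column_meta_ + table_meta->append)
 * and in TableSchemaInfo (set_column_schema_info_), which remembers the
 * column_id -> user column index mapping that set_primary_keys_ and
 * set_unique_keys_ rely on below. column_index advances only for surviving
 * columns, so it is the position a user sees: for a table (c1, c2 hidden, c3)
 * the recorded indexes are c1 -> 0 and c3 -> 1, and c2 gets no index at all.
 * Note that table_meta->append() reports failure with a plain non-zero int
 * rather than an OB_ error code, hence the translation to OB_ERR_UNEXPECTED.
 */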
is_column_filter) { + ++column_index; + } + } + } // while + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + // iterator finish for all columns + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(set_table_schema_(version, table_id, table_schema->get_table_name(), column_index, + tb_schema_info))) { + LOG_ERROR("set_table_schema_ fail", KR(ret), K(version), K(table_id), + "table_name", table_schema->get_table_name(), + "non_hidden_column_cnt", column_index, K(tb_schema_info)); + } else { + // succ + } + } + + if (OB_SUCC(ret)) { + // set primary key column and index column + if (OB_FAIL(set_primary_keys_(table_meta, table_schema, tb_schema_info))) { + LOG_ERROR("set_primary_keys_ fail", KR(ret), "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), K(tb_schema_info)); + } else if (OB_FAIL(set_unique_keys_(table_meta, table_schema, tb_schema_info, schema_mgr, stop_flag))) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("set_unique_keys_ fail", KR(ret), "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), K(tb_schema_info)); + } + } + } + } + + return ret; +} + +int ObLogMetaManager::filter_column_(const share::schema::ObTableSchema &table_schema, + const bool is_hidden_pk_table, + const share::schema::ObColumnSchemaV2 &column_table_schema, + bool &is_filter, + bool &is_hidden_pk_table_pk_increment_column) +{ + int ret = OB_SUCCESS; + // won't filter by default + is_filter = false; + is_hidden_pk_table_pk_increment_column = false; + bool is_non_user_column = false; + const uint64_t column_id = column_table_schema.get_column_id(); + const char *column_name = column_table_schema.get_column_name(); + const bool enable_output_invisible_column = TCONF.enable_output_invisible_column; + + if (OB_FAIL(filter_non_user_column(is_hidden_pk_table, enable_output_hidden_primary_key_, column_id, + is_non_user_column, is_hidden_pk_table_pk_increment_column))) { + LOG_ERROR("filter_non_user_column fail", KR(ret), K(is_hidden_pk_table), + K(enable_output_hidden_primary_key_), + K(column_id), K(column_name), + K(is_non_user_column), + K(is_hidden_pk_table_pk_increment_column)); + } else if (is_non_user_column) { + is_filter = true; + META_STAT_INFO("ignore non user column", K(is_non_user_column), + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + K(column_id), K(column_name)); + } else if (is_hidden_pk_table_pk_increment_column) { + is_filter = false; + LOG_INFO("handle hidden pk table __pk_increment column", K(is_filter), K(is_hidden_pk_table), + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + K(column_id), K(column_name)); + } else if (column_table_schema.is_hidden()) { + is_filter = true; + META_STAT_INFO("ignore hidden column", + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + K(column_id), K(column_name)); + } else if (column_table_schema.is_invisible_column() && ! 
enable_output_invisible_column) { + is_filter = true; + META_STAT_INFO("ignore invisible column", + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + K(column_id), K(column_name)); + } else { + // do nothing + } + + return ret; +} + +int ObLogMetaManager::set_column_meta_(IColMeta *col_meta, + const share::schema::ObColumnSchemaV2 &column_schema, + const share::schema::ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(col_meta)) { + LOG_ERROR("invalid argument", K(col_meta)); + ret = OB_INVALID_ARGUMENT; + } else { + uint16_t type_flag = 0; + ObScale decimals = 0; // FIXME: does liboblog need this? + EMySQLFieldType mysql_type = MYSQL_TYPE_NOT_DEFINED; + + if (OB_FAIL(ObSMUtils::get_mysql_type(column_schema.get_data_type(), + mysql_type, type_flag, decimals))) { + LOG_ERROR("get_mysql_type fail", KR(ret), "ob_type", column_schema.get_data_type()); + } else { + //mysql treat it as MYSQL_TYPE_STRING, it is not suitable for liboblog + if (ObEnumType == column_schema.get_data_type()) { + mysql_type = obmysql::MYSQL_TYPE_ENUM; + } else if (ObSetType == column_schema.get_data_type()) { + mysql_type = obmysql::MYSQL_TYPE_SET; + } + bool signed_flag = ((type_flag & UNSIGNED_FLAG) == 0); + + col_meta->setName(column_schema.get_column_name()); + col_meta->setType(static_cast(mysql_type)); + col_meta->setSigned(signed_flag); + col_meta->setIsPK(column_schema.is_original_rowkey_column()); + col_meta->setNotNull(! column_schema.is_nullable()); + SET_ENCODING(col_meta, column_schema.get_charset_type()); + + if (column_schema.is_heap_alter_rowkey_column()) { + col_meta->setHiddenRowKey(); + } + // mark if is generate column + // default value of IColMeta::isGenerated is false + // call setGenerated if is col is generated + if (column_schema.is_generated_column()) { + col_meta->setGenerated(true); + } + + META_STAT_DEBUG("build_col_meta: ", + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + "column", column_schema.get_column_name(), + "type", column_schema.get_data_type(), + "mysql_type", get_emysql_field_type_str(mysql_type), + "signed", col_meta->isSigned(), + "is_pk", col_meta->isPK(), + "not_null", col_meta->isNotNull(), + "encoding", col_meta->getEncoding(), + "default", col_meta->getDefault(), + "isHiddenRowKey", col_meta->isHiddenRowKey(), + "isGeneratedColumn", col_meta->isGenerated()); + + // Do not need + //col_meta->setLength(data_length); + //col_meta->setDecimals(int decimals); + //col_meta->setRequired(int required); + //col_meta->setValuesOfEnumSet(std::vector &v); + //col_meta->setValuesOfEnumSet(std::vector &v); + //col_meta->setValuesOfEnumSet(const char** v, size_t size); + } + } + + return ret; +} + +int ObLogMetaManager::set_primary_keys_(ITableMeta *table_meta, + const share::schema::ObTableSchema *schema, + const TableSchemaInfo &tb_schema_info) +{ + int ret = OB_SUCCESS; + int64_t valid_pk_num = 0; + const ObRowkeyInfo &rowkey_info = schema->get_rowkey_info(); + ObLogAdaptString pks(ObModIds::OB_LOG_TEMP_MEMORY); + ObLogAdaptString pk_info(ObModIds::OB_LOG_TEMP_MEMORY); + + if (OB_ISNULL(table_meta) || OB_ISNULL(schema)) { + LOG_ERROR("invalid argument", K(table_meta), K(schema)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); i++) { + uint64_t column_id = OB_INVALID_ID; + int64_t column_index = -1; + const share::schema::ObColumnSchemaV2 *column_schema = NULL; + ColumnSchemaInfo *column_schema_info = NULL; + 
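/*
 * [Editor's note: illustrative sketch, not part of the patch; col/col_meta are
 * shorthand for the column schema and IColMeta handled above, and error
 * handling is elided.]
 * Column typing starts from ObSMUtils::get_mysql_type() and is then patched for
 * the two types the server-side mapping reports as MYSQL_TYPE_STRING but
 * liboblog wants kept distinct; signedness comes from the returned type flags:
 *
 *   EMySQLFieldType t = MYSQL_TYPE_NOT_DEFINED;
 *   uint16_t flags = 0;
 *   ObScale scale = 0;
 *   ObSMUtils::get_mysql_type(col.get_data_type(), t, flags, scale);
 *   if (ObEnumType == col.get_data_type()) { t = obmysql::MYSQL_TYPE_ENUM; }
 *   else if (ObSetType == col.get_data_type()) { t = obmysql::MYSQL_TYPE_SET; }
 *   col_meta->setType(static_cast<int>(t));
 *   col_meta->setSigned(0 == (flags & UNSIGNED_FLAG));
 */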
ColumnPropertyFlag column_property_flag; + + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_ERROR("get_column_id from rowkey info fail", K(rowkey_info), KR(ret)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(column_schema = schema->get_column_schema(column_id))) { + LOG_ERROR("get column schema fail", "table", schema->get_table_name(), K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tb_schema_info.get_column_schema_info(column_id, enable_output_hidden_primary_key_, + column_schema_info, column_property_flag))) { + LOG_ERROR("get_column_schema_info", KR(ret), "table_id", schema->get_table_id(), + "table_name", schema->get_table_name(), + K(column_id), K(enable_output_hidden_primary_key_), K(column_schema_info), K(column_property_flag)); + // Only ColumnPropertyFlag non-user columns are judged here, column deletion and hidden columns depend on column_schema + } else if (column_property_flag.is_non_user()) { + // filter non user column + META_STAT_INFO("ignore non user rowkey column", K(column_property_flag), + "table_name", schema->get_table_name(), + "table_id", schema->get_table_id(), + K(column_id), "rowkey_index", i, + "rowkey_count", rowkey_info.get_size()); + } else if (column_property_flag.is_hidden()) { // NOTE: ignore hidden column + META_STAT_INFO("ignore hidden rowkey column", "table_name", schema->get_table_name(), + "table_id", schema->get_table_id(), + "column", column_schema->get_column_name(), K(column_id), "rowkey_index", i, + "rowkey_count", rowkey_info.get_size()); + } else if (column_property_flag.is_invisible()) { + META_STAT_INFO("ignore invisible rowkey column", "table_name", schema->get_table_name(), + "table_id", schema->get_table_id(), + "column", column_schema->get_column_name(), K(column_id), "rowkey_index", i, + "rowkey_count", rowkey_info.get_size()); + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else if (!column_schema_info->is_rowkey()) { // not rowkey + if (schema->is_new_no_pk_table()) { + META_STAT_INFO("ignore not rowkey column", "table_name", schema->get_table_name(), + "table_id", schema->get_table_id(), + "column", column_schema->get_column_name(), K(column_id), "rowkey_index", i, + "rowkey_count", rowkey_info.get_size()); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("not a heap table and have no-rowkey in TableSchema::rowley_info_", K(ret), + K(column_schema_info)); + } + } else { + column_index = column_schema_info->get_column_idx(); + const bool is_hidden_pk_table_pk_increment_column = column_schema_info->is_hidden_pk_table_pk_increment_column(); + + if (OB_UNLIKELY(column_index < 0 || column_index >= OB_MAX_COLUMN_NUMBER)) { + LOG_ERROR("column_index is invalid", K(column_index), + "table_id", schema->get_table_id(), + "table_name", schema->get_table_name(), + "column_id", column_schema->get_column_id(), + "column_name", column_schema->get_column_name()); + ret = OB_ERR_UNEXPECTED; + } else { + ret = pks.append(column_schema->get_column_name()); + + if (OB_SUCCESS == ret) { + if (i < (rowkey_info.get_size() - 1)) { + if (is_hidden_pk_table_pk_increment_column) { + // do nothing + } else { + ret = pks.append(","); + } + } + } + + if (OB_SUCCESS == ret) { + if (0 == valid_pk_num) { + ret = pk_info.append("("); + } else { + ret = pk_info.append(","); + } + } + + if (OB_SUCCESS == ret) { + ret = pk_info.append_int64(column_index); + } + + if (OB_SUCCESS == ret) { + valid_pk_num++; + } else { + LOG_ERROR("pks or pk_info 
append fail", KR(ret), K(pks), K(pk_info), K(column_index)); + } + } + } + } // for + + if (OB_SUCC(ret)) { + table_meta->setHasPK((valid_pk_num > 0)); + + // 只有在存在pk的情况下,才设置主键信息 + if (valid_pk_num > 0) { + if (OB_FAIL(pk_info.append(")"))) { + LOG_ERROR("pk_info append fail", KR(ret), K(pk_info)); + } else { + const char *pk_info_str = ""; + const char *pks_str = ""; + + if (OB_FAIL(pk_info.cstr(pk_info_str))) { + LOG_ERROR("get pk_info str fail", KR(ret), K(pk_info)); + } else if (OB_FAIL(pks.cstr(pks_str))) { + LOG_ERROR("get pks str fail", KR(ret), K(pks)); + } + // 要求cstr是有效的 + else if (OB_ISNULL(pk_info_str) || OB_ISNULL(pks_str)) { + LOG_ERROR("pk_info_str or pks_str is invalid", K(pk_info_str), K(pks_str), K(pk_info), + K(pks), K(valid_pk_num)); + ret = OB_ERR_UNEXPECTED; + } else { + table_meta->setPkinfo(pk_info_str); + table_meta->setPKs(pks_str); + } + } + } + + META_STAT_INFO("set_primary_keys", KR(ret), "table_name", schema->get_table_name(), + "table_id", schema->get_table_id(), + "has_pk", table_meta->hasPK(), "pk_info", table_meta->getPkinfo(), + "pks", table_meta->getPKs()); + } + } + return ret; +} + +int ObLogMetaManager::set_unique_keys_from_unique_index_table_(const share::schema::ObTableSchema *table_schema, + const TableSchemaInfo &tb_schema_info, + const share::schema::ObTableSchema *index_table_schema, + bool *is_uk_column_array, + ObLogAdaptString &uk_info, + int64_t &valid_uk_column_count) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(table_schema) + || OB_ISNULL(is_uk_column_array) + || OB_ISNULL(index_table_schema)) { + LOG_ERROR("invalid argument", K(table_schema), K(is_uk_column_array), K(index_table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! index_table_schema->is_unique_index())) { + LOG_ERROR("invalid index table schema which is not unique index", + K(index_table_schema->is_unique_index())); + ret = OB_INVALID_ARGUMENT; + } else { + const ObIndexInfo &index_info = index_table_schema->get_index_info(); + int64_t index_key_count = index_info.get_size(); + valid_uk_column_count = 0; + + for (int64_t index_info_id = 0; + OB_SUCC(ret) && index_info_id < index_key_count; + index_info_id++) { + const share::schema::ObColumnSchemaV2 *column_schema = NULL; + uint64_t index_column_id = OB_INVALID_ID; + if (OB_FAIL(index_info.get_column_id(index_info_id, index_column_id))) { + LOG_ERROR("get_column_id from index_info fail", KR(ret), K(index_info_id), K(index_info), + "index_table_name", index_table_schema->get_table_name(), + "index_table_id", index_table_schema->get_table_id()); + } else if (OB_ISNULL(column_schema = table_schema->get_column_schema(index_column_id))) { + if (index_column_id > OB_MIN_SHADOW_COLUMN_ID) { + LOG_DEBUG("ignore shadow column", K(index_column_id), + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "index_table_name", index_table_schema->get_table_name()); + } else if (ObColumnSchemaV2::is_hidden_pk_column_id(index_column_id)) { + LOG_DEBUG("ignore hidden column", K(index_column_id), + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "index_table_name", index_table_schema->get_table_name()); + } else { + LOG_ERROR("get index column schema fail", K(index_column_id), + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "index_table_name", index_table_schema->get_table_name()); + ret = OB_ERR_UNEXPECTED; + } + } else if (column_schema->is_hidden()) { + LOG_WARN("ignore hidden index column", 
"table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "column_name", column_schema->get_column_name(), K(index_info)); + } else if (column_schema->is_shadow_column()) { + LOG_WARN("ignore shadow column", "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + "column_name", column_schema->get_column_name(), K(index_info)); + } else { + int64_t user_column_index = -1; // Column index as seen from the user's perspective + ColumnSchemaInfo *column_schema_info = NULL; + ColumnPropertyFlag column_property_flag; + + if (OB_FAIL(tb_schema_info.get_column_schema_info(index_column_id, enable_output_hidden_primary_key_, + column_schema_info, column_property_flag))) { + LOG_ERROR("get_column_schema_info", KR(ret), "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + K(index_column_id), K(enable_output_hidden_primary_key_), + K(column_schema_info), K(column_property_flag)); + // Only ColumnPropertyFlag non-user columns are judged here, column deletion and hidden columns depend on column_schema + } else if (column_property_flag.is_non_user()) { + // Filtering non-user columns + META_STAT_INFO("ignore non user column", K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + K(index_column_id), K(index_info_id), K(index_key_count)); + } else if (column_property_flag.is_invisible()) { + // Filtering invisible columns + META_STAT_INFO("ignore invisible column", K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + K(index_column_id), K(index_info_id), K(index_key_count)); + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + user_column_index = column_schema_info->get_column_idx(); + + if (OB_UNLIKELY(user_column_index < 0 || user_column_index >= OB_MAX_COLUMN_NUMBER)) { + LOG_ERROR("user_column_index is invalid", K(user_column_index), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + K(index_column_id)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_DEBUG("set_unique_keys_from_unique_index_table", + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "index_table_id", index_table_schema->get_table_id(), + "index_table_name", index_table_schema->get_table_name(), + "schema_version", table_schema->get_schema_version(), + K(index_column_id), + K(user_column_index)); + + if (0 == valid_uk_column_count) { + ret = uk_info.append("("); + } else { + ret = uk_info.append(","); + } + + if (OB_SUCC(ret)) { + ret = uk_info.append_int64(user_column_index); + } + + if (OB_FAIL(ret)) { + LOG_ERROR("uk_info append string fail", KR(ret), K(uk_info)); + } else { + is_uk_column_array[user_column_index] = true; + valid_uk_column_count++; + } + } + } + } + } // for + + if (OB_SUCC(ret)) { + if (valid_uk_column_count > 0) { + if (OB_FAIL(uk_info.append(")"))) { + LOG_ERROR("uk_info append string fail", KR(ret), K(uk_info), K(valid_uk_column_count)); + } + } + } + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::set_unique_keys_(ITableMeta *table_meta, + const share::schema::ObTableSchema *table_schema, + const TableSchemaInfo &tb_schema_info, + ObLogSchemaGuard &schema_mgr, + volatile bool 
&stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(table_meta) || OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(table_meta), K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + ObLogAdaptString uks(ObModIds::OB_LOG_TEMP_MEMORY); + ObLogAdaptString uk_info(ObModIds::OB_LOG_TEMP_MEMORY); + + // Identifies which column is the unique index column + bool *is_uk_column_array = NULL; + // Number of valid unique index tables + int64_t valid_uk_table_count = 0; + int64_t index_table_count = table_schema->get_index_tid_count(); + int64_t version = table_schema->get_schema_version(); + uint64_t table_id = table_schema->get_table_id(); + int64_t column_count = tb_schema_info.get_non_hidden_column_count(); + + if (column_count < 0) { + LOG_ERROR("column_num is invalid", "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), K(column_count)); + ret = OB_ERR_UNEXPECTED; + } else { + if (index_table_count > 0) { + int64_t is_uk_column_array_size = column_count * sizeof(bool); + is_uk_column_array = static_cast(ob_log_malloc(is_uk_column_array_size)); + + if (OB_ISNULL(is_uk_column_array)) { + LOG_ERROR("allocate memory for is_uk column array fail", K(column_count), + K(is_uk_column_array_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + (void)memset(is_uk_column_array, 0, column_count * sizeof(bool)); + + // Set unique index information from all index tables + if (OB_FAIL(set_unique_keys_from_all_index_table_(valid_uk_table_count, *table_schema, tb_schema_info, + schema_mgr, stop_flag, is_uk_column_array, uk_info))) { + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + LOG_ERROR("set_unique_keys_from_all_index_table_ fail", KR(ret), K(valid_uk_table_count), + K(is_uk_column_array)); + } + // Set the UKs() field value if it contains a valid unique index + else if (valid_uk_table_count > 0) { + bool is_first_uk_column = true; + for (int64_t index = 0; OB_SUCC(ret) && index < column_count; index++) { + if (is_uk_column_array[index]) { + const ObColumnSchemaV2 *column_schema = NULL; + uint64_t column_id = OB_INVALID_ID; + + if (OB_FAIL(tb_schema_info.get_column_id(index, column_id))) { + LOG_ERROR("tb_schema_info get_column_id fail", KR(ret), K(version), K(table_id), + "column_idx", index, K(column_id)); + } else if (OB_UNLIKELY(OB_INVALID_ID == column_id)) { + LOG_ERROR("column_id is not valid", K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(column_schema = table_schema->get_column_schema(column_id))) { + LOG_ERROR("get column schema fail", K(column_id), K(index), K(column_count), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "table_schame_version", table_schema->get_schema_version()); + ret = OB_ERR_UNEXPECTED; + } else { + if (is_first_uk_column) { + is_first_uk_column = false; + } else { + // If not the first uk column, append comma + ret = uks.append(","); + } + + // then append column name + if (OB_SUCC(ret)) { + ret = uks.append(column_schema->get_column_name()); + } + + if (OB_FAIL(ret)) { + LOG_ERROR("uks append fail", KR(ret), K(uks), K(column_schema->get_column_name()), + K(table_schema->get_table_name())); + } + } + } + } // for + } + } + } // if (index_table_count > 0) + } + + if (OB_SUCC(ret)) { + table_meta->setHasUK((valid_uk_table_count > 0)); + + if (valid_uk_table_count > 0) { + const char *uks_str = ""; + const char *uk_info_str = ""; + + if (OB_FAIL(uks.cstr(uks_str))) { + LOG_ERROR("get uks string fail", KR(ret), K(uks)); + } else if 
(OB_FAIL(uk_info.cstr(uk_info_str))) { + LOG_ERROR("get uks string fail", KR(ret), K(uk_info)); + } else if (OB_ISNULL(uks_str) || OB_ISNULL(uk_info_str)) { + LOG_ERROR("invalid uks_str or uk_info_str", K(uks_str), K(uk_info_str), K(uks), K(uk_info)); + ret = OB_ERR_UNEXPECTED; + } else { + table_meta->setUkinfo(uk_info_str); + table_meta->setUKs(uks_str); + } + } + } + + META_STAT_INFO("set_unique_keys", KR(ret), "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), K(valid_uk_table_count), + "has_uk", table_meta->hasUK(), "uk_info", table_meta->getUkinfo(), + "uks", table_meta->getUKs()); + + if (NULL != is_uk_column_array) { + ob_log_free(is_uk_column_array); + is_uk_column_array = NULL; + } + } + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// #retval other error code fail +int ObLogMetaManager::set_unique_keys_from_all_index_table_(int64_t &valid_uk_table_count, + const share::schema::ObTableSchema &table_schema, + const TableSchemaInfo &tb_schema_info, + ObLogSchemaGuard &schema_mgr, + volatile bool &stop_flag, + bool *is_uk_column_array, + ObLogAdaptString &uk_info) +{ + int ret = OB_SUCCESS; + int64_t index_table_count = table_schema.get_index_tid_count(); + ObSEArray simple_index_infos; + + if (OB_ISNULL(is_uk_column_array)) { + LOG_ERROR("invalid argument", K(is_uk_column_array)); + ret = OB_INVALID_ARGUMENT; + } else if (index_table_count <= 0) { + // no index table + } else { + // get array of index table id + if (OB_FAIL(table_schema.get_simple_index_infos(simple_index_infos))) { + LOG_ERROR("get_index_tid_array fail", KR(ret), "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id()); + } else { + // Iterate through all index tables to find the unique index table + for (int64_t index = 0; OB_SUCC(ret) && index < index_table_count; index++) { + const share::schema::ObTableSchema *index_table_schema = NULL; + + // retry to fetch schma until success of quit + // caller deal with error code OB_TENANT_HAS_BEEN_DROPPED + RETRY_FUNC(stop_flag, schema_mgr, get_table_schema, simple_index_infos.at(index).table_id_, + index_table_schema, GET_SCHEMA_TIMEOUT); + + if (OB_FAIL(ret)) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("get index table schema fail", KR(ret), K(simple_index_infos.at(index).table_id_)); + } + } else if (OB_ISNULL(index_table_schema)) { + LOG_ERROR("get index table schema fail", "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "index_table_id", simple_index_infos.at(index).table_id_, K(index_table_count), K(index)); + ret = OB_ERR_UNEXPECTED; + } + // Handling uniquely indexed tables + else if (index_table_schema->is_unique_index()) { + ObLogAdaptString tmp_uk_info(ObModIds::OB_LOG_TEMP_MEMORY); + int64_t valid_uk_column_count = 0; + + // Get unique key information from a unique index table + if (OB_FAIL(set_unique_keys_from_unique_index_table_(&table_schema, + tb_schema_info, + index_table_schema, + is_uk_column_array, + tmp_uk_info, + valid_uk_column_count))) { + LOG_ERROR("set_unique_keys_from_unique_index_table_ fail", KR(ret), + "table_name", table_schema.get_table_name(), + "table_id", table_schema.get_table_id(), + "index_table_name", index_table_schema->get_table_name(), + K(is_uk_column_array)); + } + // Process only when valid unique index column information is obtained + else if (valid_uk_column_count > 0) { + const char *tmp_uk_info_str = NULL; + if 
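/*
 * [Editor's note: worked example, not part of the patch; the table, its unique
 * indexes and the index enumeration order are hypothetical.]
 * is_uk_column_array has one slot per user-visible column and is filled while
 * scanning every unique index; UKs() is then built by walking columns in user
 * order and naming each marked column exactly once (de-duplicated across
 * indexes), while Ukinfo() keeps one parenthesised group of user column
 * indexes per unique index, groups joined with ','. For t(a, b, c) with unique
 * indexes (b) and (c, a), assuming the (b) index is enumerated first:
 *
 *   table_meta->setUKs("a,b,c");
 *   table_meta->setUkinfo("(1),(2,0)");
 *   table_meta->setHasUK(true);
 */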
(OB_FAIL(tmp_uk_info.cstr(tmp_uk_info_str))) { + LOG_ERROR("get tmp_uk_info str fail", KR(ret), K(tmp_uk_info)); + } else if (OB_ISNULL(tmp_uk_info_str)) { + LOG_ERROR("tmp_uk_info_str is invalid", K(tmp_uk_info_str), K(tmp_uk_info), + K(valid_uk_column_count), K(index_table_schema->get_table_name())); + ret = OB_ERR_UNEXPECTED; + } else { + if (valid_uk_table_count > 0) { + ret = uk_info.append(","); + } + + if (OB_SUCC(ret)) { + ret = uk_info.append(tmp_uk_info_str); + } + + if (OB_FAIL(ret)) { + LOG_ERROR("uk_info append string fail", KR(ret), K(uk_info)); + } else { + valid_uk_table_count++; + } + } + } + } + } + } + } + return ret; +} + +int ObLogMetaManager::build_db_meta_( + const DBSchemaInfo &db_schema_info, + const TenantSchemaInfo &tenant_schema_info, + IDBMeta *&db_meta) +{ + int ret = OB_SUCCESS; + + IDBMeta *tmp_db_meta = LogMsgFactory::createDBMeta(); + + if (OB_ISNULL(tmp_db_meta)) { + LOG_ERROR("createDBMeta fail, return NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + // set DB Name to:TENANT.DATABASE + std::string db_name_str = tenant_schema_info.name_; + db_name_str.append("."); + db_name_str.append(db_schema_info.name_); + + tmp_db_meta->setName(db_name_str.c_str()); + SET_ENCODING(tmp_db_meta, CHARSET_BINARY); + tmp_db_meta->setUserData((void*)1); + } + + if (OB_SUCC(ret)) { + db_meta = tmp_db_meta; + } + + return ret; +} + +int ObLogMetaManager::build_ddl_meta_() +{ + int ret = OB_SUCCESS; + + if (NULL != ddl_table_meta_) { + LOG_ERROR("meta has been built", K(ddl_table_meta_)); + ret = OB_INIT_TWICE; + } else { + ITableMeta *tmp_table_meta = LogMsgFactory::createTableMeta(); + IColMeta *ddl_stmt_col_meta = NULL; + IColMeta *ddl_schema_version_col_meta = NULL; + const char *ddl_stmt_col_name = "ddl"; + const char *ddl_schema_version_col_name = "ddl_schema_version"; + + if (OB_ISNULL(tmp_table_meta)) { + LOG_ERROR("createTableMeta fail, return NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(build_col_meta_(ddl_stmt_col_name, ddl_stmt_col_meta))) { + LOG_ERROR("build_col_meta_ fail", KR(ret), K(ddl_stmt_col_name), + K(ddl_stmt_col_meta)); + } else if (OB_FAIL(build_col_meta_(ddl_schema_version_col_name, ddl_schema_version_col_meta))) { + LOG_ERROR("build_col_meta_ fail", KR(ret), K(ddl_schema_version_col_name), + K(ddl_schema_version_col_meta)); + } else { + (void)tmp_table_meta->append(ddl_stmt_col_name, ddl_stmt_col_meta); + (void)tmp_table_meta->append(ddl_schema_version_col_name, ddl_schema_version_col_meta); + tmp_table_meta->setName(""); + tmp_table_meta->setDBMeta(NULL); + tmp_table_meta->setEncoding(""); + tmp_table_meta->setUserData(NULL); + + ddl_table_meta_ = tmp_table_meta; + } + + if (OB_FAIL(ret)) { + // destroy all column meta of table meta when destroy table meta + if (NULL != tmp_table_meta) { + LogMsgFactory::destroy(tmp_table_meta); + tmp_table_meta = NULL; + } + ddl_stmt_col_meta = NULL; + ddl_schema_version_col_meta = NULL; + } + } + + return ret; +} + +int ObLogMetaManager::build_col_meta_(const char *ddl_col_name, + IColMeta *&col_meta) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(col_meta = LogMsgFactory::createColMeta())) { + LOG_ERROR("createColMeta fail, return NULL", K(col_meta)); + ret = OB_ERR_UNEXPECTED; + } else { + EMySQLFieldType mysql_type = obmysql::MYSQL_TYPE_VAR_STRING; + + col_meta->setName(ddl_col_name); + col_meta->setType(static_cast(mysql_type)); + col_meta->setSigned(true); + col_meta->setIsPK(false); + col_meta->setNotNull(false); + col_meta->setEncoding(""); + col_meta->setDefault(""); + } + + if (OB_FAIL(ret)) { + 
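/*
 * [Editor's note: clarifying note, not part of the patch; the tenant and
 * database names are hypothetical.]
 * Two fixed conventions are established here: build_db_meta_() names a DB meta
 * "<tenant>.<database>" (a database "test" under tenant "sys" becomes
 * "sys.test") with CHARSET_BINARY encoding, and build_ddl_meta_() builds the
 * single shared pseudo table meta with exactly two VAR_STRING columns, "ddl"
 * and "ddl_schema_version", which callers are expected to pick up through
 * get_ddl_table_meta() instead of going through the per-table cache.
 */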
if (NULL != col_meta) { + LogMsgFactory::destroy(col_meta); + col_meta = NULL; + } + } + + return ret; +} + +void ObLogMetaManager::destroy_ddl_meta_() +{ + if (NULL != ddl_table_meta_) { + // destroy all column meta of table meta when destroy table meta + LogMsgFactory::destroy(ddl_table_meta_); + ddl_table_meta_ = NULL; + } +} + +int ObLogMetaManager::get_table_schema_meta(const int64_t version, + const uint64_t table_id, + TableSchemaInfo *&tb_schema_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("meta manager has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == version) + || OB_UNLIKELY(OB_INVALID_ID == table_id)) { + LOG_ERROR("invalid argument", K(version), K(table_id)); + ret = OB_INVALID_ARGUMENT; + } else { + MulVerTableKey table_key(version, table_id); + + if (OB_FAIL(tb_schema_info_map_.get(table_key, tb_schema_info))) { + LOG_ERROR("tb_schema_info_map_ get fail", KR(ret), K(table_key), K(tb_schema_info)); + } else { + // succ + } + } + + return ret; +} + +int ObLogMetaManager::set_table_schema_(const int64_t version, + const uint64_t table_id, + const char *table_name, + const int64_t non_hidden_column_cnt, + TableSchemaInfo &tb_schema_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("meta manager has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == version) + || OB_UNLIKELY(OB_INVALID_ID == table_id) + || OB_ISNULL(table_name)) { + LOG_ERROR("invalid argument", K(version), K(table_id), K(table_name)); + ret = OB_INVALID_ARGUMENT; + } else { + tb_schema_info.set_non_hidden_column_count(non_hidden_column_cnt); + + MulVerTableKey table_key(version, table_id); + + if (OB_FAIL(tb_schema_info_map_.insert(table_key, &tb_schema_info))) { + LOG_ERROR("tb_schema_info_map_ insert fail", KR(ret), K(table_key), K(tb_schema_info)); + } else { + LOG_INFO("set_table_schema succ", "schema_version", version, + K(table_id), K(table_name), K(tb_schema_info)); + } + } + + return ret; +} + +int ObLogMetaManager::alloc_table_schema_info_(TableSchemaInfo *&tb_schema_info) +{ + int ret = OB_SUCCESS; + tb_schema_info = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("meta manager has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tb_schema_info = static_cast(allocator_.alloc( + sizeof(TableSchemaInfo))))) { + LOG_ERROR("allocate memory fail", K(sizeof(TableSchemaInfo))); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + new(tb_schema_info) TableSchemaInfo(allocator_); + } + + return ret; +} + +int ObLogMetaManager::free_table_schema_info_(TableSchemaInfo *&tb_schema_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("meta manager has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tb_schema_info)) { + LOG_ERROR("tb_schema_info is null", K(tb_schema_info)); + ret = OB_INVALID_ARGUMENT; + } else { + tb_schema_info->~TableSchemaInfo(); + allocator_.free(tb_schema_info); + tb_schema_info = NULL; + } + + return ret; +} + +int ObLogMetaManager::set_column_schema_info_(const share::schema::ObTableSchema &table_schema, + TableSchemaInfo &tb_schema_info, + const int64_t column_idx, + const share::schema::ObColumnSchemaV2 &column_table_schema) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
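/*
 * [Editor's note: illustrative usage sketch, not part of the patch;
 * meta_manager/schema_version/table_id are assumed caller-side names.]
 * TableSchemaInfo objects are owned by the manager: alloc_table_schema_info_()
 * carves them out of allocator_ (which is why destroy() tears the map down
 * before the allocator) and set_table_schema_() registers them under the key
 * (schema_version, table_id), so several schema versions of one table coexist.
 * A consumer that knows both values can read the cached mapping back without
 * touching the schema service:
 *
 *   TableSchemaInfo *info = NULL;
 *   if (OB_SUCC(meta_manager.get_table_schema_meta(schema_version, table_id, info))) {
 *     // info carries the column_id <-> user column index mapping built above
 *   }
 */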
inited_)) { + LOG_ERROR("meta manager has not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(tb_schema_info.init_column_schema_info(table_schema, column_table_schema, + column_idx, enable_output_hidden_primary_key_, *obj2str_helper_))) { + LOG_ERROR("tb_schema_info init_column_schema_info fail", KR(ret), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_table_schema), K(column_idx), + K(enable_output_hidden_primary_key_)); + } else { + // succ + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_meta_manager.h b/src/liboblog/src/ob_log_meta_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..d1acbc715a705b3acb089cff9a581e18e540943a --- /dev/null +++ b/src/liboblog/src/ob_log_meta_manager.h @@ -0,0 +1,446 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_META_MANAGER_H__ +#define OCEANBASE_LIBOBLOG_META_MANAGER_H__ + +#include // ITableMeta, IDBMeta + +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/lock/ob_spin_rwlock.h" // SpinRWLock, SpinRLockGuard, SpinWLockGuard +#include "lib/allocator/page_arena.h" // DefaultPageAllocator +#include "lib/allocator/ob_mod_define.h" // ObModIds +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/allocator/ob_fifo_allocator.h" // ObFIFOAllocator +#include "lib/allocator/ob_concurrent_fifo_allocator.h" // ObConcurrentFIFOAllocator +#include "lib/allocator/ob_allocator.h" // ObIAllocator +#include "ob_log_schema_cache_info.h" // TableSchemaInfo + +using namespace oceanbase::logmessage; +namespace oceanbase +{ +namespace share +{ +namespace schema +{ +class ObTableSchema; +class ObSimpleTableSchemaV2; +class ObColumnSchemaV2; +} // namespace schema +} // namespace share + +namespace liboblog +{ +class ObLogSchemaGuard; +class IObLogSchemaGetter; + +typedef ObLogSchemaGuard ObLogSchemaGuard; + +class ObObj2strHelper; +class ObLogAdaptString; +struct DBSchemaInfo; +struct TenantSchemaInfo; + +class IObLogMetaManager +{ +public: + virtual ~IObLogMetaManager() {} + +public: + // add ref count of Table Meta + // 1. try to get table meta by ObSimpleTableSchemaV2(default) + // 2. 
try get table meta by ObTableSchema if meta info not exist,make sure ObTableSchema will only refresh exactly once + // + // @retval OB_SUCCESS success + // @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped + // #retval other error code fail + virtual int get_table_meta(const share::schema::ObSimpleTableSchemaV2 *table_schema, + IObLogSchemaGetter &schema_getter, + ITableMeta *&table_meta, + volatile bool &stop_flag) = 0; + + // get DDL Table Meta + virtual ITableMeta *get_ddl_table_meta() = 0; + + // decrease ref count of Table Meta + virtual void revert_table_meta(ITableMeta *table_meta) = 0; + + // add ref count of DB Meta + // + // @retval OB_SUCCESS success + // @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped + // #retval other error code fail + virtual int get_db_meta( + const DBSchemaInfo &db_schema_info, + ObLogSchemaGuard &schema_mgr, + IDBMeta *&db_meta, + volatile bool &stop_flag) = 0; + + // decrease ref count of DB Meta + virtual void revert_db_meta(IDBMeta *db_meta) = 0; + + // delete table + // delete all data of the deleted table and decrease ref count of the TableMeta by 1 for all version + virtual int drop_table(const int64_t table_id) = 0; + + // delete database + // delete all data of the deleted database and decrease ref count of the database meta by 1 for all version + virtual int drop_database(const int64_t database_id) = 0; + + virtual int get_table_schema_meta(const int64_t version, + const uint64_t table_id, + TableSchemaInfo *&tb_schema_info) = 0; +}; + +class ObLogMetaManager : public IObLogMetaManager +{ +public: + ObLogMetaManager(); + virtual ~ObLogMetaManager(); + +public: + virtual int get_table_meta(const share::schema::ObSimpleTableSchemaV2 *table_schema, + IObLogSchemaGetter &schema_getter, + ITableMeta *&table_meta, + volatile bool &stop_flag); + virtual ITableMeta *get_ddl_table_meta() { return ddl_table_meta_; } + virtual void revert_table_meta(ITableMeta *table_meta); + virtual int get_db_meta( + const DBSchemaInfo &db_schema_info, + ObLogSchemaGuard &schema_mgr, + IDBMeta *&db_meta, + volatile bool &stop_flag); + virtual void revert_db_meta(IDBMeta *db_meta); + virtual int drop_table(const int64_t table_id); + virtual int drop_database(const int64_t database_id); + virtual int get_table_schema_meta(const int64_t version, + const uint64_t table_id, + TableSchemaInfo *&tb_schema_info); +public: + int init(ObObj2strHelper *obj2str_helper, + const bool enable_output_hidden_primary_key); + void destroy(); + +private: + typedef common::SpinRWLock RWLock; + typedef common::SpinRLockGuard RLockGuard; + typedef common::SpinWLockGuard WLockGuard; + typedef common::ObConcurrentFIFOAllocator FIFOAllocator; + + static const int64_t ALLOCATOR_PAGE_SIZE = common::OB_MALLOC_NORMAL_BLOCK_SIZE; + static const int64_t ALLOCATOR_HOLD_LIMIT = common::OB_MALLOC_BIG_BLOCK_SIZE; + static const int64_t ALLOCATOR_TOTAL_LIMIT = 10L * 1024L * 1024L * 1024L; + static const int64_t GET_SCHEMA_TIMEOUT = 10L * 1000L * 1000L; + + template + struct MetaNode + { + Type *value_; + int64_t version_; + MetaNode *before_; + MetaNode *next_; + + void reset(); + }; + + template + struct MetaInfo + { + typedef common::DefaultPageAllocator BaseAllocator; + + int64_t num_; + MetaNode *head_; + MetaNode *tail_; + RWLock lock_; + BaseAllocator base_allocator_; + common::ObFIFOAllocator fifo_allocator_; + + MetaInfo(); + ~MetaInfo(); + + int get(const int64_t target_version, Type *&meta); + int set(const int64_t version, Type *meta); + }; + + struct MetaKey + { + uint64_t id_; 
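/*
 * [Editor's note: clarifying sketch, not part of the patch.]
 * The cache is two-level: a concurrent hash map keyed by MetaKey (table id or
 * database id, hash() is simply the id) whose value is a MetaInfo, and each
 * MetaInfo keeps its own RWLock-protected linked list of MetaNode entries, one
 * per schema version (read lock in get_meta_from_meta_info_, write lock in
 * add_and_get_*_meta_). Conceptually:
 *
 *   tb_meta_map_  : MetaKey{table_id} -> TableMetaInfo
 *   TableMetaInfo : head_ -> [version 105 | ITableMeta ptr] -> [version 98 | ...] -> NULL
 */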
+ + bool is_valid() const { return common::OB_INVALID_ID != id_; } + uint64_t hash() const { return id_; } + bool operator== (const MetaKey & other) const { return id_ == other.id_; } + TO_STRING_KV(K_(id)); + }; + + // multi-version table + struct MulVerTableKey + { + int64_t version_; + uint64_t table_id_; + + MulVerTableKey(const int64_t version, + const uint64_t table_id) : version_(version), table_id_(table_id) {} + + uint64_t hash() const + { + uint64_t hash_val = 0; + hash_val = common::murmurhash(&version_, sizeof(version_), hash_val); + hash_val = common::murmurhash(&table_id_, sizeof(table_id_), hash_val); + + return hash_val; + } + bool operator== (const MulVerTableKey & other) const + { return (version_ == other.version_) && (table_id_ == other.table_id_); } + + TO_STRING_KV(K_(version), K_(table_id)); + }; + + typedef MetaNode TableMetaNode; + typedef MetaInfo TableMetaInfo; + typedef common::ObLinearHashMap TableMetaMap; + + typedef MetaNode DBMetaNode; + typedef MetaInfo DBMetaInfo; + typedef common::ObLinearHashMap DBMetaMap; + typedef common::ObLinearHashMap MulVerTableSchemaMap; + +private: + template + int get_meta_info_(MetaMapType &meta_map, const MetaKey &key, MetaInfoType *&meta_info); + template + int get_meta_from_meta_info_(MetaInfoType *meta_info, const int64_t version, MetaType *&meta); + int add_and_get_table_meta_(TableMetaInfo *meta_info, + const share::schema::ObTableSchema *table_schema, + ObLogSchemaGuard &schema_mgr, + ITableMeta *&table_meta, + volatile bool &stop_flag); + int add_and_get_db_meta_(DBMetaInfo *meta_info, + const DBSchemaInfo &db_schema_info, + const TenantSchemaInfo &tenant_schema_info, + IDBMeta *&db_meta); + template static int inc_meta_ref_(MetaType *meta); + template static int dec_meta_ref_(MetaType *meta, int64_t &ref_cnt); + int build_table_meta_(const share::schema::ObTableSchema *schema, + ObLogSchemaGuard &schema_mgr, + ITableMeta *&table_meta, + volatile bool &stop_flag); + int build_db_meta_( + const DBSchemaInfo &db_schema_info, + const TenantSchemaInfo &tenant_schema_info, + IDBMeta *&db_meta); + int build_column_metas_(ITableMeta *table_meta, + const share::schema::ObTableSchema *table_schema, + TableSchemaInfo &tb_schema_info, + ObLogSchemaGuard &schema_mgr, + volatile bool &stop_flag); + // 1. won't filter hidden pk for table without primary key, column_name=__pk_increment, column_id=1 + // 2. filter hidden column + // 3. filter non-user column + // 4. 
filter invisible column by default, won't filter if config enable_output_invisible_column = 1 + int filter_column_(const share::schema::ObTableSchema &table_schema, + const bool is_hidden_pk_table, + const share::schema::ObColumnSchemaV2 &column_schema, + bool &is_filter, + bool &is_hidden_pk_table_pk_increment_column); + int set_column_meta_(IColMeta *col_meta, + const share::schema::ObColumnSchemaV2 &column_schema, + const share::schema::ObTableSchema &table_schema); + int set_primary_keys_(ITableMeta *table_meta, + const share::schema::ObTableSchema *schema, + const TableSchemaInfo &tb_schema_info); + int set_unique_keys_(ITableMeta *table_meta, + const share::schema::ObTableSchema *table_schema, + const TableSchemaInfo &tb_schema_info, + ObLogSchemaGuard &schema_mgr, + volatile bool &stop_flag); + int set_unique_keys_from_unique_index_table_(const share::schema::ObTableSchema *table_schema, + const TableSchemaInfo &tb_schema_info, + const share::schema::ObTableSchema *index_table_schema, + bool *is_uk_column_array, + ObLogAdaptString &uk_info, + int64_t &valid_uk_column_count); + int set_unique_keys_from_all_index_table_(int64_t &valid_uk_table_count, + const share::schema::ObTableSchema &table_schema, + const TableSchemaInfo &tb_schema_info, + ObLogSchemaGuard &schema_mgr, + volatile bool &stop_flag, + bool *is_uk_column_array, + ObLogAdaptString &uk_info); + int build_ddl_meta_(); + + int build_col_meta_(const char *ddl_col_name, + IColMeta *&col_meta); + void destroy_ddl_meta_(); + + int alloc_table_schema_info_(TableSchemaInfo *&tb_schema_info); + int free_table_schema_info_(TableSchemaInfo *&tb_schema_info); + int set_column_schema_info_(const share::schema::ObTableSchema &table_schema, + TableSchemaInfo &tb_schema_info, + const int64_t column_idx, + const share::schema::ObColumnSchemaV2 &column_table_schema); + int set_table_schema_(const int64_t version, + const uint64_t table_id, + const char *table_name, + const int64_t non_hidden_column_cnt, + TableSchemaInfo &tb_schema_info); + +private: + bool inited_; + bool enable_output_hidden_primary_key_; + ObObj2strHelper *obj2str_helper_; + ITableMeta *ddl_table_meta_; + DBMetaMap db_meta_map_; + TableMetaMap tb_meta_map_; + MulVerTableSchemaMap tb_schema_info_map_; + FIFOAllocator allocator_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogMetaManager); +}; + +template +void ObLogMetaManager::MetaNode::reset() +{ + value_ = NULL; + version_ = 0; + before_ = NULL; + next_ = NULL; +} + +template +ObLogMetaManager::MetaInfo::MetaInfo() : + num_(0), + head_(NULL), + tail_(NULL), + lock_(), + base_allocator_(common::ObModIds::OB_LOG_META_INFO), + fifo_allocator_() +{ + fifo_allocator_.init(&base_allocator_, common::OB_MALLOC_NORMAL_BLOCK_SIZE); +} + +template +ObLogMetaManager::MetaInfo::~MetaInfo() { } + +template +int ObLogMetaManager::MetaInfo::get(const int64_t target_version, Type *&meta) +{ + int ret = common::OB_SUCCESS; + meta = NULL; + + if (num_ > 0) { + MetaNode *meta_node = head_; + + while (NULL != meta_node) { + if (meta_node->version_ == target_version) { + meta = meta_node->value_; + break; + } else if (meta_node->version_ < target_version) { + break; + } else { + meta_node = meta_node->next_; + } + } + } + + if (NULL == meta) { + ret = common::OB_ENTRY_NOT_EXIST; + } + + return ret; +} + +template +int ObLogMetaManager::MetaInfo::set(const int64_t version, Type *meta) +{ + int ret = common::OB_SUCCESS; + if (NULL == meta) { + ret = common::OB_INVALID_ARGUMENT; + } else { + // create a node + MetaNode *meta_node = + 
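A sketch of the column filtering rules 1-4 listed above for filter_column_(), written against plain booleans instead of the real ObColumnSchemaV2 accessors (the parameter names are assumptions; only the rules themselves come from the comment):

// Returns true when the column should be dropped from the output table meta.
bool should_filter_column(const bool table_has_no_pk,
                          const bool is_hidden_pk_increment,   // __pk_increment, column_id = 1
                          const bool is_hidden,
                          const bool is_user_column,
                          const bool is_invisible,
                          const bool enable_output_invisible_column)
{
  bool filter = false;
  if (table_has_no_pk && is_hidden_pk_increment) {
    filter = false;                                    // rule 1: keep the hidden pk of a pk-less table
  } else if (is_hidden) {
    filter = true;                                     // rule 2: hidden column
  } else if (! is_user_column) {
    filter = true;                                     // rule 3: non-user column
  } else if (is_invisible && ! enable_output_invisible_column) {
    filter = true;                                     // rule 4: invisible column, unless configured
  }
  return filter;
}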
static_cast *>(fifo_allocator_.alloc(sizeof(MetaNode))); + + if (OB_ISNULL(meta_node)) { + OBLOG_LOG(ERROR, "allocate memory for MetaNode fail", K(sizeof(MetaNode))); + ret = common::OB_ALLOCATE_MEMORY_FAILED; + } else { + meta_node->reset(); + + meta_node->value_ = meta; + meta_node->version_ = version; + + // put Node info linkedlist with inverser order of version + if (NULL == head_) { + head_ = meta_node; + tail_ = head_; + num_ = 1; + } else if (OB_ISNULL(tail_)) { + OBLOG_LOG(ERROR, "tail node is NULL, but head node is not NULL", K(head_), K(tail_)); + ret = common::OB_ERR_UNEXPECTED; + } else { + MetaNode *node = head_; + bool inserted = false; + + while (NULL != node) { + if (node->version_ < version) { + // insert ahead of the first version which is smaller than self + meta_node->next_ = node; + meta_node->before_ = node->before_; + // if ahead node exist + // make sure node with higher version point to the new node + if (NULL != node->before_) { + node->before_->next_ = meta_node; + } + node->before_ = meta_node; + + // deal with situation if node is head + // node can't be the tail node + if (node == head_) { + head_ = meta_node; + } + + inserted = true; + break; + } else if (node->version_ == version) { + // error if node with same version already exist + ret = common::OB_ENTRY_EXIST; + } else { + node = node->next_; + } + } + + if (OB_SUCC(ret)) { + // put at tail of linkedlist if can't find version smaller than self + if (! inserted) { + tail_->next_ = meta_node; + meta_node->before_ = tail_; + meta_node->next_ = NULL; + tail_ = meta_node; + } + + num_++; + } + } + } + + if (common::OB_SUCCESS != ret && NULL != meta_node) { + fifo_allocator_.free(static_cast(meta_node)); + meta_node = NULL; + } + } + return ret; +} + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_META_MANAGER_H__ */ diff --git a/src/liboblog/src/ob_log_mock_store_service.h b/src/liboblog/src/ob_log_mock_store_service.h new file mode 100644 index 0000000000000000000000000000000000000000..b2c5c8ec5635ff5edae7dacaa1af9a8548023ddd --- /dev/null +++ b/src/liboblog/src/ob_log_mock_store_service.h @@ -0,0 +1,115 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
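MetaInfo above keeps one meta per schema version in a list ordered from newest to oldest, and get() only succeeds on an exact version match. The same contract expressed with a standard container, purely as an illustration of the invariant (the real code keeps the intrusive list so nodes can live on the FIFO allocator):

#include <cstdint>
#include <functional>
#include <map>

template <typename Type>
struct VersionedMetas
{
  // newest version first; lookups are exact-match only
  std::map<int64_t, Type *, std::greater<int64_t>> metas_;

  int set(const int64_t version, Type *meta)
  {
    // inserting a duplicate version is an error, mirroring OB_ENTRY_EXIST above
    return metas_.emplace(version, meta).second ? 0 : -1;
  }

  int get(const int64_t version, Type *&meta) const
  {
    auto it = metas_.find(version);
    meta = (it == metas_.end()) ? nullptr : it->second;
    return (nullptr == meta) ? -1 : 0;   // -1 stands in for OB_ENTRY_NOT_EXIST
  }
};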
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_MOCK_STORE_SERVICE_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_MOCK_STORE_SERVICE_H_ + +#include "ob_log_store_service.h" + +namespace oceanbase +{ +namespace liboblog +{ +class MockObLogStoreService : public IObStoreService +{ +public: + MockObLogStoreService() {} + void destroy() {} + ~MockObLogStoreService() { destroy(); } + int init(const std::string &path) { UNUSED(path); return 0; } + int close() { return 0; } +public: + int put(const std::string &key, const ObSlice &value) + { + UNUSED(key); + UNUSED(value); + return 0; + } + + int put(void *cf_handle, const std::string &key, const ObSlice &value) + { + UNUSED(key); + UNUSED(value); + UNUSED(cf_handle); + return 0; + } + + int batch_write(void *cf_handle, const std::vector &keys, const std::vector &values) + { + UNUSED(cf_handle); + UNUSED(keys); + UNUSED(values); + return 0; + } + + int get(const std::string &key, std::string &value) + { + UNUSED(key); + UNUSED(value); + return 0; + } + + int get(void *cf_handle, const std::string &key, std::string &value) + { + UNUSED(cf_handle); + UNUSED(key); + UNUSED(value); + return 0; + } + + int del(const std::string &key) + { + UNUSED(key); + return 0; + } + + int del(void *cf_handle, const std::string &key) + { + UNUSED(cf_handle); + UNUSED(key); + return 0; + } + + int create_column_family(const std::string& column_family_name, + void *&cf_handle) + { + UNUSED(column_family_name); + cf_handle = this; + return 0; + } + + int drop_column_family(void *cf_handle) + { + UNUSED(cf_handle); + return 0; + } + + int destory_column_family(void *cf_handle) + { + UNUSED(cf_handle); + return 0; + } + + void get_mem_usage(const std::vector ids, + const std::vector cf_handles) + { + UNUSED(ids); + UNUSED(cf_handles); + } + +}; + +} +} + + +#endif diff --git a/src/liboblog/src/ob_log_mysql_connector.cpp b/src/liboblog/src/ob_log_mysql_connector.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ba3e5199a55c4afc872d83a369fde7fbd4580c6d --- /dev/null +++ b/src/liboblog/src/ob_log_mysql_connector.cpp @@ -0,0 +1,690 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_mysql_connector.h" + +#include "lib/string/ob_string.h" // ObString +#include "share/ob_time_utility2.h" // ObTimeUtility2 + +#include "ob_log_utils.h" // _SEC_ + + +using namespace oceanbase::common; +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace liboblog +{ +bool MySQLConnConfig::is_valid() const +{ + bool ret = true; + if (! svr_.is_valid() + || NULL == mysql_user_ + || NULL == mysql_password_ + || NULL == mysql_db_ + || mysql_connect_timeout_sec_ <= 0 + || mysql_query_timeout_sec_ <= 0) { + ret = false; + } + return ret; +} + +int MySQLConnConfig::reset(const ObAddr &svr, + const char *mysql_user, + const char *mysql_password, + const char *mysql_db, + const int mysql_connect_timeout_sec, + const int mysql_query_timeout_sec) +{ + int ret = OB_SUCCESS; + + reset(); + + if (OB_UNLIKELY(! 
svr.ip_to_string(ip_buf_, sizeof(ip_buf_)))) { + LOG_ERROR("ip_to_string fail", K(svr), K(ip_buf_), K(sizeof(ip_buf_))); + ret = OB_ERR_UNEXPECTED; + } else { + svr_ = svr; + mysql_db_ = mysql_db; + mysql_connect_timeout_sec_ = mysql_connect_timeout_sec; + mysql_query_timeout_sec_ = mysql_query_timeout_sec; + mysql_user_ = mysql_user; + mysql_password_ = mysql_password; + } + + return ret; +} + +////////////////////////////////////// ObLogMySQLConnector ///////////////////////////////// +ObLogMySQLConnector::ObLogMySQLConnector() : + inited_(false), + mysql_(NULL), + svr_() +{ +} + +ObLogMySQLConnector::~ObLogMySQLConnector() +{ + destroy(); +} + +int ObLogMySQLConnector::init(const MySQLConnConfig& cfg, + const bool enable_ssl_client_authentication) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("twice init", KR(ret)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(!cfg.is_valid())) { + LOG_ERROR("invalid config", KR(ret), K(cfg)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_conn_(cfg, enable_ssl_client_authentication))) { + if (OB_NEED_RETRY != ret) { + LOG_ERROR("init connector fail", KR(ret), K(cfg), K(enable_ssl_client_authentication)); + } + } else { + svr_ = cfg.svr_; + inited_ = true; + LOG_INFO("init mysql connector succ", K(this), K(cfg)); + } + + if (OB_SUCCESS != ret) { + destroy(); + } + + return ret; +} + +void ObLogMySQLConnector::destroy() +{ + inited_ = false; + destroy_conn_(); + + mysql_ = NULL; + svr_.reset(); + + LOG_INFO("destroy mysql connector succ", K(this), K(svr_)); +} + +int ObLogMySQLConnector::query(MySQLQueryBase& query) +{ + int ret = OB_SUCCESS; + int err = 0; + bool done = false; + const char *sql = NULL; + unsigned long sql_len = 0; + MYSQL_RES *res = NULL; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.get_sql(sql, sql_len))) { + LOG_ERROR("get_sql fail", KR(ret), K(sql), K(sql_len)); + } else if (OB_ISNULL(sql) || sql_len <= 0) { + LOG_ERROR("invalid sql", K(sql), K(sql_len)); + ret = OB_INVALID_ARGUMENT; + } else if (0 != (err = mysql_real_query(mysql_, sql, sql_len))) { + int mysql_error_code = mysql_errno(mysql_); + const char *err_msg = mysql_error(mysql_); + + LOG_WARN("mysql_real_query fail", K(err), K(mysql_error_code), "mysql_error", err_msg, + K(svr_), K(sql_len), K(sql), K(svr_)); + + // 1. execution of sql failed, and mysql error: 1054 - Unknown column 'id' in 'field list' + // This means that liboblog is connected to a low version of the observer, no replica_type information is available, error code OB_ERR_COLUMN_NOT_FOUND is returned + // + // 2. 
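A small sketch of bringing one connection up with the pieces above; the credentials and timeouts are placeholders, only MySQLConnConfig::reset(), ObLogMySQLConnector::init() and the OB_NEED_RETRY contract come from this file:

int connect_once(ObLogMySQLConnector &conn, const common::ObAddr &svr)
{
  int ret = common::OB_SUCCESS;
  MySQLConnConfig cfg;

  // 10s connect timeout, 30s query timeout; user/password/db are placeholders
  if (common::OB_SUCCESS != (ret = cfg.reset(svr, "user", "password", "oceanbase", 10, 30))) {
    // svr.ip_to_string() failed, nothing worth retrying
  } else if (common::OB_SUCCESS != (ret = conn.init(cfg, false /* ssl client auth disabled */))) {
    // OB_NEED_RETRY means this server refused the connection; the caller may try another server,
    // any other code is treated as fatal
  }
  return ret;
}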
execution of sql failed, and mysql error: 1146 - Table xx doesn't exist + query.set_mysql_error(mysql_error_code, err_msg); + } else if (OB_FAIL(query.set_result(mysql_, svr_))) { + LOG_ERROR("set result fail", KR(ret), K(res), K(mysql_), K(svr_)); + } else { + done = true; + } + + if (OB_SUCCESS == ret && !done) { + ret = OB_NEED_RETRY; + } + return ret; +} + +int ObLogMySQLConnector::exec(MySQLQueryBase& query) +{ + int ret = OB_SUCCESS; + int err = 0; + bool done = false; + const char *sql = NULL; + unsigned long sql_len = 0; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.get_sql(sql, sql_len))) { + LOG_ERROR("get_sql fail", KR(ret), K(sql), K(sql_len)); + } else if (OB_ISNULL(sql) || sql_len <= 0) { + LOG_ERROR("invalid sql", K(sql), K(sql_len)); + ret = OB_INVALID_ARGUMENT; + } else if (0 != (err = mysql_real_query(mysql_, sql, sql_len))) { + int mysql_error_code = mysql_errno(mysql_); + const char *err_msg = mysql_error(mysql_); + LOG_WARN("mysql_real_query fail", K(err), K(mysql_error_code), K(err_msg), K(svr_), + K(sql_len), K(sql)); + + // Setting error codes in case of errors + query.set_mysql_error(mysql_error_code, err_msg); + } else if (OB_FAIL(query.set_result(mysql_, svr_))) { + LOG_ERROR("set result fail", KR(ret), K(mysql_), K(svr_)); + } else { + done = true; + } + + if (OB_SUCCESS == ret && !done) { + ret = OB_NEED_RETRY; + } + return ret; +} + +#define SET_TIMEOUT_SQL "SET SESSION ob_query_timeout = %ld, SESSION ob_trx_timeout = %ld" + +int ObLogMySQLConnector::set_timeout_variable_(const int64_t query_timeout, const int64_t trx_timeout) +{ + int ret = OB_SUCCESS; + SMART_VAR(char[OB_MAX_SQL_LENGTH], sql) { + sql[0] = '\0'; + int64_t sql_len = 0; + int mysql_err = 0; + + if (OB_UNLIKELY(query_timeout <= 0) || OB_UNLIKELY(trx_timeout <= 0)) { + LOG_ERROR("invalid argument", K(query_timeout), K(trx_timeout)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql, sizeof(sql), sql_len, SET_TIMEOUT_SQL, query_timeout, + trx_timeout))) { + LOG_ERROR("build SET_TIMEOUT_SQL fail", KR(ret), K(sql), K(sizeof(sql)), K(query_timeout), + K(trx_timeout)); + } else if (0 != (mysql_err = mysql_real_query(mysql_, sql, sql_len))) { + LOG_WARN("mysql_real_query fail", K(mysql_err), K(mysql_error(mysql_)), K(sql_len), K(sql), + K(svr_)); + ret = OB_NEED_RETRY; + } else { + // success + } + } + return ret; +} + +int ObLogMySQLConnector::init_conn_(const MySQLConnConfig &cfg, + const bool enable_ssl_client_authentication) +{ + int ret = OB_SUCCESS; + unsigned int connect_timeout = static_cast(cfg.mysql_connect_timeout_sec_); + unsigned int read_timeout = connect_timeout; + unsigned int write_timeout = connect_timeout; + int64_t query_timeout_us = cfg.mysql_query_timeout_sec_ * _SEC_; + int64_t trx_timeout_us = query_timeout_us; + + if (NULL == (mysql_ = mysql_init(NULL))) { + LOG_ERROR("mysql_init fail", KR(ret)); + ret = OB_ERR_UNEXPECTED; + } else if (0 != (mysql_options(mysql_, MYSQL_OPT_CONNECT_TIMEOUT, &connect_timeout))) { + LOG_ERROR("failed to set conn timeout for mysql conn", + K(mysql_error(mysql_)), K(connect_timeout)); + ret = OB_ERR_UNEXPECTED; + } else { + if (! 
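As a worked example of SET_TIMEOUT_SQL above: with mysql_query_timeout_sec_ = 30, init_conn_() converts the value to microseconds and set_timeout_variable_() ends up sending (assuming _SEC_ is one second expressed in microseconds, as it is used throughout this file):

SET SESSION ob_query_timeout = 30000000, SESSION ob_trx_timeout = 30000000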
enable_ssl_client_authentication) { + int64_t ssl_mode = 0; + + if (0 != (mysql_options(mysql_, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, (void *)&ssl_mode))) { + LOG_ERROR("failed to set ssl mode for mysql conn", + K(mysql_error(mysql_)), K(ssl_mode)); + ret = OB_ERR_UNEXPECTED; + } + } + + // CLIENT_MULTI_STATEMENTS: enable multiple-statement execution and multiple-result + if (mysql_ != mysql_real_connect(mysql_, + cfg.get_mysql_addr(), + cfg.mysql_user_, + cfg.mysql_password_, + cfg.mysql_db_, + cfg.get_mysql_port(), + NULL, CLIENT_MULTI_STATEMENTS)) { + LOG_WARN("mysql connect failed", "mysql_error", mysql_error(mysql_), + K(cfg.get_mysql_addr()), K(cfg.get_mysql_port()), K(cfg)); + ret = OB_NEED_RETRY; + } + // set timeout variables + else if (OB_FAIL(set_timeout_variable_(query_timeout_us, trx_timeout_us))) { + LOG_WARN("set_timeout_variable_ fail", KR(ret), K(query_timeout_us), K(trx_timeout_us), K(cfg)); + } else { + // Connection successful + } + } + + if (OB_SUCCESS != ret && NULL != mysql_) { + mysql_close(mysql_); + mysql_ = NULL; + } + + return ret; +} + +void ObLogMySQLConnector::destroy_conn_() +{ + if (NULL != mysql_) { + mysql_close(mysql_); + mysql_ = NULL; + } +} + +///////////////////////////////////////// MySQLQueryBase /////////////////////////////////////////// + +MySQLQueryBase::MySQLQueryBase() : + inited_(false), + sql_(NULL), + sql_len_(0), + mysql_(NULL), + svr_(), + res_(NULL), + row_(NULL), + col_lens_(0), + col_cnt_(0), + mysql_err_code_(0), + succ_result_set_cnt_(0) +{ + mysql_err_msg_[0] = '\0'; +} + +MySQLQueryBase::~MySQLQueryBase() +{ + destroy(); +} + +int MySQLQueryBase::init(const char *sql, const unsigned long sql_len) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(sql) || OB_UNLIKELY(sql_len <= 0)) { + LOG_ERROR("invalid argument", K(sql), K(sql_len)); + ret = OB_INVALID_ARGUMENT; + } else { + sql_ = sql; + sql_len_ = sql_len; + mysql_ = NULL; + svr_.reset(); + res_ = NULL; + row_ = NULL; + col_lens_ = 0; + col_cnt_ = 0; + mysql_err_code_ = 0; + mysql_err_msg_[0] = '\0'; + succ_result_set_cnt_ = 0; + + inited_ = true; + } + return ret; +} + +void MySQLQueryBase::destroy() +{ + inited_ = false; + + if (NULL != res_) { + mysql_free_result(res_); + res_ = NULL; + } + + sql_ = NULL; + sql_len_ = 0; + mysql_ = NULL; + svr_.reset(); + res_ = NULL; + row_ = NULL; + col_lens_ = NULL; + col_cnt_ = 0; + + mysql_err_code_ = 0; + mysql_err_msg_[0] = '\0'; + + succ_result_set_cnt_ = 0; +} + +int MySQLQueryBase::get_sql(const char *&sql, unsigned long &sql_len) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else { + sql = sql_; + sql_len = sql_len_; + } + return ret; +} + +int MySQLQueryBase::set_result(MYSQL *mysql, const ObAddr &svr) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(mysql)) { + LOG_ERROR("mysql handle is NULL", K(mysql)); + ret = OB_INVALID_ARGUMENT; + } else { + mysql_ = mysql; + svr_ = svr; + res_ = NULL; + col_lens_ = 0; + row_ = NULL; + col_cnt_ = 0; + } + return ret; +} + +int MySQLQueryBase::next_result() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(mysql_)) { + LOG_ERROR("mysql_ is null", K(mysql_)); + ret = OB_INVALID_ARGUMENT; + } else { + // If the previous result is valid, release the previous result + if (OB_UNLIKELY(NULL != res_)) { + mysql_free_result(res_); + res_ = NULL; + } + + if (OB_FAIL(iterate_next_result_())) { + if (OB_NEED_RETRY != ret && OB_ITER_END != ret) { + LOG_ERROR("iterate_next_result_ fail", KR(ret)); + } + } + // Save the result + // FIXME: Here it is assumed that the statement is a SELECT type statement with a result set. + // If INSERT/UPDATE/DELETE etc. are to be supported in the future, here it is determined whether the result set should be returned + else if (OB_ISNULL(res_ = mysql_store_result(mysql_))) { + int mysql_error_code = mysql_errno(mysql_); + const char *err_msg = mysql_error(mysql_); + LOG_WARN("mysql_store_result return NULL", K(mysql_error_code), K(err_msg), K(svr_)); + // Setting error codes + set_mysql_error(mysql_error_code, err_msg); + ret = OB_NEED_RETRY; + } else { + ++succ_result_set_cnt_; + } + } + + return ret; +} + +int MySQLQueryBase::iterate_next_result_() +{ + int ret = OB_SUCCESS; + int status = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else { + // No need to execute mysql_next_result() when iterating through the result set for the first time + if (0 == succ_result_set_cnt_) { + // do nothing + } else { + // Iterate over the results of the next statement in the multi-statement + // This is the stage on the server side where the specific "query" is executed, from syntax checking to successful execution. + // Any errors may occur, so the effect and handling should be consistent with mysql_real_query() + // + // @return -1 = no, end of iteration already + // @return >0 = error, error occurred + // @return 0 = yes, there is still a next result set + status = mysql_next_result(mysql_); + + if (-1 == status) { + // No more result sets available + ret = OB_ITER_END; + } else if (0 == status) { + // Iteration success + } else { + int mysql_error_code = mysql_errno(mysql_); + const char *err_msg = mysql_error(mysql_); + LOG_WARN("mysql_next_result fail, need retry", K(status), K(mysql_error_code), K(err_msg), + K(succ_result_set_cnt_), K(svr_)); + // set error code + set_mysql_error(mysql_error_code, err_msg); + // Return need_retry + ret = OB_NEED_RETRY; + } + } + } + + return ret; +} + +int MySQLQueryBase::next_row() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
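next_result() and iterate_next_result_() above wrap the standard libmysqlclient multi-result loop (enabled by CLIENT_MULTI_STATEMENTS in init_conn_()). For reference, the bare C-API form of that loop is sketched below; the member functions add the OB error codes and the per-row accessors on top of it:

// Drain every result set of a multi-statement query. Returns 0 on success, -1 on error.
int drain_all_results(MYSQL *mysql)
{
  int status = 0;
  do {
    MYSQL_RES *res = mysql_store_result(mysql);
    if (NULL != res) {
      // ... consume rows here with mysql_fetch_row(res) ...
      mysql_free_result(res);
    } else if (0 != mysql_errno(mysql)) {
      return -1;                         // the current statement failed
    }
    status = mysql_next_result(mysql);   // -1: no more results, 0: next result ready, >0: error
  } while (0 == status);
  return (-1 == status) ? 0 : -1;
}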
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(res_)) { + LOG_ERROR("invalid mysql result", K(res_)); + ret = OB_INVALID_DATA; + } else if (OB_ISNULL(row_ = mysql_fetch_row(res_))) { + if (0 != mysql_errno(mysql_)) { + LOG_WARN("mysql_fetch_row fail", K(mysql_errno(mysql_)), K(mysql_error(mysql_)), K(svr_)); + ret = OB_NEED_RETRY; + } else { + ret = OB_ITER_END; + } + } else if (OB_ISNULL(col_lens_ = mysql_fetch_lengths(res_))) { + LOG_ERROR("mysql_fetch_lengths fail", K(mysql_errno(mysql_)), K(mysql_error(mysql_)), K(svr_)); + ret = OB_ERR_UNEXPECTED; + } else { + // Calculate the number of columns + col_cnt_ = mysql_num_fields(res_); + } + + // Uniform setting of mysql error codes + if (OB_SUCCESS != ret && OB_ITER_END != ret && NULL != mysql_) { + set_mysql_error(mysql_errno(mysql_), mysql_error(mysql_)); + } + + if (OB_SUCCESS != ret) { + // The current result iteration is complete, or an error is encountered and the current SQL query result set is released early + // 1. ret = OB_ITER_END, end of iteration + // 2. ret = other error code + if (NULL != res_) { + mysql_free_result(res_); + res_ = NULL; + } + } + + return ret; +} + +int MySQLQueryBase::get_column_index(const char *column_name, int64_t &column_index) +{ + int ret = OB_SUCCESS; + column_index = -1; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(column_name)) { + LOG_ERROR("column_name is null"); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(res_)) { + LOG_ERROR("invalid mysql result", K(res_)); + ret = OB_INVALID_DATA; + } else { + MYSQL_FIELD *field = NULL; + bool done = false; + int column_count = mysql_num_fields(res_); + + for (int i = 0; ! done && i < column_count; i++) { + // Get the definition of column i + if (NULL != (field = mysql_fetch_field_direct(res_, i))) { + if (0 == STRCMP(column_name, field->name)) { + done = true; + column_index = i; + } + } + } + + if (!done) { + column_index = -1; + ret = OB_ERR_COLUMN_NOT_FOUND; + } + } + + return ret; +} + +int MySQLQueryBase::get_int(const int64_t col_idx, + int64_t& int_val, + bool &is_null_value) const +{ + int ret = OB_SUCCESS; + ObString varchar_val; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_varchar(col_idx, varchar_val, is_null_value))) { + LOG_ERROR("failed to get varchar", KR(ret), K(col_idx)); + } else if (is_null_value) { + // null value + } else if (OB_ISNULL(varchar_val.ptr())) { + LOG_ERROR("varchar_val is invalid", K(varchar_val), K(col_idx)); + ret = OB_INVALID_DATA; + } else { + int64_t ret_val = 0; + const char *nptr = varchar_val.ptr(); + char *end_ptr = NULL; + ret_val = strtoll(nptr, &end_ptr, 10); + if (*nptr != '\0' && *end_ptr == '\0') { + int_val = ret_val; + } else { + LOG_ERROR("invalid int value", K(varchar_val)); + ret = OB_INVALID_DATA; + } + } + return ret; +} + +int MySQLQueryBase::get_uint(const int64_t col_idx, + uint64_t& int_val, + bool &is_null_value) const +{ + int ret = OB_SUCCESS; + ObString varchar_val; + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_varchar(col_idx, varchar_val, is_null_value))) { + LOG_ERROR("failed to get varchar", KR(ret), K(col_idx)); + } else if (is_null_value) { + // null value + } else if (OB_ISNULL(varchar_val.ptr())) { + LOG_ERROR("varchar_val is invalid", K(varchar_val), K(col_idx)); + ret = OB_INVALID_DATA; + } else { + uint64_t ret_val = 0; + const char *nptr = varchar_val.ptr(); + char *end_ptr = NULL; + ret_val = strtoull(nptr, &end_ptr, 10); + if (*nptr != '\0' && *end_ptr == '\0') { + int_val = ret_val; + } else { + LOG_ERROR("invalid int value", K(varchar_val)); + ret = OB_INVALID_DATA; + } + } + return ret; +} + +int MySQLQueryBase::get_datetime(const int64_t col_idx, + int64_t& datetime, + bool &is_null_value) const +{ + int ret = OB_SUCCESS; + ObString varchar_val; + int64_t ret_val = 0; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_varchar(col_idx, varchar_val, is_null_value))) { + LOG_ERROR("failed to get varchar", KR(ret), K(col_idx)); + } else if (is_null_value) { + // null value + } else if (OB_ISNULL(varchar_val.ptr())) { + LOG_ERROR("varchar_val is invalid", K(varchar_val), K(col_idx)); + ret = OB_INVALID_DATA; + } + // Convert str to usec. + else if (OB_FAIL(ObTimeUtility2::str_to_usec(varchar_val, ret_val))) { + LOG_ERROR("failed to convert str to usec", KR(ret), K(varchar_val)); + } else { + datetime = ret_val; + } + return ret; +} + +int MySQLQueryBase::get_varchar(const int64_t col_idx, + ObString& varchar_val, + bool &is_null_value) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(row_) || OB_ISNULL(col_lens_)) { + LOG_ERROR("invalid row or col_lens ", K(row_), K(col_lens_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(col_idx < 0) || OB_UNLIKELY(col_cnt_ <= col_idx)) { + LOG_ERROR("invalid col idx", K(col_idx), K(col_cnt_)); + ret = OB_INVALID_ARGUMENT; + } else { + varchar_val.assign(row_[col_idx], static_cast(col_lens_[col_idx])); + is_null_value = (NULL == row_[col_idx]); + } + return ret; +} + +void MySQLQueryBase::set_mysql_error(const int err_code, const char *err_msg) +{ + int ret = OB_SUCCESS; + + mysql_err_code_ = err_code; + mysql_err_msg_[0] = '\0'; + + // set error msg + if (0 != err_code && NULL != err_msg) { + int64_t pos = 0; + if (OB_FAIL(databuff_printf(mysql_err_msg_, sizeof(mysql_err_msg_), pos, "%s", err_msg))) { + LOG_ERROR("databuff_printf err_msg fail", KR(ret), K(mysql_err_msg_), K(sizeof(mysql_err_msg_)), + K(pos), K(err_msg), K(err_code)); + } + } +} + +} +} diff --git a/src/liboblog/src/ob_log_mysql_connector.h b/src/liboblog/src/ob_log_mysql_connector.h new file mode 100644 index 0000000000000000000000000000000000000000..f5ad980962f578eb90559099c9d09d757f4e6fc5 --- /dev/null +++ b/src/liboblog/src/ob_log_mysql_connector.h @@ -0,0 +1,221 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_MYSQL_CONNECTOR_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_MYSQL_CONNECTOR_H_ + +#include +#include + +#include "share/ob_define.h" // OB_MAX_* +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/net/ob_addr.h" // ObAddr + +namespace oceanbase +{ +namespace liboblog +{ +class MySQLQueryBase; +struct MySQLConnConfig; +class IObLogMySQLConnector +{ +public: + virtual ~IObLogMySQLConnector() {} + +public: + /// Execute the query operation: SELECT + /// + /// The only two return values are the following, to prevent an error from occurring in the query, which could lead to an unexpected exit + /// @retval OB_SUCCESS success + /// @retval OB_NEED_RETRY needs to be retried + virtual int query(MySQLQueryBase &query) = 0; + + /// execute write operation: + /// UPDATE, INSERT, CREATE TABLE, etc. + virtual int exec(MySQLQueryBase &query) = 0; + + virtual int init(const MySQLConnConfig &cfg, const bool enable_ssl_client_authentication) = 0; + virtual bool is_inited() const = 0; + virtual void destroy() = 0; +}; + +/////////////////////////////////// ObLogMySQLConnector ///////////////////////// + +class ObLogMySQLConnector : public IObLogMySQLConnector +{ +public: + ObLogMySQLConnector(); + virtual ~ObLogMySQLConnector(); + int init(const MySQLConnConfig &cfg, + const bool enable_ssl_client_authentication); + bool is_inited() const { return inited_; } + void destroy(); + const common::ObAddr &get_server() const { return svr_; } +public: + int query(MySQLQueryBase &query); + int exec(MySQLQueryBase &query); +private: + int init_conn_(const MySQLConnConfig &cfg, + const bool enable_ssl_client_authentication); + void destroy_conn_(); + int set_timeout_variable_(const int64_t query_timeout, const int64_t trx_timeout); +private: + bool inited_; + MYSQL *mysql_; + common::ObAddr svr_; +}; + +struct MySQLConnConfig +{ + common::ObAddr svr_; + const char *mysql_user_; + const char *mysql_password_; + const char *mysql_db_; + int mysql_connect_timeout_sec_; + int mysql_query_timeout_sec_; + char ip_buf_[common::MAX_IP_ADDR_LENGTH + 1]; + + const char *get_mysql_addr() const { return ip_buf_; } + int get_mysql_port() const { return svr_.get_port(); } + + bool is_valid() const; + void reset() + { + svr_.reset(); + mysql_user_ = 0; + mysql_password_ = 0; + mysql_db_ = 0; + mysql_connect_timeout_sec_ = 0; + mysql_query_timeout_sec_ = 0; + ip_buf_[0] = '\0'; + } + + int reset(const common::ObAddr &svr, + const char *mysql_user, + const char *mysql_password, + const char *mysql_db, + const int mysql_connect_timeout_sec, + const int mysql_query_timeout_sec); + + TO_STRING_KV(K_(svr), + K_(mysql_user), + K_(mysql_password), + K_(mysql_db), + K_(mysql_connect_timeout_sec), + K_(mysql_query_timeout_sec)); +}; + +/* + * MySQL Query Base Class + * To implement special queries and writes, you can inherit this class and then use the MySQL Connector to execute the queries and writes + * + * single/multiple-statement query operations (select). + * 1. init(), which initialises the SQL + * 2. pass the class object into the MySQL Connector's query() function to execute the SQL + * 3. call next_result(), get the result set + * 4. call next_row() to iterate over the row data + * 5. use the get_xxx() function to get the corresponding column data when processing each row of data + * + * Write operations (update, insert) + * 1. init(), initialise the SQL + * 2. 
pass the class object into the MySQL Connector's exec() function to execute the SQL + */ +class MySQLQueryBase +{ +protected: + static const int64_t DEFAULT_SQL_LENGTH = 1024; + +protected: + MySQLQueryBase(); + virtual ~MySQLQueryBase(); + +public: + int init(const char *sql, const unsigned long sql_len); + void destroy(); + + int get_sql(const char *&sql, unsigned long &sql_len); + + int set_result(MYSQL *mysql, const common::ObAddr &svr); + + /* + * Iterate over the next result set + * Support multiple-statement execution and multiple-result + * + * OB_ITER_END: no more results. + * OB_NEED_RETRY: on connection failure. + */ + int next_result(); + + /* + * Get next row till end. + * OB_ITER_END: no more rows. + * OB_NEED_RETRY: on connection failure. + */ + int next_row(); + + /* + * Get index by column name + * OB_ERR_COLUMN_NOT_FOUND : not found + */ + int get_column_index(const char *column_name, int64_t &column_index); + /* + * Get column data. + * Read column col_idx of certain type. + */ + int get_int(const int64_t col_idx, + int64_t &int_val, + bool &is_null_value) const; + int get_uint(const int64_t col_idx, + uint64_t &int_val, + bool &is_null_value) const; + int get_datetime(const int64_t col_idx, + int64_t &datetime, + bool &is_null_value) const; + int get_varchar(const int64_t col_idx, + common::ObString &varchar_val, + bool &is_null_value) const; + + // Error handling + void set_mysql_error(const int err_code, const char *err_msg); + int get_mysql_err_code() const { return mysql_err_code_; } + const char *get_mysql_err_msg() const { return mysql_err_msg_; } + + int64_t get_result_count() const { return succ_result_set_cnt_; } + + const common::ObAddr &get_server() const { return svr_; } + +private: + int iterate_next_result_(); + +protected: + bool inited_; + const char *sql_; + unsigned long sql_len_; + MYSQL *mysql_; + common::ObAddr svr_; + MYSQL_RES *res_; + MYSQL_ROW row_; // single row of data + unsigned long *col_lens_; // Record the value of all columns in a single row + unsigned int col_cnt_; // Record the number of columns + // Log mysql execution error codes + int mysql_err_code_; + // Logging mysql execution error text messages + char mysql_err_msg_[common::OB_MAX_ERROR_MSG_LEN]; + + // Number of result sets that have been successfully iterated + int64_t succ_result_set_cnt_; +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_mysql_proxy.cpp b/src/liboblog/src/ob_log_mysql_proxy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..04497528f7892702b7e9cc8743df2e1c7ad30a55 --- /dev/null +++ b/src/liboblog/src/ob_log_mysql_proxy.cpp @@ -0,0 +1,136 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
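A usage sketch following steps 1-5 above; the SQL text, the subclass name and the chosen column are made up for illustration, while the call sequence and the OB_NEED_RETRY / OB_ITER_END contract come from this header:

#include <cstring>

// The base constructor is protected, so a thin subclass is the intended entry point.
class DemoTimeQuery : public MySQLQueryBase
{
};

int fetch_current_time(ObLogMySQLConnector &conn, int64_t &now_us)
{
  int ret = common::OB_SUCCESS;
  const char *sql = "SELECT now(6)";
  DemoTimeQuery query;
  bool is_null = false;

  if (common::OB_SUCCESS != (ret = query.init(sql, strlen(sql)))) {          // step 1
    // invalid sql
  } else if (common::OB_SUCCESS != (ret = conn.query(query))) {              // step 2
    // OB_NEED_RETRY: connection-level failure, retry on another server if desired
  } else if (common::OB_SUCCESS != (ret = query.next_result())) {            // step 3
    // OB_ITER_END / OB_NEED_RETRY per the contract above
  } else if (common::OB_SUCCESS != (ret = query.next_row())) {               // step 4
    // OB_ITER_END here would mean an empty result set
  } else {
    ret = query.get_datetime(0, now_us, is_null);                            // step 5
  }
  return ret;
}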
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_mysql_proxy.h" // ObLogMysqlProxy + +#include "lib/mysqlclient/ob_mysql_server_provider.h" // ObMySQLServerProvider +#include "ob_log_utils.h" // is_mysql_client_errno +#include "share/ob_thread_mgr.h" + +using namespace oceanbase::common; +using namespace oceanbase::common::sqlclient; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogMysqlProxy::ObLogMysqlProxy() : inited_(false), + connection_pool_(), + mysql_proxy_() +{ + cluster_user_[0] = '\0'; + cluster_password_[0] = '\0'; + cluster_db_name_[0] = '\0'; +} + +ObLogMysqlProxy::~ObLogMysqlProxy() +{ + destroy(); +} + +int ObLogMysqlProxy::init(ServerProviderType *server_provider, + const char *cluster_user, + const char *cluster_password, + const char *cluster_db_name, + const int64_t sql_conn_timeout_us, + const int64_t sql_query_timeout_us, + const bool enable_ssl_client_authentication) +{ + int ret = OB_SUCCESS; + int64_t user_pos = 0; + int64_t password_pos = 0; + int64_t db_pos = 0; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogMysqlProxy has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(server_provider) + || OB_ISNULL(cluster_user) + || OB_ISNULL(cluster_password) + || OB_ISNULL(cluster_db_name) + || OB_UNLIKELY(sql_conn_timeout_us <= 0) + || OB_UNLIKELY(sql_query_timeout_us <= 0)) { + LOG_ERROR("invalid argument", K(server_provider), + K(cluster_user), K(cluster_password), K(cluster_db_name), K(sql_conn_timeout_us), + K(sql_query_timeout_us)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(TG_START(lib::TGDefIDs::LogMysqlPool))) { + LOG_ERROR("init connection pool timer fail", KR(ret)); + } else if (OB_FAIL(databuff_printf(cluster_user_, sizeof(cluster_user_), user_pos, "%s", cluster_user))) { + LOG_ERROR("print cluster_user fail", KR(ret), K(user_pos), K(cluster_user)); + } else if (OB_FAIL(databuff_printf(cluster_password_, sizeof(cluster_password_), password_pos, "%s", cluster_password))) { + LOG_ERROR("print cluster_password fail", KR(ret), K(password_pos), K(cluster_password)); + } else if (OB_FAIL(databuff_printf(cluster_db_name_, sizeof(cluster_db_name_), db_pos, "%s", cluster_db_name))) { + LOG_ERROR("print cluster_db_name fail", KR(ret), K(db_pos), K(cluster_db_name)); + } else { + ObConnPoolConfigParam conn_pool_config; + conn_pool_config.reset(); + + // Configure refresh interval + // 1. The default is the shortest refresh time when no connection is available + // 2. 
When a connection is available, the actual refresh time is (connection_refresh_interval * 50) + conn_pool_config.connection_refresh_interval_ = 1L * 1000L * 1000L; // us + conn_pool_config.sqlclient_wait_timeout_ = sql_conn_timeout_us / 1000000L; // s + conn_pool_config.connection_pool_warn_time_ = 10L * 1000L * 1000L; // us + conn_pool_config.long_query_timeout_ = sql_query_timeout_us; // us + conn_pool_config.sqlclient_per_observer_conn_limit_ = 20; // us + + _LOG_INFO("mysql connection pool: sql_conn_timeout_us=%ld us, " + "sqlclient_wait_timeout=%ld sec, sql_query_timeout_us=%ld us, " + "long_query_timeout=%ld us, connection_refresh_interval=%ld us, " + "connection_pool_warn_time=%ld us, sqlclient_per_observer_conn_limit=%ld", + sql_conn_timeout_us, conn_pool_config.sqlclient_wait_timeout_, + sql_query_timeout_us, conn_pool_config.long_query_timeout_, + conn_pool_config.connection_refresh_interval_, conn_pool_config.connection_pool_warn_time_, + conn_pool_config.sqlclient_per_observer_conn_limit_); + + connection_pool_.update_config(conn_pool_config); + connection_pool_.set_server_provider(server_provider); + if (! enable_ssl_client_authentication) { + connection_pool_.disable_ssl(); + } + if (OB_FAIL(connection_pool_.set_db_param(cluster_user_, + cluster_password_, + cluster_db_name_))) { + LOG_ERROR("set connection pool db param fail", KR(ret), K(cluster_user_), + K(cluster_password_), K(cluster_db_name_)); + } else if (OB_FAIL(connection_pool_.start(lib::TGDefIDs::LogMysqlPool))) { + // launch ConnectinPool + LOG_ERROR("start connection pool fail", KR(ret)); + } else if (OB_FAIL(mysql_proxy_.init(&connection_pool_))) { // init MySQL Proxy + LOG_ERROR("init mysql proxy fail", KR(ret)); + } else { + LOG_INFO("ObLogMysqlProxy init succ", "use_ssl", connection_pool_.is_use_ssl()); + inited_ = true; + } + } + + return ret; +} + +void ObLogMysqlProxy::destroy() +{ + inited_ = false; + + connection_pool_.stop(); + TG_DESTROY(lib::TGDefIDs::LogMysqlPool); + + cluster_user_[0] = '\0'; + cluster_password_[0] = '\0'; + cluster_db_name_[0] = '\0'; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_mysql_proxy.h b/src/liboblog/src/ob_log_mysql_proxy.h new file mode 100644 index 0000000000000000000000000000000000000000..0ec462e0cd5aa2035b7f5ce27bb888570f3762a8 --- /dev/null +++ b/src/liboblog/src/ob_log_mysql_proxy.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
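A sketch of the call site for ObLogMysqlProxy::init() above; the provider pointer and credentials are placeholders, and the constants make explicit that both timeout arguments are expected in microseconds:

int start_sql_proxy(ObLogMysqlProxy &proxy,
                    common::sqlclient::ObMySQLServerProvider *server_provider)
{
  const int64_t sql_conn_timeout_us  = 10L * 1000L * 1000L;   // 10 s
  const int64_t sql_query_timeout_us = 30L * 1000L * 1000L;   // 30 s

  return proxy.init(server_provider,
                    "cluster_user",        // placeholder credentials
                    "cluster_password",
                    "oceanbase",
                    sql_conn_timeout_us,
                    sql_query_timeout_us,
                    false /* enable_ssl_client_authentication */);
}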
+ */ + +#ifndef OCEANBASE_LIBOBLOG_MYSQL_PROXY_H__ +#define OCEANBASE_LIBOBLOG_MYSQL_PROXY_H__ + +#include "lib/task/ob_timer.h" // ObTimer +#include "lib/mysqlclient/ob_mysql_connection_pool.h" // ObMySQLConnectionPool +#include "lib/mysqlclient/ob_mysql_proxy.h" // ObMySQLProxy + +namespace oceanbase +{ +namespace common +{ +class ObCommonConfig; + +namespace sqlclient +{ +class ObMySQLServerProvider; +} // namespace sqlclient +} // namespace common + +namespace liboblog +{ + +///////////////////////////////////// ObLogMysqlProxy ///////////////////////////////// +typedef common::sqlclient::ObMySQLServerProvider ServerProviderType; +typedef common::sqlclient::ObMySQLConnectionPool ConnectionPoolType; + +class ObLogMysqlProxy +{ +public: + ObLogMysqlProxy(); + virtual ~ObLogMysqlProxy(); + +public: + int init(ServerProviderType *server_provider, + const char *cluster_user, + const char *cluster_password, + const char *cluster_db_name, + const int64_t sql_conn_timeout_us, + const int64_t sql_query_timeout_us, + const bool enable_ssl_client_authentication); + void destroy(); + + common::ObMySQLProxy &get_ob_mysql_proxy() { return mysql_proxy_; } + +private: + bool inited_; + + char cluster_user_[common::OB_MAX_USER_NAME_BUF_LENGTH]; + char cluster_password_[common::OB_MAX_PASSWORD_LENGTH + 1]; + char cluster_db_name_[common::OB_MAX_DATABASE_NAME_BUF_LENGTH]; + + ConnectionPoolType connection_pool_; + // Thread-safe proxies, getting connections and locking + common::ObMySQLProxy mysql_proxy_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogMysqlProxy); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_MYSQL_PROXY_H__ */ diff --git a/src/liboblog/src/ob_log_part_callback.h b/src/liboblog/src/ob_log_part_callback.h new file mode 100644 index 0000000000000000000000000000000000000000..3d618642cf4185e52aa710e514ac1fedd32aea6e --- /dev/null +++ b/src/liboblog/src/ob_log_part_callback.h @@ -0,0 +1,55 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_CALLBACK_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_CALLBACK_H_ + +#include "lib/container/ob_se_array.h" // ObSEArray + +namespace oceanbase +{ +namespace common +{ +struct ObPartitionKey; +} + +namespace liboblog +{ + +struct PartAddCallback +{ +public: + virtual ~PartAddCallback() {} + +public: + // Add partition + virtual int add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id) = 0; +}; + +struct PartRecycleCallback +{ +public: + virtual ~PartRecycleCallback() {} + +public: + // Recycling partition + virtual int recycle_partition(const common::ObPartitionKey &pkey) = 0; +}; + +typedef common::ObSEArray PartCBArray; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_fetch_ctx.cpp b/src/liboblog/src/ob_log_part_fetch_ctx.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dff1231ff8b3fa6b7b300d03cf698ccaed8be836 --- /dev/null +++ b/src/liboblog/src/ob_log_part_fetch_ctx.cpp @@ -0,0 +1,1982 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_fetch_ctx.h" + +#include "lib/hash_func/murmur_hash.h" // murmurhash +#include "storage/ob_storage_log_type.h" // ObStorageLogType + +#include "ob_log_utils.h" // get_timestamp +#include "ob_log_fetcher_heartbeat_worker.h" // IObLogFetcherHeartbeatWorker +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_part_fetch_mgr.h" // IObLogPartFetchMgr +#include "ob_log_trace_id.h" // ObLogTraceIdGuard + +#define STAT(level, fmt, args...) OBLOG_FETCHER_LOG(level, "[STAT] [FETCH_CTX] " fmt, ##args) +#define _STAT(level, fmt, args...) _OBLOG_FETCHER_LOG(level, "[STAT] [FETCH_CTX] " fmt, ##args) +#define ISTAT(fmt, args...) STAT(INFO, fmt, ##args) +#define _ISTAT(fmt, args...) _STAT(INFO, fmt, ##args) +#define DSTAT(fmt, args...) 
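A minimal implementer sketch for the two callbacks above; the class name and the comments describing what an implementation would do are illustrative, only the two virtual signatures come from this header:

class DemoPartCallback : public PartAddCallback, public PartRecycleCallback
{
public:
  virtual ~DemoPartCallback() {}

  virtual int add_partition(const common::ObPartitionKey &pkey,
                            const int64_t start_tstamp,
                            const uint64_t start_log_id)
  {
    // e.g. register a fetch context for pkey starting at (start_log_id, start_tstamp)
    UNUSED(pkey);
    UNUSED(start_tstamp);
    UNUSED(start_log_id);
    return common::OB_SUCCESS;
  }

  virtual int recycle_partition(const common::ObPartitionKey &pkey)
  {
    // e.g. drop the partition's fetch context once all of its data has been output
    UNUSED(pkey);
    return common::OB_SUCCESS;
  }
};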
STAT(DEBUG, fmt, ##args) + +using namespace oceanbase::common; +using namespace oceanbase::storage; +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace liboblog +{ + +/////////////////////////////// PartFetchCtx ///////////////////////////////// + +// Defining global class variables +int64_t PartFetchCtx::g_svr_list_update_interval = + ObLogConfig::default_svr_list_update_interval_sec * _SEC_; +int64_t PartFetchCtx::g_leader_info_update_interval = + ObLogConfig::default_leader_info_update_interval_sec * _SEC_; +int64_t PartFetchCtx::g_heartbeat_interval = ObLogConfig::default_heartbeat_interval_sec * _SEC_; +int64_t PartFetchCtx::g_blacklist_history_clear_interval= + ObLogConfig::default_blacklist_history_clear_interval_min * _MIN_; + +PartFetchCtx::PartFetchCtx() +{ + reset(); +} + +PartFetchCtx::~PartFetchCtx() +{ + reset(); +} + +void PartFetchCtx::configure(const ObLogConfig &config) +{ + // update global class variables + int64_t svr_list_update_interval_sec = config.svr_list_update_interval_sec; + int64_t leader_info_update_interval_sec = config.leader_info_update_interval_sec; + int64_t heartbeat_interval_sec = config.heartbeat_interval_sec; + int64_t blacklist_history_clear_interval_min = config.blacklist_history_clear_interval_min; + + ATOMIC_STORE(&g_svr_list_update_interval, svr_list_update_interval_sec * _SEC_); + LOG_INFO("[CONFIG]", K(svr_list_update_interval_sec)); + + ATOMIC_STORE(&g_leader_info_update_interval, leader_info_update_interval_sec * _SEC_); + LOG_INFO("[CONFIG]", K(leader_info_update_interval_sec)); + + ATOMIC_STORE(&g_heartbeat_interval, heartbeat_interval_sec * _SEC_); + LOG_INFO("[CONFIG]", K(heartbeat_interval_sec)); + + ATOMIC_STORE(&g_blacklist_history_clear_interval, blacklist_history_clear_interval_min * _MIN_); + LOG_INFO("[CONFIG]", K(blacklist_history_clear_interval_min)); +} + +void PartFetchCtx::reset() +{ + // Note: The default stream type for setting partitions is hot stream + stype_ = FETCH_STREAM_TYPE_HOT; + state_ = STATE_NORMAL; + discarded_ = false; + pkey_.reset(); + serve_info_.reset(); + progress_id_ = -1; + part_fetch_mgr_ = NULL; + part_trans_resolver_ = NULL; + last_sync_progress_ = OB_INVALID_TIMESTAMP; + progress_.reset(); + fetch_info_.reset(); + blacklist_.reset(); + svr_list_[0].reset(); + svr_list_[1].reset(); + cur_svr_list_idx_ = 0; + svr_list_need_update_ = true; + svr_list_last_update_tstamp_ = OB_INVALID_TIMESTAMP; + svr_find_req_.reset(); + start_log_id_locate_req_.reset(); + leader_.reset(); + has_leader_ = false; + leader_last_update_tstamp_ = OB_INVALID_TIMESTAMP; + leader_find_req_.reset(); + heartbeat_req_.reset(); + heartbeat_last_update_tstamp_ = OB_INVALID_TIMESTAMP; + split_dest_array_.reset(); + FetchTaskListNode::reset(); +} + +int PartFetchCtx::init(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const int64_t start_log_id, + const int64_t progress_id, + IObLogPartTransResolver &part_trans_resolver, + IObLogPartFetchMgr &part_fetch_mgr) +{ + int ret = OB_SUCCESS; + // If the start log ID is 1, the service is started from creation + bool start_serve_from_create = (1 == start_log_id); + + reset(); + + pkey_ = pkey; + serve_info_.reset(start_serve_from_create, start_tstamp); + progress_.reset(start_log_id, start_tstamp); + progress_id_ = progress_id; + part_fetch_mgr_ = &part_fetch_mgr; + part_trans_resolver_ = &part_trans_resolver; + + // Default is DDL stream type if it is a DDL partition, otherwise it is a hot stream + if (is_ddl_table(pkey.get_table_id())) { + stype_ = 
FETCH_STREAM_TYPE_DDL; + } else { + stype_ = FETCH_STREAM_TYPE_HOT; + } + + return ret; +} + +int PartFetchCtx::dispatch_heartbeat_if_need_() +{ + int ret = OB_SUCCESS; + // Get current progress + int64_t cur_progress = get_progress(); + if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part_trans_resolver_", K(part_trans_resolver_)); + ret = OB_NOT_INIT; + } + // heartbeats are sent down only if progress updated + else if (cur_progress != last_sync_progress_) { + LOG_DEBUG("partition progress updated. generate HEARTBEAT task", K_(pkey), + "last_sync_progress", TS_TO_STR(last_sync_progress_), + "cur_progress", TS_TO_STR(cur_progress)); + + if (OB_FAIL(part_trans_resolver_->heartbeat(pkey_, cur_progress))) { + LOG_ERROR("generate HEARTBEAT task fail", KR(ret), K(pkey_), K(cur_progress)); + } else { + last_sync_progress_ = cur_progress; + } + } + return ret; +} + +int PartFetchCtx::sync(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + int64_t pending_trans_count = 0; + // Heartbeat issued according to conditions + if (OB_FAIL(dispatch_heartbeat_if_need_())) { + LOG_ERROR("dispatch_heartbeat_if_need_ fail", KR(ret)); + } else { + ret = dispatch_(stop_flag, pending_trans_count); + } + return ret; +} + +int PartFetchCtx::dispatch_(volatile bool &stop_flag, int64_t &pending_trans_count) +{ + int ret = OB_SUCCESS; + // get current state + int cur_state = state_; + + // If in a waiting state, no task is issued + if (is_in_wait_state(cur_state)) { + LOG_DEBUG("part is in wait state, can not dispatch trans task", K_(pkey), + "cur_state", print_state(cur_state)); + } else if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(part_trans_resolver_->dispatch(stop_flag, pending_trans_count))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("resolver dispatch fail", KR(ret)); + } + } else { + LOG_DEBUG("dispatch trans task success", K_(pkey), K(pending_trans_count), + "state", print_state(state_)); + } + return ret; +} + +int PartFetchCtx::read_log(const clog::ObLogEntry &log_entry, + IObLogPartTransResolver::ObLogMissingInfo &missing, + TransStatInfo &tsi, + const bool need_filter_pg_no_missing_redo_trans, + const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const clog::ObLogEntryHeader &header = log_entry.get_header(); + const ObPartitionKey &pkey = header.get_partition_key(); + ObStorageLogType log_type = storage::OB_LOG_UNKNOWN; + + if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_)); + ret = OB_INVALID_ERROR; + } + // Verify log validity + else if (OB_UNLIKELY(pkey != pkey_)) { + LOG_ERROR("invalid log entry which pkey does not match", K(pkey_), K(log_entry)); + ret = OB_INVALID_ARGUMENT; + } + // Verifying log continuity + else if (OB_UNLIKELY(progress_.get_next_log_id() != header.get_log_id())) { + LOG_ERROR("log not sync", "next_log_id", progress_.get_next_log_id(), + "cur_log_id", header.get_log_id(), K(log_entry)); + ret = OB_LOG_NOT_SYNC; + } + // Parsing logs and returning log types + else if (OB_FAIL(part_trans_resolver_->read(log_entry, missing, tsi, serve_info_, log_type, + need_filter_pg_no_missing_redo_trans, log_indexs))) { + if (OB_ITEM_NOT_SETTED == ret) { + // Missing redo log + } else { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("resolve log entry fail", KR(ret), K(log_entry), K(serve_info_), K(log_type), + 
K(need_filter_pg_no_missing_redo_trans), K(log_indexs)); + } + } + } + // Handling split source partition logs + else if (OB_LOG_SPLIT_SOURCE_PARTITION == log_type + && OB_FAIL(handle_split_src_log_(log_entry, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_split_src_log_ fail", KR(ret), K(log_type), K(log_entry)); + } + } + // Processing split target partition logs + else if (OB_LOG_SPLIT_DEST_PARTITION == log_type && OB_FAIL(handle_split_dst_log_(log_entry))) { + LOG_ERROR("handle_split_dst_log_ fail", KR(ret), K(log_type), K(log_entry)); + } + // Processing OFFLINE logs + else if ((storage::ObStorageLogTypeChecker::is_offline_partition_log(log_type)) + && OB_FAIL(handle_offline_partition_log_(log_entry, stop_flag))) { + LOG_ERROR("handle_offline_partition_log_ fail", KR(ret), K(log_type), K(log_entry)); + } else { + uint64_t next_log_id = header.get_log_id() + 1; + int64_t log_tstamp = header.get_submit_timestamp(); + + // Advancing zoning progress + if (OB_FAIL(progress_.update_log_progress(next_log_id, log_tstamp))) { + LOG_ERROR("update log progress fail", KR(ret), K(next_log_id), K(log_tstamp), K(progress_)); + } + + LOG_DEBUG("read log and update progress success", K_(pkey), K(log_entry), K_(progress)); + } + return ret; +} + +int PartFetchCtx::handle_offline_partition_log_(const clog::ObLogEntry &log_entry, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_header().get_submit_timestamp(); + + ISTAT("[HANDLE_OFFLINE_LOG] begin", K_(pkey), "state", print_state(state_), K(log_id), K(tstamp)); + + // For OFFLINE logs, only tasks in NORMAL state are processed + // Tasks in other states will be deleted by other scenarios responsible for the partition + // + // Ensure that the discard recycling mechanism. + // + // 1. STATE_NORMAL: discard will be set when OFFLINE logging or partition deletion DDL is encountered + // 2. STATE_WAIT: Deleting a partition DDL sets the discard + // 3. STATE_SPLIT_IN_PROCESS: discard is set when a split is completed, and discard is also set when a partition DDL is deleted + // 4. STATE_WAIT_AND_SPLIT_IN_PROCESS: Discard is set when splitting is complete and discard is set when partition DDL is deleted + // 5. STATE_SPLIT_DONE: Discard will be set when the split is complete, and discard will be set when the partition DDL is deleted + // + // Note: Mechanically, we have to take precautions in many ways and cannot rely on one mechanism to guarantee partition recovery. + // There are two scenarios in which partitions need to be reclaimed. + // 1. partition deletion by DDL: this includes deleting tables, deleting partitions, deleting DBs, deleting tenants, etc. This scenario relies on DDL deletion to be sufficient + // The observer ensures that the partition is not iterated over in the schema after the DDL is deleted + // + // 2. Partition completion: this scenario must wait for all logs to be collected before recycling + // One is to receive the split completion log; the other is to receive the OFFLINE log. + // The reason for the scenario that relies on the OFFLINE log is that the start timestamp may be located after the split completes and before OFFLINE, + // so that the split completion log is not received. Therefore, the OFFLINE log has to be handled, and when the state is always STATE_NORMAL, discard is set if the OFFLINE log is encountered. 
+ // + // It is worth noting that partitions can be deleted by DDL when they are in the middle of a split. The scenario is that when a DB is deleted or a tenant is deleted, + // all partitions under the DB and tenant will be traversed, in which case the split source partition will also be deleted by DDL. + // fetcher DEAD POOL has to handle this concurrency scenario. + if (STATE_NORMAL == state_) { + int64_t pending_trans_count = 0; + // First ensure that all tasks in the queue are dispatched + if (OB_FAIL(dispatch_(stop_flag, pending_trans_count))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch task fail", KR(ret), K(pkey_)); + } + } + // Check if there are pending transactions to be output + else if (OB_UNLIKELY(pending_trans_count > 0)) { + LOG_ERROR("there are still pending trans after dispatch when processing offline log, unexcept error", + K(pending_trans_count), K(pkey_), K(state_)); + ret = OB_INVALID_DATA; + } else { + // Finally mark the partition as ready for deletion + // Note: there is a concurrency situation here, after a successful setup, it may be dropped into the DEAD POOL for recycling by other threads immediately + // Since all data is already output here, it doesn't matter if it goes to the DEAD POOL + set_discarded(); + } + } + + ISTAT("[HANDLE_OFFLINE_LOG] end", KR(ret), K_(pkey), "state", print_state(state_), + K(log_id), K(tstamp)); + + return ret; +} + +// Processing of source partition split end logs +int PartFetchCtx::handle_split_src_log_(const clog::ObLogEntry &log_entry, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_header().get_data_len(); + ObStorageLogType log_type = storage::OB_LOG_UNKNOWN; + ObPartitionSplitSourceLog split_src_log; + bool split_in_process = false; + + // Parsing log headers: log types + if (OB_FAIL(decode_storage_log_type(log_entry, pos, log_type))) { + LOG_ERROR("decode_storage_log_type fail", KR(ret), K(log_entry), K(pos)); + } else if (OB_UNLIKELY(OB_LOG_SPLIT_SOURCE_PARTITION != log_type)) { + LOG_ERROR("invalid log type which is not OB_LOG_SPLIT_SOURCE_PARTITION", K(log_type), + K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } + // Deserialising source partition split logs + else if (OB_FAIL(split_src_log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize split source partition log fail", KR(ret), K(buf), K(len), K(pos), + K(log_entry), K(log_type)); + } + // Verify that the source partition is self + else if (OB_UNLIKELY(split_src_log.get_spp().get_source_pkey() != pkey_)) { + LOG_ERROR("unexcepted error, split source partition is not self", K_(pkey), K(split_src_log)); + ret = OB_ERR_UNEXPECTED; + } + // Populate an array of split target partitions + // Target partition must be populated first, subsequent state transitions will depend on this array and there are concurrency scenarios + else if (OB_FAIL(split_dest_array_.assign(split_src_log.get_spp().get_dest_array()))) { + LOG_ERROR("assign split dest array fail", KR(ret), K(split_src_log), K(split_dest_array_)); + } + // Prepare to split, switch state to STATE_SPLIT_IN_PROCESS or STATE_WAIT_AND_SPLIT_IN_PROCESS + else if (OB_FAIL(prepare_split_(split_src_log, split_in_process))) { + LOG_ERROR("prepare split fail", KR(ret), K(split_src_log)); + } else if (! 
split_in_process) { + // The splitting operation does not continue and is taken over by other threads + // Thereafter, the object data structure cannot be referenced again and a concurrency scenario exists + } + // Handle the split task + // There will be no multi-threaded operation here, only one thread will handle the splitting + else if (OB_FAIL(process_split_(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process_split_ fail", KR(ret), K(split_src_log)); + } + } else { + // success + } + + return ret; +} + +int PartFetchCtx::prepare_split_(const ObPartitionSplitSourceLog &split_src_log, + bool &split_in_process) +{ + int ret = OB_SUCCESS; + bool done = false; + FetchState cur_state = state_; + FetchState new_state = state_; + // The reason for referencing the partition in the log here is to prevent the current object's memory from being reclaimed in a concurrency scenario + const ObPartitionKey &pkey = split_src_log.get_spp().get_source_pkey(); + + split_in_process = false; + + // Check status + if (OB_UNLIKELY(STATE_NORMAL != cur_state && STATE_WAIT != cur_state)) { + LOG_ERROR("state not match which can not prepare split", K(cur_state), K(pkey_)); + ret = OB_STATE_NOT_MATCH; + } else { + ISTAT("[SPLIT] [SPLIT_SRC] prepare split begin", K(pkey), "state", print_state(cur_state)); + + while (! done) { + switch (cur_state) { + case STATE_NORMAL: + // STATE_NORMAL -> STATE_SPLIT_IN_PROCESS + new_state = STATE_SPLIT_IN_PROCESS; + cur_state = ATOMIC_CAS(&state_, STATE_NORMAL, new_state); + done = (STATE_NORMAL == cur_state); + // Successful conversions toSTATE_SPLIT_IN_PROCESS,Only then can we continue to process the split + if (done) { + split_in_process = true; + } + break; + case STATE_WAIT: + // STATE_WAIT -> STATE_WAIT_AND_SPLIT_IN_PROCESS + // Note that after the conversion to this state, no further reference can be made to the object's data content, and there may be concurrency scenarios where + // Other threads will continue to advance the state and then the data structure will be reset or destructured + new_state = STATE_WAIT_AND_SPLIT_IN_PROCESS; + cur_state = ATOMIC_CAS(&state_, STATE_WAIT, new_state); + done = (STATE_WAIT == cur_state); + break; + // Other states considered successful + default: + done = true; + break; + } + } + + // The object data structure is no longer referenced here + ISTAT("[SPLIT] [SPLIT_SRC] prepare split done", K(pkey), + "old_state", print_state(cur_state), + "new_state", print_state(new_state)); + } + return ret; +} + +int PartFetchCtx::process_split_(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + int64_t pending_trans_count = 0; + ISTAT("[SPLIT] [SPLIT_SRC] process begin", K_(pkey), "state", print_state(state_), + K_(discarded)); + // The required status is STATE_SPLIT_IN_PROCESS + if (OB_UNLIKELY(STATE_SPLIT_IN_PROCESS != state_)) { + LOG_ERROR("state not match which is not STATE_SPLIT_IN_PROCESS", K(state_), K(pkey_)); + ret = OB_STATE_NOT_MATCH; + } + // Firstly, the backlog of service tasks is issued + else if (OB_FAIL(dispatch_(stop_flag, pending_trans_count))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch trans task fail", KR(ret)); + } + } + // Check if there are pending transactions to be output + else if (OB_UNLIKELY(pending_trans_count > 0)) { + LOG_ERROR("there are still pending trans after dispatch when processing split, unexcept error", + K(pending_trans_count), K(pkey_), K(state_)); + ret = OB_INVALID_DATA; + } + // Then mark this partition splitting process complete + else if 
(OB_FAIL(mark_split_done_())) { + LOG_ERROR("mark split done fail", KR(ret), K(pkey_)); + } + // At this point, all the transactions for this partition have been output, and the following processes the split target partition + else if (OB_FAIL(process_split_dest_part_(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process_split_dest_part_ fail", KR(ret), K(pkey_)); + } + } else { + // Success, mark this partition as ready for deletion + // Note: there is a concurrency situation here, after a successful setup, it may be immediately dropped into the DEAD POOL by other threads for recycling + // Since all data is already output here, it does not matter if it goes to the DEAD POOL + set_discarded(); + + ISTAT("[SPLIT] [SPLIT_SRC] process done", K_(pkey), "state", print_state(state_), + K_(discarded)); + } + return ret; +} + +int PartFetchCtx::mark_split_done_() +{ + int ret = OB_SUCCESS; + int old_state = ATOMIC_CAS(&state_, STATE_SPLIT_IN_PROCESS, STATE_SPLIT_DONE); + if (OB_UNLIKELY(STATE_SPLIT_IN_PROCESS != old_state)) { + LOG_ERROR("state not match which is not STATE_SPLIT_IN_PROCESS", K(old_state), K(state_), + K(pkey_)); + ret = OB_STATE_NOT_MATCH; + } else { + // success + ISTAT("[SPLIT] mark split done", K_(pkey), "state", print_state(state_)); + } + return ret; +} + +int PartFetchCtx::process_split_dest_part_(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(part_fetch_mgr_)) { + LOG_ERROR("invalid part fetch mgr", K(part_fetch_mgr_)); + ret = OB_NOT_INIT; + } + // Verify that the target split partition array is valid + else if (OB_UNLIKELY(split_dest_array_.count() <= 0)) { + LOG_ERROR("split dest array is invalid", K(split_dest_array_)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t dst_part_count = split_dest_array_.count(); + ISTAT("[SPLIT] [SPLIT_SRC] process dest partitions", "count", dst_part_count, + "src_part", pkey_); + + for (int64_t index = 0; OB_SUCCESS == ret && index < dst_part_count; index++) { + const ObPartitionKey &pkey = split_dest_array_.at(index); + // Notify target partition that source partition split processing is complete + if (OB_FAIL(part_fetch_mgr_->activate_split_dest_part(pkey, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("notify_src_part_split_done fail", KR(ret), K(pkey)); + } + } + } + } + return ret; +} + +int PartFetchCtx::handle_when_src_split_done(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + // Is the old state marked WAIT + // i.e. 
:STATE_WAIT or STATE_WAIT_AND_SPLIT_IN_PROCESS + bool old_state_mark_wait = false; + // Has the old state started to split + bool old_state_in_split = false; + int64_t pending_trans_count = 0; + + ISTAT("[SPLIT] [ACTIVATE_DEST] handle begin", K_(pkey), "state", print_state(state_), + K_(discarded)); + + // Clear the WAIT wait state out + if (OB_FAIL(clear_wait_state_(old_state_mark_wait, old_state_in_split))) { + LOG_ERROR("clear_wait_state_ fail", KR(ret), K(pkey_)); + } + // If previously entered into a waiting state, the stacked task is sent down + else if (old_state_mark_wait && OB_FAIL(dispatch_(stop_flag, pending_trans_count))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch fail", KR(ret), K(pkey_)); + } + } + // If a split has been entered previously, the split is processed + else if (old_state_in_split && OB_FAIL(process_split_(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process_split_ fail", KR(ret), K(pkey_)); + } + } + + ISTAT("[SPLIT] [ACTIVATE_DEST] handle done", KR(ret), K_(pkey), + "state", print_state(state_), K(old_state_mark_wait), K(old_state_in_split), + K(pending_trans_count), K_(discarded)); + return ret; +} + +// STATE_WAIT -> STATE_NORMAL +// STATE_WAIT_AND_SPLIT_IN_PROCESS -> STATE_SPLIT_IN_PROCESS +int PartFetchCtx::clear_wait_state_(bool &old_state_mark_wait, bool &old_state_in_split) +{ + int ret = OB_SUCCESS; + bool done = false; + FetchState cur_state = state_; + FetchState new_state = state_; + + old_state_mark_wait = false; + old_state_in_split = false; + + while (! done) { + switch (cur_state) { + case STATE_WAIT: + // STATE_WAIT -> STATE_NORMAL + // Clear wait status if currently waiting + new_state = STATE_NORMAL; + cur_state = ATOMIC_CAS(&state_, STATE_WAIT, new_state); + done = (STATE_WAIT == cur_state); + if (done) { + old_state_mark_wait = true; + old_state_in_split = false; // The old state did not enter the split state + } + break; + + // STATE_WAIT_AND_SPLIT_IN_PROCESS -> STATE_SPLIT_IN_PROCESS + // If the partition is in the wait state and has entered the split state, clear the wait state only and then enter the split state + case STATE_WAIT_AND_SPLIT_IN_PROCESS: + new_state = STATE_SPLIT_IN_PROCESS; + cur_state = ATOMIC_CAS(&state_, STATE_WAIT_AND_SPLIT_IN_PROCESS, new_state); + done = (STATE_WAIT_AND_SPLIT_IN_PROCESS == cur_state); + if (done) { + old_state_mark_wait = true; + old_state_in_split = true; // The old state has gone into schism + } + break; + + // Other states are not in the wait state and do not need to be processed + // Since the source partition has already been split, the target partition will not enter the wait state subsequently + default: + done = true; + old_state_in_split = false; + old_state_mark_wait = false; + break; + } + } + + ISTAT("[SPLIT] clear wait state", K_(pkey), "old_state", print_state(cur_state), + "new_state", print_state(new_state), K(old_state_mark_wait), K(old_state_in_split)); + + return ret; +} + +// Process the target partition split log +// This partition is the target partition of the split, depending on the status of the source partition it is decided whether to wait or not +int PartFetchCtx::handle_split_dst_log_(const clog::ObLogEntry &log_entry) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_header().get_data_len(); + ObStorageLogType log_type = storage::OB_LOG_UNKNOWN; + ObPartitionSplitDestLog split_dst_log; + + // Parsing log headers: log types + if (OB_FAIL(decode_storage_log_type(log_entry, 
pos, log_type))) { + LOG_ERROR("decode_storage_log_type fail", KR(ret), K(log_entry), K(pos)); + } else if (OB_UNLIKELY(OB_LOG_SPLIT_DEST_PARTITION != log_type)) { + LOG_ERROR("invalid log type which is not OB_LOG_SPLIT_DEST_PARTITION", K(log_type), + K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } + // Deserialising source partition split logs + else if (OB_FAIL(split_dst_log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize split dest partition log fail", KR(ret), K(buf), K(len), K(pos), + K(log_entry), K(log_type)); + } else { + const ObSplitPartitionPair &spp = split_dst_log.get_spp(); + bool src_part_split_done = false; + + ISTAT("[SPLIT] [SPLIT_DEST] process begin", K_(pkey), "state", print_state(state_), + "src_pkey", spp.get_source_pkey(), "dst_pkey_count", spp.get_dest_array().count(), + K_(discarded)); + + // First into a waiting state + if (OB_FAIL(mark_state_wait_from_normal_())) { + LOG_ERROR("mark_state_wait_from_normal_ fail", KR(ret)); + } + // Check if the source partition is split + else if (OB_FAIL(check_src_part_split_state_(split_dst_log, src_part_split_done))) { + LOG_ERROR("check_src_part_split_state_ fail", KR(ret), K(split_dst_log)); + } else if (! src_part_split_done) { + // If the source partition does not finish splitting, it is no longer processed + // Hand over to the source partition processing thread to continue advancing state + } + // If the source partition split is complete, mark the status NORMAL + else if (OB_FAIL(mark_state_normal_from_wait_())) { + LOG_ERROR("mark_state_normal_from_wait_ fail", KR(ret), K(pkey_)); + } else { + // success + } + + ISTAT("[SPLIT] [SPLIT_DEST] process done", K_(pkey), "state", print_state(state_), + K(src_part_split_done), K_(discarded), + "src_pkey", spp.get_source_pkey(), "dst_pkey_count", spp.get_dest_array().count()); + } + + return ret; +} + +// convert state:STATE_NORMAL -> STATE_WAIT +int PartFetchCtx::mark_state_wait_from_normal_() +{ + int ret = OB_SUCCESS; + FetchState new_state = STATE_WAIT; + FetchState old_state = ATOMIC_CAS(&state_, STATE_NORMAL, new_state); + + // Requires current state to be STATE_NORMAL + // Note: With future support for splitting no-kill transactions, the state here may not be equal to NORMAL and may enter WATI state earlier + if (OB_UNLIKELY(STATE_NORMAL != old_state)) { + LOG_ERROR("state not match which is not STATE_NORMAL", K(old_state), K(state_)); + ret = OB_STATE_NOT_MATCH; + } else { + ISTAT("[SPLIT] mark state wait", K_(pkey), "old_state", print_state(old_state), + "new_state", print_state(new_state)); + } + return ret; +} + +int PartFetchCtx::check_src_part_split_state_(const ObPartitionSplitDestLog &split_dst_log, + bool &src_part_split_done) +{ + int ret = OB_SUCCESS; + src_part_split_done = false; + const ObPartitionKey &src_pkey = split_dst_log.get_spp().get_source_pkey(); + uint64_t src_log_id = split_dst_log.get_source_log_id(); + int64_t src_log_ts = split_dst_log.get_source_log_ts(); + + if (OB_ISNULL(part_fetch_mgr_)) { + LOG_ERROR("invalid part fetch mgr", K(part_fetch_mgr_)); + ret = OB_NOT_INIT; + } else if (OB_FAIL(part_fetch_mgr_->check_part_split_state(src_pkey, src_log_id, src_log_ts, + src_part_split_done))) { + LOG_ERROR("check_part_split_state fail", KR(ret), K(src_pkey), K(pkey_), K(split_dst_log)); + } else { + // success + } + + return ret; +} + +// State transition: STATE_WAIT -> STATE_NORMAL +// Note that. +// 1. this function needs to take concurrency into account, the source partition will also come over to concurrent settings +// 2. 
The wait state can only be STATE_WAIT, not STATE_WAIT_AND_SPLIT_IN_PROCESS, because this function is called +// when the split target partition log is received, and it is not possible to move on to the next round of splits +int PartFetchCtx::mark_state_normal_from_wait_() +{ + int ret = OB_SUCCESS; + FetchState new_state = STATE_NORMAL; + FetchState old_state = ATOMIC_CAS(&state_, STATE_WAIT, new_state); + + // Requires the current state to be STATE_WAIT or STATE_NORMAL + // If it is STATE_NORMAL, the source partition is concurrently set + if (OB_UNLIKELY(STATE_WAIT != old_state && STATE_NORMAL != old_state)) { + LOG_ERROR("state not match which is not STATE_WAIT or STATE_NORMAL", K(old_state), + K(state_), K(pkey_)); + ret = OB_STATE_NOT_MATCH; + } else { + ISTAT("[SPLIT] mark state normal", K_(pkey), "old_state", print_state(old_state), + "new_state", print_state(new_state)); + } + return ret; +} + +int PartFetchCtx::read_missing_redo(const clog::ObLogEntry &log_entry, + const IObLogPartTransResolver::ObTransIDArray &missing_log_trans_id_array) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_)); + ret = OB_INVALID_ERROR; + } + // Verify log validity + else if (OB_UNLIKELY(log_entry.get_header().get_partition_key() != pkey_)) { + LOG_ERROR("invalid log entry which pkey does not match", K(pkey_), K(log_entry)); + ret = OB_INVALID_ARGUMENT; + } + // Parsing missing logs + else if (OB_FAIL(part_trans_resolver_->read_missing_redo(log_entry, missing_log_trans_id_array))) { + LOG_ERROR("resolver read missing redo fail", KR(ret), K(log_entry), K(missing_log_trans_id_array)); + } else { + // success + } + + return ret; +} + +// Note: this function is called concurrently with handle_when_src_split_done() +// part_trans_resolver ensures that the underlying data operations are mutually exclusive +// +// The scenario is that the split target partition is deleted early by the DDL while the split source partition is responsible for advancing the state of the split target partition +// +// This function is called by the DEAD POOL and will clean up all unpublished tasks and then downlink them +// handle_when_src_split_done() is called by the split source partition and requires it not to produce new tasks, otherwise the downline task would not be the last one. +// +// The implementation guarantees that tasks will only be produced when the log is read. 
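// ------------------------------------------------------------------------------------------------
// Illustrative aside (not part of this patch): concurrent callers in this file coordinate through
// lock-free, CAS-based state transitions (see prepare_split_(), clear_wait_state_() and
// mark_state_wait_from_normal_() above), so exactly one thread wins each transition and keeps
// operating on the object. The standalone sketch below models that pattern with std::atomic;
// FetchState values and try_transit() are hypothetical names, not liboblog code.
#include <atomic>
#include <cstdio>

enum FetchState { STATE_NORMAL, STATE_WAIT, STATE_SPLIT_IN_PROCESS };

// Attempt the transition expected -> desired; return true only for the caller whose CAS succeeds.
static bool try_transit(std::atomic<int> &state, int expected, const int desired)
{
  // On failure, compare_exchange_strong writes the observed value back into 'expected',
  // mirroring how the ATOMIC_CAS calls above return the old state to the losing caller.
  return state.compare_exchange_strong(expected, desired);
}

int main()
{
  std::atomic<int> state{STATE_NORMAL};

  // Only the thread that moves NORMAL -> SPLIT_IN_PROCESS keeps processing the split.
  const bool first_wins = try_transit(state, STATE_NORMAL, STATE_SPLIT_IN_PROCESS);
  // A concurrent caller racing on the same transition loses and must stop touching the object.
  const bool second_wins = try_transit(state, STATE_NORMAL, STATE_SPLIT_IN_PROCESS);

  std::printf("first caller wins: %d, second caller wins: %d\n", first_wins, second_wins);
  return 0;
}
// ------------------------------------------------------------------------------------------------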
+int PartFetchCtx::offline(volatile bool &stop_flag)
+{
+  int ret = OB_SUCCESS;
+  ISTAT("[OFFLINE_PART] begin", K_(pkey), "state", print_state(state_), K_(discarded));
+  if (OB_ISNULL(part_trans_resolver_)) {
+    LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_));
+    ret = OB_INVALID_ERROR;
+  }
+  // Issue the "partition end" task
+  else if (OB_FAIL(part_trans_resolver_->offline(stop_flag))) {
+    if (OB_IN_STOP_STATE != ret) {
+      LOG_ERROR("delete partition by part trans resolver fail", KR(ret));
+    }
+  } else {
+    // Success
+    ISTAT("[OFFLINE_PART] end", K_(pkey), "state", print_state(state_), K_(discarded));
+  }
+  return ret;
+}
+
+bool PartFetchCtx::need_update_svr_list() const
+{
+  bool bool_ret = false;
+  int64_t cur_time = get_timestamp();
+  int64_t update_delta_time = cur_time - svr_list_last_update_tstamp_;
+  int64_t avail_svr_count = get_cur_svr_list().count();
+  int64_t svr_list_update_interval = ATOMIC_LOAD(&g_svr_list_update_interval);
+
+  // If it has never been updated, it must be updated
+  if (OB_INVALID_TIMESTAMP == svr_list_last_update_tstamp_) {
+    bool_ret = true;
+  }
+  // If no server is available, or if a proactive update is requested, an update is required
+  else if (avail_svr_count <= 0 || svr_list_need_update_) {
+    bool_ret = true;
+  }
+  // Periodic updates are required if not updated for a period of time
+  else if (update_delta_time >= svr_list_update_interval) {
+    bool_ret = true;
+  }
+
+  LOG_DEBUG("need_update_svr_list", K(bool_ret), K(pkey_), K(svr_list_need_update_),
+      K(update_delta_time), K(svr_list_update_interval), K(avail_svr_count),
+      K(has_leader_), K(leader_));
+  return bool_ret;
+}
+
+bool PartFetchCtx::need_update_leader_info() const
+{
+  bool bool_ret = false;
+  int64_t cur_time = get_timestamp();
+  int64_t update_delta_time = cur_time - leader_last_update_tstamp_;
+  int64_t leader_info_update_interval = ATOMIC_LOAD(&g_leader_info_update_interval);
+
+  // If there is no leader, an update is required
+  if (! 
has_leader_) { + bool_ret = true; + } + // Periodic update of leader information + else if (OB_INVALID_TIMESTAMP == leader_last_update_tstamp_ + || update_delta_time >= leader_info_update_interval) { + bool_ret = true; + } + + LOG_DEBUG("need_update_leader_info", K(bool_ret), K(pkey_), K(update_delta_time), + K(leader_info_update_interval), K(has_leader_), K(leader_)); + + return bool_ret; +} + +bool PartFetchCtx::need_locate_start_log_id() const +{ + return OB_INVALID_ID == progress_.get_next_log_id(); +} + +int PartFetchCtx::update_svr_list(IObLogSvrFinder &svr_finder, const bool need_print_info) +{ + int ret = OB_SUCCESS; + int64_t cur_time = get_timestamp(); + int64_t update_delta_time = cur_time - svr_list_last_update_tstamp_; + int64_t svr_list_update_min_interval = DEFAULT_SVR_LIST_UPDATE_MIN_INTERVAL; + int64_t start_tstamp = serve_info_.start_serve_timestamp_; + + if (OB_INVALID_TIMESTAMP != svr_list_last_update_tstamp_ + && svr_find_req_.is_state_idle() + && update_delta_time < svr_list_update_min_interval) { + // Check that the minimum update interval is met + LOG_DEBUG("svr list is updated too often", K_(pkey), K(update_delta_time), + K(svr_list_update_min_interval), "svr_list", get_cur_svr_list()); + } else { + int state = svr_find_req_.get_state(); + + // If in IDLE state, initiate an asynchronous request to update the server list + if (SvrFindReq::IDLE == state) { + // Prepare alternate objects for requesting server lists + PartSvrList &req_svr_list = get_bak_svr_list(); + req_svr_list.reset(); + + // Preparing the request structure + uint64_t next_log_id = progress_.get_next_log_id(); + if (OB_INVALID_ID == next_log_id) { + svr_find_req_.reset_for_req_by_tstamp(req_svr_list, pkey_, start_tstamp); + } else { + svr_find_req_.reset_for_req_by_log_id(req_svr_list, pkey_, next_log_id); + } + + if (OB_FAIL(svr_finder.async_svr_find_req(&svr_find_req_))) { + LOG_ERROR("launch async svr find request fail", KR(ret), K(svr_find_req_)); + } + + LOG_DEBUG("begin to update server list", KR(ret), K_(pkey), K(next_log_id)); + } else if (SvrFindReq::REQ == state) { + // On request + } else if (SvrFindReq::DONE == state) { + svr_list_last_update_tstamp_ = get_timestamp(); + + // Set the Trace ID to be used during execution + ObLogTraceIdGuard guard(svr_find_req_.trace_id_); + + if (OB_SUCCESS != svr_find_req_.get_err_code()) { + // Failed to print WARN + LOG_WARN("update server list", "err", svr_find_req_.get_err_code(), + "mysql_err", svr_find_req_.get_mysql_err_code(), + K_(pkey), "next_log_id", progress_.get_next_log_id(), K_(serve_info), + "svr_list", get_cur_svr_list()); + } else { + // Update server list successfully, switch server list for atomic: switch from standby to official + switch_svr_list(); + + // Withdraw the server list update flag + mark_svr_list_update_flag(false); + + if (need_print_info) { + LOG_INFO("update server list", "err", svr_find_req_.get_err_code(), + "mysql_err", svr_find_req_.get_mysql_err_code(), + K_(pkey), "next_log_id", progress_.get_next_log_id(), K_(serve_info), + "svr_list", get_cur_svr_list()); + } else { + LOG_DEBUG("update server list", "err", svr_find_req_.get_err_code(), + "mysql_err", svr_find_req_.get_mysql_err_code(), + K_(pkey), "next_log_id", progress_.get_next_log_id(), K_(serve_info), + "svr_list", get_cur_svr_list()); + } + } + + // Finally reset the request structure anyway + svr_find_req_.reset(); + } else { + LOG_ERROR("invalid svr finder request", K(state), K(svr_find_req_)); + ret = OB_ERR_UNEXPECTED; + } + } + return ret; +} + +int 
PartFetchCtx::update_leader_info(IObLogSvrFinder &svr_finder) +{ + int ret = OB_SUCCESS; + int64_t cur_time = get_timestamp(); + int64_t update_delta_time = cur_time - leader_last_update_tstamp_; + int64_t leader_info_update_min_interval = DEFAULT_LEADER_INFO_UPDATE_MIN_INTERVAL; + + if (OB_INVALID_TIMESTAMP != leader_last_update_tstamp_ + && leader_find_req_.is_state_idle() + && update_delta_time < leader_info_update_min_interval) { + // Check minimum update interval + LOG_DEBUG("leader info is updated too often", K_(pkey), K(update_delta_time), + K(leader_info_update_min_interval), K_(has_leader), K_(leader)); + } else { + int state = leader_find_req_.get_state(); + + if (LeaderFindReq::IDLE == state) { + leader_find_req_.reset(pkey_); + + if (OB_FAIL(svr_finder.async_leader_find_req(&leader_find_req_))) { + LOG_ERROR("launch leader find request fail", KR(ret), K(leader_find_req_)); + } + + LOG_DEBUG("begin to update leader info", KR(ret), K_(pkey)); + } else if (LeaderFindReq::REQ == state) { + // On request + } else if (LeaderFindReq::DONE == state) { + leader_last_update_tstamp_ = get_timestamp(); + bool leader_changed = false; + + // Set the Trace ID to be used during execution + ObLogTraceIdGuard guard(leader_find_req_.trace_id_); + + // Only process the result of a request if it is successful + if (OB_SUCCESS == leader_find_req_.get_err_code()) { + if (leader_find_req_.has_leader_ != has_leader_ + || leader_ != leader_find_req_.leader_) { + leader_changed = true; + } + + has_leader_ = leader_find_req_.has_leader_; + leader_ = leader_find_req_.leader_; + } + + // No leader or failed to request a leader, print WARN + if (OB_SUCCESS != leader_find_req_.get_err_code() || ! has_leader_) { + LOG_WARN("update leader info", "err", leader_find_req_.get_err_code(), + "mysql_err", leader_find_req_.get_mysql_err_code(), + K_(pkey), "has_leader", leader_find_req_.has_leader_, + "leader", leader_find_req_.leader_); + } + // Print INFO when leader information changes + else if (leader_changed) { + LOG_INFO("update leader info", "err", leader_find_req_.get_err_code(), + "mysql_err", leader_find_req_.get_mysql_err_code(), + K_(pkey), "has_leader", leader_find_req_.has_leader_, + "leader", leader_find_req_.leader_); + } else { + // In other cases, print DEBUG + LOG_DEBUG("update leader info", "err", leader_find_req_.get_err_code(), + "mysql_err", leader_find_req_.get_mysql_err_code(), + K_(pkey), "has_leader", leader_find_req_.has_leader_, + "leader", leader_find_req_.leader_); + } + + // Finally reset the request anyway + leader_find_req_.reset(); + } else { + LOG_ERROR("invalid leader find request", K(state), K(leader_find_req_)); + ret = OB_ERR_UNEXPECTED; + } + } + return ret; +} + +bool PartFetchCtx::need_heartbeat(const int64_t upper_limit) +{ + return progress_.need_heartbeat(upper_limit, ATOMIC_LOAD(&g_heartbeat_interval)); +} + +int PartFetchCtx::locate_start_log_id(IObLogStartLogIdLocator &start_log_id_locator) +{ + int ret = OB_SUCCESS; + int state = start_log_id_locate_req_.get_state(); + int64_t start_tstamp = serve_info_.start_serve_timestamp_; + + if (StartLogIdLocateReq::IDLE == state) { + start_log_id_locate_req_.reset(pkey_, start_tstamp); + + PartSvrList &cur_svr_list = get_cur_svr_list(); + + if (cur_svr_list.count() <= 0) { + LOG_INFO("server list is empty for locating start log id, mark for updating server list"); + mark_svr_list_update_flag(true); + } else if (OB_FAIL(init_locate_req_svr_list_(start_log_id_locate_req_, cur_svr_list))) { + LOG_ERROR("init_locate_req_svr_list_ 
fail", KR(ret), K(cur_svr_list)); + } else if (OB_FAIL(start_log_id_locator.async_start_log_id_req(&start_log_id_locate_req_))) { + LOG_ERROR("launch async start log id request fail", KR(ret), K(start_log_id_locate_req_)); + } else { + LOG_INFO("start log id locate request launched", K_(pkey), + "start_tstamp", TS_TO_STR(start_tstamp), + "svr_cnt", start_log_id_locate_req_.svr_list_.count(), + "svr_list", start_log_id_locate_req_.svr_list_); + } + } else if (StartLogIdLocateReq::REQ == state) { + // On request + } else if (StartLogIdLocateReq::DONE == state) { + uint64_t start_log_id = OB_INVALID_ID; + ObAddr locate_svr; + + if (! start_log_id_locate_req_.get_result(start_log_id, locate_svr)) { + LOG_ERROR("start log id locate fail", K_(start_log_id_locate_req)); + } else { + progress_.set_next_log_id(start_log_id); + LOG_INFO("start log id located", K_(pkey), K(start_log_id), + "start_tstamp", TS_TO_STR(start_tstamp), K(locate_svr), + "svr_cnt", start_log_id_locate_req_.svr_list_.count(), + "svr_list", start_log_id_locate_req_.svr_list_); + } + + // Reset the location request, whether successful or not + start_log_id_locate_req_.reset(); + } else { + LOG_ERROR("unknown start log id locator request state", K(state), + K(start_log_id_locate_req_)); + ret = OB_ERR_UNEXPECTED; + } + return ret; +} + + +int PartFetchCtx::update_heartbeat_info(IObLogFetcherHeartbeatWorker &heartbeater, + IObLogSvrFinder &svr_finder) +{ + int ret = OB_SUCCESS; + int64_t cur_time = get_timestamp(); + int64_t update_delta_time = cur_time - heartbeat_last_update_tstamp_; + int64_t heartbeat_min_interval = ATOMIC_LOAD(&g_heartbeat_interval); + + if (OB_INVALID_TIMESTAMP != heartbeat_last_update_tstamp_ + && heartbeat_req_.is_state_idle() + && update_delta_time < heartbeat_min_interval) { + // Check minimum update interval to avoid frequent updates + LOG_DEBUG("heartbeat is updated too often", K_(pkey), K(update_delta_time), + K(heartbeat_min_interval), K_(progress)); + } else { + int state = heartbeat_req_.get_state(); + + // Idle state, ready to initiate a heartbeat + if (HeartbeatRequest::IDLE == state) { + if (OB_FAIL(launch_heartbeat_request_(heartbeater, svr_finder))) { + LOG_ERROR("launch heartbeat request fail", KR(ret), K_(pkey)); + } + } else if (HeartbeatRequest::REQ == state) { + // Request in progress + } else if (HeartbeatRequest::DONE == state) { + // When the heartbeat request is complete, update the time of the last request + heartbeat_last_update_tstamp_ = cur_time; + + // Request completed, heartbeat processed + if (OB_FAIL(handle_heartbeat_request_(svr_finder))) { + LOG_ERROR("handle heartbeat request fail", KR(ret), K_(pkey)); + } + + // Reset the heartbeat request anyway + heartbeat_req_.reset(); + } + } + return ret; +} + +int PartFetchCtx::launch_heartbeat_request_(IObLogFetcherHeartbeatWorker &heartbeater, + IObLogSvrFinder &svr_finder) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! heartbeat_req_.is_state_idle())) { + LOG_ERROR("heartbeat request state is not IDLE", K(heartbeat_req_)); + ret = OB_INVALID_ERROR; + } else { + // If there is no leader, first update the leader information + if (! has_leader_) { + if (OB_FAIL(update_leader_info(svr_finder))) { + LOG_ERROR("update leader info fail", KR(ret), K(pkey_)); + } + } + + if (OB_SUCCESS == ret) { + if (! 
has_leader_) { + LOG_DEBUG("partition has no leader, can not request heartbeat", K_(pkey)); + + // If the leader query request has completed and there is still no leader, print the WARN message + if (leader_find_req_.is_state_idle()) { + LOG_WARN("partition has no leader, can not request heartbeat", K_(pkey), + "leader_last_update_time", TS_TO_STR(leader_last_update_tstamp_)); + } + } else { + // leader is valid and initiates a heartbeat request to the leader + heartbeat_req_.reset(pkey_, progress_.get_next_log_id(), leader_); + + LOG_DEBUG("launch heartbeat", K_(pkey), K_(leader), K_(progress)); + + if (OB_FAIL(heartbeater.async_heartbeat_req(&heartbeat_req_))) { + LOG_ERROR("launch async heartbeat request fail", KR(ret), K(heartbeat_req_)); + } + } + } + } + return ret; +} + +int PartFetchCtx::handle_heartbeat_request_(IObLogSvrFinder &svr_finder) +{ + int ret = OB_SUCCESS; + + // Use the Trace ID from the request process + ObLogTraceIdGuard guard(heartbeat_req_.trace_id_); + + // Requires a status of DONE + if (OB_UNLIKELY(! heartbeat_req_.is_state_done())) { + LOG_ERROR("heartbeat request state is not DONE", K(heartbeat_req_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_)); + ret = OB_INVALID_ARGUMENT; + } else { + bool need_update_leader_info = false; + const HeartbeatResponse &resp = heartbeat_req_.get_resp(); + + LOG_DEBUG("handle heartbeat", K_(pkey), K(resp), K_(leader)); + + if (OB_SUCCESS != resp.rpc_err_ || OB_SUCCESS != resp.svr_err_) { + LOG_ERROR("request heartbeat fail, rpc or server error", "rpc_err", resp.rpc_err_, + "svr_err", resp.svr_err_, "svr", heartbeat_req_.svr_, "pkey", heartbeat_req_.pkey_); + need_update_leader_info = true; + } else { + if (OB_SUCCESS != resp.partition_err_) { + // The requested server is not the master + LOG_INFO("heartbeat server is not master", K_(heartbeat_req)); + need_update_leader_info = true; + } + + // Only handle cases where the primary or backup returns successful results + if (OB_SUCCESS == resp.partition_err_ || OB_NOT_MASTER == resp.partition_err_) { + if (OB_INVALID_ID == resp.next_served_log_id_ || + OB_INVALID_TIMESTAMP == resp.next_served_tstamp_) { + LOG_ERROR("heartbeat result is invalid, this maybe server's BUG", K_(heartbeat_req)); + } else { + // Update Heartbeat Progress + progress_.update_heartbeat_progress(resp.next_served_log_id_, + resp.next_served_tstamp_); + } + } + } + + // If the leader information needs to be updated, the update task is launched immediately + if (OB_SUCCESS == ret && need_update_leader_info) { + reset_leader_info(); + + if (OB_FAIL(update_leader_info(svr_finder))) { + LOG_ERROR("update_leader_info fail", KR(ret), K_(pkey)); + } + } + } + return ret; +} + +int PartFetchCtx::update_log_heartbeat(const uint64_t next_log_id, + const int64_t log_progress) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_)); + ret = OB_INVALID_ERROR; + } else { + int64_t old_progress = progress_.get_progress(); + + // Update Log Heartbeat Progress + if (OB_FAIL(progress_.update_log_heartbeat(next_log_id, log_progress))) { + LOG_ERROR("PartProgress update log heartbeat fail", KR(ret), K(next_log_id), K(log_progress), + K(progress_), K(pkey_)); + } else { + int64_t new_progress = progress_.get_progress(); + bool progress_updated = false; + + // Check if progress is up to date + if (OB_INVALID_TIMESTAMP != new_progress && + 
(OB_INVALID_TIMESTAMP == old_progress || new_progress > old_progress)) { + progress_updated = true; + } else { + progress_updated = false; + } + + LOG_DEBUG("update log heartbeat", K_(pkey), K(next_log_id), K(log_progress), + K(old_progress), K(new_progress), K(progress_updated), K_(progress)); + } + } + return ret; +} + +int PartFetchCtx::next_server(common::ObAddr &svr) +{ + int ret = OB_SUCCESS; + PartSvrList &svr_list = get_cur_svr_list(); + uint64_t next_log_id = progress_.get_next_log_id(); + IBlackList::BLSvrArray wash_svr_array; + wash_svr_array.reset(); + + // Note: The blacklist must be cleansed before the next available server can be retrieved from the server list. + if (OB_FAIL(blacklist_.do_white_washing(wash_svr_array))) { + LOG_ERROR("blacklist do while washing fail", KR(ret), K(pkey_), K(blacklist_)); + } else { + if (wash_svr_array.count() > 0) { + ISTAT("[BLACK_LIST] [WASH]", KR(ret), K_(pkey), + "wash_svr_cnt", wash_svr_array.count(), K(wash_svr_array)); + } + + if (OB_FAIL(svr_list.next_server(next_log_id, blacklist_, svr))) { + if (OB_ITER_END != ret) { + LOG_ERROR("get next server from svr_list fail", KR(ret), K(next_log_id), K(svr), K(svr_list)); + } + } else { + // Get server success + } + } + + if (OB_ITER_END == ret) { + // If the server is exhausted, ask to update the server list + mark_svr_list_update_flag(true); + } + + return ret; +} + +int PartFetchCtx::init_locate_req_svr_list_(StartLogIdLocateReq &req, PartSvrList &svr_list) +{ + int ret = OB_SUCCESS; + // Locate start log ids preferably from the server with the latest logs to avoid inaccurate history tables that can lead to too many locate fallbacks. + if (OB_FAIL(svr_list.get_server_array_for_locate_start_log_id(req.svr_list_))) { + LOG_ERROR("get_server_array_for_locate_start_log_id fail", KR(ret), K(svr_list), K(req)); + } + return ret; +} + +PartSvrList &PartFetchCtx::get_cur_svr_list() +{ + return svr_list_[(ATOMIC_LOAD(&cur_svr_list_idx_)) % 2]; +} + +const PartSvrList &PartFetchCtx::get_cur_svr_list() const +{ + return svr_list_[(ATOMIC_LOAD(&cur_svr_list_idx_)) % 2]; +} + +PartSvrList &PartFetchCtx::get_bak_svr_list() +{ + return svr_list_[(ATOMIC_LOAD(&cur_svr_list_idx_) + 1) % 2]; +} + +void PartFetchCtx::switch_svr_list() +{ + ATOMIC_INC(&cur_svr_list_idx_); +} + +void PartFetchCtx::mark_svr_list_update_flag(const bool need_update) +{ + ATOMIC_STORE(&svr_list_need_update_, need_update); +} + +uint64_t PartFetchCtx::hash() const +{ + // hash by "PKEY + next_log_id" + uint64_t next_log_id = progress_.get_next_log_id(); + return murmurhash(&next_log_id, sizeof(next_log_id), pkey_.hash()); +} + +// Timeout conditions: (both satisfied) +// 1. the progress is not updated for a long time on the target server +// 2. 
progress is less than upper limit +int PartFetchCtx::check_fetch_timeout(const common::ObAddr &svr, + const int64_t upper_limit, + const int64_t fetcher_resume_tstamp, + bool &is_fetch_timeout, // Is the log fetch timeout + bool &is_fetch_timeout_on_lagged_replica) // Is the log fetch timeout on a lagged replica +{ + int ret = OB_SUCCESS; + int64_t cur_time = get_timestamp(); + int64_t svr_start_fetch_tstamp = OB_INVALID_TIMESTAMP; + // Partition timeout, after which time progress is not updated, it is considered to be a log fetch timeout + const int64_t partition_timeout = TCONF.partition_timeout_sec * _SEC_; + // Timeout period for partitions on lagging replica, compared to normal timeout period + const int64_t partition_timeout_for_lagged_replica = TCONF.partition_timeout_sec_for_lagged_replica * _SEC_; + + is_fetch_timeout = false; + is_fetch_timeout_on_lagged_replica = false; + + // Get the starting log time on the current server + if (OB_FAIL(fetch_info_.get_cur_svr_start_fetch_tstamp(svr, svr_start_fetch_tstamp))) { + LOG_ERROR("get_cur_svr_start_fetch_tstamp fail", KR(ret), K(svr), K(fetch_info_)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == svr_start_fetch_tstamp)) { + LOG_ERROR("invalid start fetch tstamp", K(svr_start_fetch_tstamp), K(fetch_info_)); + ret = OB_INVALID_ERROR; + } else { + // Get the current progress and when the progress was last updated + int64_t cur_progress = progress_.get_progress(); + int64_t progress_last_update_tstamp = progress_.get_touch_tstamp(); + int64_t next_served_log_id = progress_.get_next_served_log_id(); + int64_t hb_touch_tstamp = progress_.get_hb_touch_tstamp(); + int64_t next_log_id = progress_.get_next_log_id(); + + if (OB_INVALID_TIMESTAMP != cur_progress && cur_progress >= upper_limit) { + is_fetch_timeout = false; + is_fetch_timeout_on_lagged_replica = false; + } else { + // Consider the time at which logs start to be fetched on the server as a lower bound for progress updates + // Ensure that the partition stays on a server for a certain period of time + int64_t last_update_tstamp = OB_INVALID_TIMESTAMP; + if ((OB_INVALID_TIMESTAMP == progress_last_update_tstamp)) { + last_update_tstamp = svr_start_fetch_tstamp; + } else { + last_update_tstamp = std::max(progress_last_update_tstamp, svr_start_fetch_tstamp); + } + + if (OB_INVALID_TIMESTAMP != fetcher_resume_tstamp) { + // After a fetcher pause and restart, the fetcher pause time is also counted as partition fetch time, + // a misjudgement may occur, resulting in a large number of partition timeouts being dispatched + last_update_tstamp = std::max(last_update_tstamp, fetcher_resume_tstamp); + } + + // Progress update interval + const int64_t progress_update_interval = (cur_time - last_update_tstamp); + // Heartbeat progress update interval + const int64_t hb_progress_update_interval = + OB_INVALID_TIMESTAMP != hb_touch_tstamp ? 
(cur_time - hb_touch_tstamp) : 0; + + // long periods of non-updating progress and progress timeouts, where it is no longer necessary to determine if the machine is behind in its backup + if (progress_update_interval >= partition_timeout) { + is_fetch_timeout = true; + is_fetch_timeout_on_lagged_replica = false; + } else { + // Before the progress timeout, verify that the server fetching the logs is a lagged replica, and if the logs are not fetched on the lagged replica for some time, then it is also considered a progress timeout + // Generally, the timeout for a lagged copy is less than the progress timeout + // partition_timeout_for_lagged_replica < partition_timeout + // + // How to define a long timeout for fetching logs on a lagged replica? + // 1. lagged replica: the next log does exist, but this server can't fetch it, indicating that this server is most likely behind the replica + // 2. When to start counting the timeout: from the time liboblog confirms the existence of the next log + // + // next_served_log_id: the log ID of the next service in the Leader, if the next log is smaller than it, the next log exists + // hb_touch_tstamp: Leader heartbeat update time, i.e. the time to confirm the log ID of the largest service + if (OB_INVALID_ID != next_served_log_id + && next_log_id < next_served_log_id // Next log exists + && progress_update_interval >= partition_timeout_for_lagged_replica // Progress update time over lagging replica configuration item + && hb_progress_update_interval >= partition_timeout_for_lagged_replica) { // Heartbeat progress update time over lagging replica configuration item + is_fetch_timeout = true; + is_fetch_timeout_on_lagged_replica = true; + } + } + + if (is_fetch_timeout) { + LOG_INFO("[CHECK_PROGRESS_TIMEOUT]", K_(pkey), K(svr), + K(is_fetch_timeout), K(is_fetch_timeout_on_lagged_replica), + K(progress_update_interval), K(hb_progress_update_interval), + K(progress_), + "update_limit", TS_TO_STR(upper_limit), + "last_update_tstamp", TS_TO_STR(last_update_tstamp), + "svr_start_fetch_tstamp", TS_TO_STR(svr_start_fetch_tstamp)); + } else { + LOG_DEBUG("[CHECK_PROGRESS_TIMEOUT]", K_(pkey), K(svr), + K(is_fetch_timeout), K(is_fetch_timeout_on_lagged_replica), + K(progress_update_interval), K(hb_progress_update_interval), + K(progress_), + "update_limit", TS_TO_STR(upper_limit), + "last_update_tstamp", TS_TO_STR(last_update_tstamp), + "svr_start_fetch_tstamp", TS_TO_STR(svr_start_fetch_tstamp)); + } + } + } + return ret; +} + +int PartFetchCtx::get_dispatch_progress(int64_t &dispatch_progress, PartTransDispatchInfo &dispatch_info) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(part_trans_resolver_)) { + LOG_ERROR("invalid part trans resolver", K(part_trans_resolver_)); + ret = OB_NOT_INIT; + } else if (OB_FAIL(part_trans_resolver_->get_dispatch_progress(dispatch_progress, + dispatch_info))) { + LOG_ERROR("get_dispatch_progress from part trans resolver fail", KR(ret), K(pkey_)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == dispatch_progress)) { + LOG_ERROR("dispatch_progress is invalid", K(dispatch_progress), K(pkey_), K(dispatch_info)); + ret = OB_ERR_UNEXPECTED; + } + return ret; +} + +bool PartFetchCtx::is_in_use() const +{ + // As long as there is an asynchronous request in progress, it is considered to be "in use" + return svr_find_req_.is_state_req() + || start_log_id_locate_req_.is_state_req() + || leader_find_req_.is_state_req() + || heartbeat_req_.is_state_req(); +} + +void PartFetchCtx::print_dispatch_info() const +{ + PartProgress cur_progress; + 
progress_.atomic_copy(cur_progress); + + int64_t progress = cur_progress.get_progress(); + + _ISTAT("[DISPATCH_FETCH_TASK] PKEY=%s TO=%s FROM=%s REASON=\"%s\" " + "DELAY=%s PROGRESS=%s DISCARDED=%d", + to_cstring(pkey_), to_cstring(fetch_info_.cur_mod_), + to_cstring(fetch_info_.out_mod_), fetch_info_.out_reason_, + TS_TO_DELAY(progress), + to_cstring(cur_progress), + discarded_); +} + +void PartFetchCtx::dispatch_in_idle_pool() +{ + fetch_info_.dispatch_in_idle_pool(); + print_dispatch_info(); +} + +void PartFetchCtx::dispatch_in_fetch_stream(const common::ObAddr &svr, FetchStream &fs) +{ + fetch_info_.dispatch_in_fetch_stream(svr, fs); + print_dispatch_info(); +} + +void PartFetchCtx::dispatch_in_dead_pool() +{ + fetch_info_.dispatch_in_dead_pool(); + print_dispatch_info(); +} + +int PartFetchCtx::get_cur_svr_start_fetch_tstamp(const common::ObAddr &svr, + int64_t &svr_start_fetch_tstamp) const +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(fetch_info_.get_cur_svr_start_fetch_tstamp(svr, svr_start_fetch_tstamp))) { + LOG_ERROR("get_cur_svr_start_fetch_tstamp fail", KR(ret), K(fetch_info_), + K(svr), K(svr_start_fetch_tstamp)); + } + + return ret; +} + +int PartFetchCtx::add_into_blacklist(const common::ObAddr &svr, + const int64_t svr_service_time, + int64_t &survival_time) +{ + int ret = OB_SUCCESS; + int64_t blacklist_history_clear_interval = ATOMIC_LOAD(&g_blacklist_history_clear_interval); + + // Cyclical cleaning blacklist history + if (REACH_TIME_INTERVAL(blacklist_history_clear_interval)) { + IBlackList::SvrHistoryArray clear_svr_array; + clear_svr_array.reset(); + if (OB_FAIL(clear_blacklist_history_(clear_svr_array))) { + LOG_ERROR("blacklist remove history error", KR(ret), K(pkey_)); + } else { + if (clear_svr_array.count() > 0) { + // Print the number of blacklisted servers and the servers + ISTAT("[BLACK_LIST] [CLEAR]", KR(ret), K(pkey_), + "clear_svr_cnt", clear_svr_array.count(), K(clear_svr_array)); + } + } + } + + if (OB_SUCCESS == ret) { + if (OB_FAIL(blacklist_.add(svr, svr_service_time, survival_time))) { + LOG_ERROR("blacklist add error", KR(ret), K(pkey_), K(svr), + "svr_service_time", TVAL_TO_STR(svr_service_time), + "survival_time", TVAL_TO_STR(survival_time)); + } else { + ISTAT("[BLACK_LIST] [ADD]", K_(pkey), K(svr), + "svr_service_time", TVAL_TO_STR(svr_service_time), + "survival_time", TVAL_TO_STR(survival_time), + "blacklist_cnt", blacklist_.count(), K_(blacklist)); + } + } + + return ret; +} + +int PartFetchCtx::clear_blacklist_history_(IBlackList::SvrHistoryArray &clear_svr_array) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(blacklist_.clear_overdue_history(clear_svr_array))) { + } else { + // succ + } + + return ret; +} + +bool PartFetchCtx::need_switch_server(const common::ObAddr &cur_svr) +{ + bool bool_ret = false; + PartSvrList &svr_list = get_cur_svr_list(); + uint64_t next_log_id = progress_.get_next_log_id(); + + bool_ret = svr_list.need_switch_server(next_log_id, blacklist_, pkey_, cur_svr); + + return bool_ret; +} + +bool PartFetchCtx::is_split_done(const uint64_t split_log_id, const int64_t split_log_ts) const +{ + // If the start timestamp is greater than the split log timestamp, the split is complete + bool split_done = (split_log_ts < serve_info_.start_serve_timestamp_); + int cur_state = state_; + + // Otherwise the current state prevails + if (! 
split_done) { + split_done = (STATE_SPLIT_DONE == cur_state); + } + + ISTAT("[SPLIT] [CHECK_STATE]", K_(pkey), K(split_done), + "cur_state", print_state(cur_state), + K(split_done), K(split_log_id), K(split_log_ts), K(serve_info_), + K_(discarded)); + return split_done; +} + +/////////////////////////////////// PartProgress /////////////////////////////////// + +void PartFetchCtx::PartProgress::reset() +{ + progress_ = OB_INVALID_TIMESTAMP; + touch_tstamp_ = OB_INVALID_TIMESTAMP; + is_log_progress_ = false; + + next_log_id_ = OB_INVALID_ID; + log_progress_ = OB_INVALID_TIMESTAMP; + log_touch_tstamp_ = OB_INVALID_TIMESTAMP; + + next_served_log_id_ = OB_INVALID_ID; + next_served_tstamp_ = OB_INVALID_TIMESTAMP; + hb_touch_tstamp_ = OB_INVALID_TIMESTAMP; +} + +// start_log_id refers to the start log id, which may not be valid +// start_tstamp refers to the partition start timestamp, not the start_log_id log timestamp +// +// Therefore, this function sets start_tstamp to the current progress progress_, but does not update to log_progress_ +// log_progress_ only indicates log progress +void PartFetchCtx::PartProgress::reset(const uint64_t start_log_id, const int64_t start_tstamp) +{ + reset(); + + // Update next_log_id + // but does not update the log progress + next_log_id_ = start_log_id; + + // Set start-up timestamp to global progress + progress_ = start_tstamp; + touch_tstamp_ = get_timestamp(); + is_log_progress_ = true; +} + +// If the progress is greater than the upper limit, the touch timestamp of the corresponding progress is updated +// NOTE: The purpose of this function is to prevent the touch timestamp from not being updated for a long time if the progress +// is greater than the upper limit, which could lead to a false detection of a progress timeout if the upper limit suddenly increases. +void PartFetchCtx::PartProgress::update_touch_tstamp_if_progress_beyond_upper_limit(const int64_t upper_limit) +{ + ObByteLockGuard guard(lock_); + + if (OB_INVALID_TIMESTAMP != progress_ + && OB_INVALID_TIMESTAMP != upper_limit + && progress_ >= upper_limit) { + touch_tstamp_ = get_timestamp(); + } +} + +// Two scenarios for updating heartbeats. +// 1. the heartbeat message is invalid +// 2. The log is not behind the heartbeat, and the progress is less than the upper limit, and the progress update times out +bool PartFetchCtx::PartProgress::need_heartbeat(const int64_t upper_limit, + const int64_t hb_interval) const +{ + ObByteLockGuard guard(lock_); + + // Is the progress update timed out + bool is_progress_timeout = false; + if (OB_INVALID_TIMESTAMP == touch_tstamp_) { + is_progress_timeout = true; + } else { + // If progress has not been updated for a period of time, it is suspected that a heartbeat will need to be used to update progress + is_progress_timeout = ((get_timestamp() - touch_tstamp_) >= hb_interval); + } + + // Is the log lagging behind the heartbeat + bool log_fall_behind_hb = (OB_INVALID_ID == next_log_id_ || next_log_id_ < next_served_log_id_); + + return OB_INVALID_TIMESTAMP == next_served_tstamp_ + || OB_INVALID_ID == next_served_log_id_ + || OB_INVALID_TIMESTAMP == progress_ + || ((! 
log_fall_behind_hb) && progress_ < upper_limit && is_progress_timeout); +} + +void PartFetchCtx::PartProgress::update_heartbeat_progress(const uint64_t ns_log_id, + const int64_t ns_tstamp) +{ + ObByteLockGuard guard(lock_); + + if (OB_INVALID_ID != ns_log_id && OB_INVALID_TIMESTAMP != ns_tstamp) { + bool touched = false; + + if (OB_INVALID_ID == next_served_log_id_ || ns_log_id > next_served_log_id_) { + next_served_log_id_ = ns_log_id; + touched = true; + } + + if (OB_INVALID_TIMESTAMP == next_served_tstamp_ || ns_tstamp > next_served_tstamp_) { + next_served_tstamp_ = ns_tstamp; + touched = true; + } + + if (touched) { + hb_touch_tstamp_ = get_timestamp(); + + // Update global progress + update_progress_(); + } + } +} + +int PartFetchCtx::PartProgress::update_log_progress(const uint64_t new_next_log_id, + const int64_t new_log_progress) +{ + ObByteLockGuard guard(lock_); + + int ret = OB_SUCCESS; + // Require next_log_id to be valid + if (OB_UNLIKELY(OB_INVALID_ID == next_log_id_)) { + LOG_ERROR("invalid next_log_id", K(next_log_id_), K(log_progress_), K(progress_)); + ret = OB_INVALID_ERROR; + } + // Verifying log continuity + else if (OB_UNLIKELY((next_log_id_ + 1) != new_next_log_id)) { + LOG_ERROR("log not sync", K(next_log_id_), K(new_next_log_id)); + ret = OB_LOG_NOT_SYNC; + } else { + next_log_id_ = new_next_log_id; + + // Update log progress if it is invalid, or if log progress has been updated + if (OB_INVALID_TIMESTAMP == log_progress_ || + (OB_INVALID_TIMESTAMP != new_log_progress && new_log_progress > log_progress_)) { + log_progress_ = new_log_progress; + } + + log_touch_tstamp_ = get_timestamp(); + + // Update global progress + update_progress_(); + + // Log progress update, forcing the update timestamp of the global progress to be updated + // NOTE: The reason is: the log progress is updated, indicating that the log was fetched and that the progress was updated anyway + // + // 1. normally, if the log progress is updated, the global progress is also updated, because the global progress is equal to the log progress + // 2. 
At startup, there is a log rollback and the global progress is equal to the startup timestamp and cannot be rolled back, + // so the log progress is less than the global progress and the update of the log progress does not drive the global progress update, + // but the partition does pull the log, in which case the global progress should be considered to be forcibly updated + // and therefore the "update timestamp of global progress" needs to be updated + touch_tstamp_ = log_touch_tstamp_; + } + return ret; +} + +int PartFetchCtx::PartProgress::update_log_heartbeat(const uint64_t next_log_id, + const int64_t new_log_progress) +{ + ObByteLockGuard guard(lock_); + + int ret = OB_SUCCESS; + + // Request the next log to be valid + if (OB_UNLIKELY(OB_INVALID_ID == next_log_id_)) { + LOG_ERROR("invalid log progress", K(next_log_id_), K(log_progress_)); + ret = OB_INVALID_ERROR; + } + // Require log id match + else if (OB_UNLIKELY(next_log_id != next_log_id_)) { + LOG_ERROR("next log id does not match, log not sync", K(next_log_id_), K(next_log_id)); + ret = OB_LOG_NOT_SYNC; + } else if (OB_INVALID_TIMESTAMP == log_progress_ || + (OB_INVALID_TIMESTAMP != new_log_progress && new_log_progress > log_progress_)) { + // Update the touch timestamp only after the progress has actually been updated + log_progress_ = new_log_progress; + log_touch_tstamp_ = get_timestamp(); + + // Update global progress + update_progress_(); + + // Log progress update, force update of global progress update timestamp + touch_tstamp_ = log_touch_tstamp_; + } + + return ret; +} + +void PartFetchCtx::PartProgress::update_progress_() +{ + int64_t new_progress = OB_INVALID_TIMESTAMP; + bool new_is_log_progress = false; + bool new_touch_tstamp = OB_INVALID_TIMESTAMP; + + // If the heartbeat progress is ahead of the log stream progress in the case of fetching the latest log, then the heartbeat progress is used + // Note: requires next_served_log_id == next_log_id, i.e. 
the log streams are consistent + if (OB_INVALID_ID != next_log_id_ + && OB_INVALID_TIMESTAMP != next_served_tstamp_ + && OB_INVALID_ID != next_served_log_id_ + && next_log_id_ == next_served_log_id_) { + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == log_progress_) || next_served_tstamp_ > log_progress_) { + new_progress = next_served_tstamp_; + new_is_log_progress = false; + new_touch_tstamp = hb_touch_tstamp_; + } else { + new_progress = log_progress_; + new_is_log_progress = true; + new_touch_tstamp = log_touch_tstamp_; + } + } else { + // Default is log stream progress + new_progress = log_progress_; + new_is_log_progress = true; + new_touch_tstamp = log_touch_tstamp_; + } + + // At this point, the new progress value has been confirmed and updated to the global progress if it is ahead of the global progress + if (OB_INVALID_TIMESTAMP != new_progress && new_progress > progress_) { + progress_ = new_progress; + is_log_progress_ = new_is_log_progress; + + // If the update timestamp is not updated, it is updated to the current timestamp + // indicates that the global progress has just been updated + if (new_touch_tstamp <= touch_tstamp_) { + touch_tstamp_ = get_timestamp(); + } else { + touch_tstamp_ = new_touch_tstamp; + } + } +} + +void PartFetchCtx::PartProgress::atomic_copy(PartProgress &prog) const +{ + // protected by lock + ObByteLockGuard guard(lock_); + + prog.progress_ = progress_; + prog.touch_tstamp_ = touch_tstamp_; + prog.is_log_progress_ = is_log_progress_; + + prog.next_log_id_ = next_log_id_; + prog.log_progress_ = log_progress_; + prog.log_touch_tstamp_ = log_touch_tstamp_; + + prog.next_served_log_id_ = next_served_log_id_; + prog.next_served_tstamp_ = next_served_tstamp_; + prog.hb_touch_tstamp_ = hb_touch_tstamp_; +} + +///////////////////////////////// FetchModule ///////////////////////////////// +void PartFetchCtx::FetchModule::reset() +{ + module_ = FETCH_MODULE_NONE; + svr_.reset(); + fetch_stream_ = NULL; + start_fetch_tstamp_ = OB_INVALID_TIMESTAMP; +} + +void PartFetchCtx::FetchModule::reset_to_idle_pool() +{ + reset(); + module_ = FETCH_MODULE_IDLE_POOL; +} + +void PartFetchCtx::FetchModule::reset_to_fetch_stream(const common::ObAddr &svr, FetchStream &fs) +{ + module_ = FETCH_MODULE_FETCH_STREAM; + svr_ = svr; + fetch_stream_ = &fs; + start_fetch_tstamp_ = get_timestamp(); +} + +void PartFetchCtx::FetchModule::reset_to_dead_pool() +{ + reset(); + module_ = FETCH_MODULE_DEAD_POOL; +} + +int64_t PartFetchCtx::FetchModule::to_string(char *buffer, const int64_t size) const +{ + int64_t pos = 0; + + switch (module_) { + case FETCH_MODULE_NONE: { + (void)databuff_printf(buffer, size, pos, "NONE"); + break; + } + + case FETCH_MODULE_IDLE_POOL: { + (void)databuff_printf(buffer, size, pos, "IDLE_POOL"); + break; + } + + case FETCH_MODULE_FETCH_STREAM: { + (void)databuff_printf(buffer, size, pos, "[%s](%p)", + to_cstring(svr_), fetch_stream_); + break; + } + + case FETCH_MODULE_DEAD_POOL: { + (void)databuff_printf(buffer, size, pos, "DEAD_POOL"); + break; + } + + default: { + // Invalid module + (void)databuff_printf(buffer, size, pos, "INVALID"); + break; + } + } + + return pos; +} + +///////////////////////////////// FetchInfo ///////////////////////////////// +void PartFetchCtx::FetchInfo::reset() +{ + cur_mod_.reset(); + out_mod_.reset(); + out_reason_ = "NONE"; +} + +void PartFetchCtx::FetchInfo::dispatch_in_idle_pool() +{ + cur_mod_.reset_to_idle_pool(); +} + +void PartFetchCtx::FetchInfo::dispatch_in_fetch_stream(const common::ObAddr &svr, FetchStream &fs) +{ + 
cur_mod_.reset_to_fetch_stream(svr, fs); +} + +void PartFetchCtx::FetchInfo::dispatch_in_dead_pool() +{ + cur_mod_.reset_to_dead_pool(); +} + +void PartFetchCtx::FetchInfo::dispatch_out(const char *reason) +{ + out_mod_ = cur_mod_; + cur_mod_.reset(); + out_reason_ = reason; +} + +int PartFetchCtx::FetchInfo::get_cur_svr_start_fetch_tstamp(const common::ObAddr &svr, + int64_t &svr_start_fetch_ts) const +{ + int ret = OB_SUCCESS; + + svr_start_fetch_ts = OB_INVALID_TIMESTAMP; + + // Requires that the FetchStream module is currently in progress + if (OB_UNLIKELY(FetchModule::FETCH_MODULE_FETCH_STREAM != cur_mod_.module_)) { + LOG_ERROR("current module is not FetchStream", K(cur_mod_)); + ret = OB_INVALID_ERROR; + } + // verify server + else if (OB_UNLIKELY(svr != cur_mod_.svr_)) { + LOG_ERROR("server does not match", K(svr), K(cur_mod_)); + ret = OB_INVALID_ARGUMENT; + } else { + svr_start_fetch_ts = cur_mod_.start_fetch_tstamp_; + } + + return ret; +} + +//////////////////////////////////////// PartFetchInfoForPrint ////////////////////////////////// + +PartFetchInfoForPrint::PartFetchInfoForPrint() : + tps_(0), + is_discarded_(false), + pkey_(), + progress_(), + fetch_mod_(), + dispatch_progress_(OB_INVALID_TIMESTAMP), + dispatch_info_() +{ +} + +int PartFetchInfoForPrint::init(PartFetchCtx &ctx) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ctx.get_dispatch_progress(dispatch_progress_, dispatch_info_))) { + LOG_ERROR("get_dispatch_progress from PartFetchCtx fail", KR(ret), K(ctx)); + } else { + tps_ = ctx.get_tps(); + is_discarded_ = ctx.is_discarded(); + pkey_ = ctx.get_pkey(); + ctx.get_progress_struct(progress_); + fetch_mod_ = ctx.get_cur_fetch_module(); + } + return ret; +} + +void PartFetchInfoForPrint::print_fetch_progress(const char *description, + const int64_t idx, + const int64_t array_cnt, + const int64_t cur_time) const +{ + _LOG_INFO("[STAT] %s idx=%ld/%ld pkey=%s mod=%s " + "discarded=%d delay=%s tps=%.2lf progress=%s", + description, idx, array_cnt, to_cstring(pkey_), + to_cstring(fetch_mod_), + is_discarded_, TVAL_TO_STR(cur_time - progress_.get_progress()), + tps_, to_cstring(progress_)); +} + +void PartFetchInfoForPrint::print_dispatch_progress(const char *description, + const int64_t idx, + const int64_t array_cnt, + const int64_t cur_time) const +{ + _LOG_INFO("[STAT] %s idx=%ld/%ld pkey=%s delay=%s pending_task(queue/total)=%ld/%ld " + "dispatch_progress=%s last_dispatch_log_id=%lu next_task=%s " + "next_trans(log_id=%lu,committed=%d,ready_to_commit=%d,global_version=%s) checkpoint=%s", + description, idx, array_cnt, to_cstring(pkey_), + TVAL_TO_STR(cur_time - dispatch_progress_), + dispatch_info_.task_count_in_queue_, + dispatch_info_.pending_task_count_, + TS_TO_STR(dispatch_progress_), + dispatch_info_.last_dispatch_log_id_, + dispatch_info_.next_task_type_, + dispatch_info_.next_trans_log_id_, + dispatch_info_.next_trans_committed_, + dispatch_info_.next_trans_ready_to_commit_, + TS_TO_STR(dispatch_info_.next_trans_global_version_), + TS_TO_STR(dispatch_info_.current_checkpoint_)); +} + +} +} diff --git a/src/liboblog/src/ob_log_part_fetch_ctx.h b/src/liboblog/src/ob_log_part_fetch_ctx.h new file mode 100644 index 0000000000000000000000000000000000000000..a6027be9908f4d5feae19b21eda2948b023c0da8 --- /dev/null +++ b/src/liboblog/src/ob_log_part_fetch_ctx.h @@ -0,0 +1,683 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_FETCH_CTX_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_FETCH_CTX_H__ + +#include "lib/atomic/ob_atomic.h" // ATOMIC_STORE +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/container/ob_array.h" // ObArray +#include "common/ob_partition_key.h" // ObPartitionKey +#include "clog/ob_log_entry.h" // ObLogEntry +#include "storage/ob_partition_split.h" // ObPartitionSplitSourceLog, ObPartitionSplitDestLog + +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_part_svr_list.h" // PartSvrList +#include "ob_log_svr_finder.h" // SvrFindReq +#include "ob_log_start_log_id_locator.h" // StartLogIdLocateReq +#include "ob_log_fetcher_heartbeat_worker.h" // HeartbeatRequest +#include "ob_log_dlist.h" // ObLogDList, ObLogDListNode +#include "ob_log_fetch_stream_type.h" // FetchStreamType +#include "ob_log_part_trans_resolver.h" // IObLogPartTransResolver +#include "ob_log_part_serve_info.h" // PartServeInfo +#include "ob_log_part_trans_dispatcher.h" // PartTransDispatchInfo + +namespace oceanbase +{ +namespace liboblog +{ + +/////////////////////////////// PartFetchCtx ///////////////////////////////// + +class IObLogSvrFinder; +class IObLogFetcherHeartbeatWorker; +class ObLogConfig; +class FetchStream; +class IObLogPartFetchMgr; + +struct TransStatInfo; + +class PartFetchCtx; +typedef ObLogDListNode<PartFetchCtx> FetchTaskListNode; + +// Two-way linked list of fetch log tasks +typedef ObLogDList<PartFetchCtx> FetchTaskList; + +class PartFetchCtx : public FetchTaskListNode +{ + static const int64_t DEFAULT_SVR_LIST_UPDATE_MIN_INTERVAL = 1 * _SEC_; + static const int64_t DEFAULT_LEADER_INFO_UPDATE_MIN_INTERVAL = 200 * _MSEC_; + +// variables of class +public: + // server list update interval + static int64_t g_svr_list_update_interval; + // leader info update interval + static int64_t g_leader_info_update_interval; + // heartbeat update interval + static int64_t g_heartbeat_interval; + // blacklist history clearing interval + static int64_t g_blacklist_history_clear_interval; + +public: + PartFetchCtx(); + virtual ~PartFetchCtx(); + +public: + static void configure(const ObLogConfig &config); + +public: + void reset(); + int init(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const int64_t start_log_id, + const int64_t progress_id, + IObLogPartTransResolver &part_trans_resolver, + IObLogPartFetchMgr &part_fetch_mgr); + + /// Synchronise data to downstream + /// + /// Note: + /// 1. The flush() operation is only called by this function and not by other functions, i.e. there is only one flush() globally in oblog + /// + /// @retval OB_SUCCESS Success + /// @retval OB_IN_STOP_STATE exit + /// @retval Other error codes Fail + int sync(volatile bool &stop_flag); + + // read log entry + /// + /// @param [in] log_entry Target log entry + /// @param [out] missing An array of missing logs and an array of trans_id, the missing logs are guaranteed to be ordered and not duplicated. 
+ /// @param [out] tsi Transaction resolution statistics + /// @param [in] stop_flag The stop flag + /// @param [in] need_filter_pg_no_missing_redo_trans + /// @param [in] log_indexs + /// + /// @retval OB_SUCCESS success + /// @retval OB_ITEM_NOT_SETTED redo log incomplete + /// @retval OB_LOG_NOT_SYNC Logs are not continuous + /// @retval OB_IN_STOP_STATE Entered stop state + /// @retval Other error codes Failed + int read_log(const clog::ObLogEntry &log_entry, + IObLogPartTransResolver::ObLogMissingInfo &missing, + TransStatInfo &tsi, + const bool need_filter_pg_no_missing_redo_trans, + const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs, + volatile bool &stop_flag); + + // Read missing redo log entries + /// + /// @param log_entry Target log entries + /// + /// @retval OB_SUCCESS success + /// @retval Other error codes fail + int read_missing_redo(const clog::ObLogEntry &log_entry, + const IObLogPartTransResolver::ObTransIDArray &missing_log_trans_id_array); + + /// Offline partition, clear all unexported tasks and issue OFFLINE type tasks + /// + /// @retval OB_SUCCESS success + /// @retval OB_IN_STOP_STATE Entered stop state + /// @retval Other error codes Failed + int offline(volatile bool &stop_flag); + + // Check if relevant information needs to be updated + bool need_update_svr_list() const; + bool need_update_leader_info() const; + bool need_locate_start_log_id() const; + bool need_heartbeat(const int64_t upper_limit); + + // Update server list + int update_svr_list(IObLogSvrFinder &svr_finder, const bool need_print_info = true); + + // Updata leader info + int update_leader_info(IObLogSvrFinder &svr_finder); + + // Reset leader info + void reset_leader_info() + { + has_leader_ = false; + leader_.reset(); + } + + // locatestart log id + int locate_start_log_id(IObLogStartLogIdLocator &start_log_id_locator); + + /// Update heartbeat information + /// Note: only updates progress, no heartbeat tasks are generated + /// + /// @retval OB_SUCCESS success + /// @retval Other error codes Failed + int update_heartbeat_info(IObLogFetcherHeartbeatWorker &heartbeater, IObLogSvrFinder &svr_finder); + + /// Update log heartbeat information progress + /// + /// @retval OB_SUCCESS success + /// @retval OB_LOG_NOT_SYNC Logs are not continuous + /// @retval Other error codes Failed + int update_log_heartbeat(const uint64_t next_log_id, const int64_t log_progress); + + /// Iterate over the next server in the service log + /// 1. If the server has completed one round of iteration (all servers have been iterated over), then OB_ITER_END is returned + /// 2. After returning OB_ITER_END, the list of servers will be iterated over from the beginning next time + /// 3. 
If no servers are available, return OB_ITER_END + /// + /// @retval OB_SUCCESS success + /// @retval OB_ITER_END Server list iterations complete one round + /// @retval Other error codes Failed + int next_server(common::ObAddr &svr); + + /// Get the number of servers in the server list + int64_t get_server_count() const { return get_cur_svr_list().count(); } + + const PartSvrList &get_cur_svr_list() const; + PartSvrList &get_cur_svr_list(); + PartSvrList &get_bak_svr_list(); + void switch_svr_list(); + + void mark_svr_list_update_flag(const bool need_update); + + uint64_t hash() const; + + FetchStreamType get_fetch_stream_type() const { return stype_; } + void set_fetch_stream_type(FetchStreamType stype) { stype_ = stype; } + + IObLogPartTransResolver *get_part_trans_resolver() { return part_trans_resolver_; } + + const common::ObPartitionKey &get_pkey() const { return pkey_; } + + int64_t get_progress_id() const { return progress_id_; } + + // Check if the logs are timed out on the target server + // i.e. if the logs are not fetched for a long time on the target server, this result will be used as a basis for switching servers + // + // Timeout conditions. + // 1. the progress is not updated on the target server for a long time + // 2. progress is less than the upper limit + int check_fetch_timeout(const common::ObAddr &svr, + const int64_t upper_limit, + const int64_t fetcher_resume_tstamp, + bool &is_fetch_timeout, // Is the log fetch timeout + bool &is_fetch_timeout_on_lagged_replica); // Is the log fetch timeout on a lagged replica + + // Get the progress of a transaction + // 1. When there is a transaction ready to be sent, the timestamp of the transaction to be sent - 1 is taken as the progress of the sending + // 2. When no transaction is ready to be sent, the log progress is taken as the progress + // + // This value will be used as the basis for sending the heartbeat timestamp downstream + int get_dispatch_progress(int64_t &progress, PartTransDispatchInfo &dispatch_info); + + struct PartProgress; + void get_progress_struct(PartProgress &prog) const { progress_.atomic_copy(prog); } + int64_t get_progress() const { return progress_.get_progress(); } + uint64_t get_next_log_id() const { return progress_.get_next_log_id(); } + struct FetchModule; + const FetchModule &get_cur_fetch_module() const { return fetch_info_.cur_mod_; } + void update_touch_tstamp_if_progress_beyond_upper_limit(const int64_t upper_limit) + { + progress_.update_touch_tstamp_if_progress_beyond_upper_limit(upper_limit); + } + + double get_tps() + { + return NULL == part_trans_resolver_ ? 0 : part_trans_resolver_->get_tps(); + } + + bool is_discarded() const { return ATOMIC_LOAD(&discarded_); } + void set_discarded() { ATOMIC_STORE(&discarded_, true); } + + // Whether the FetcherDeadPool can clean up the PartFetchCtx + // whether there are asynchronous requests pending + // including: heartbeat requests, locate requests, svr_list and leader update requests, etc. 
+ bool is_in_use() const; + + void print_dispatch_info() const; + void dispatch_in_idle_pool(); + void dispatch_in_fetch_stream(const common::ObAddr &svr, FetchStream &fs); + void dispatch_in_dead_pool(); + + void dispatch_out(const char *reason) + { + fetch_info_.dispatch_out(reason); + } + + // Get the start fetch log time on the current server + int get_cur_svr_start_fetch_tstamp(const common::ObAddr &svr, + int64_t &svr_start_fetch_tstamp) const; + + // add server to blacklist + /// + /// @param [in] svr blacklisted sever + /// @param [in] svr_service_time Current server service partition time + /// @param [in] survival_time server survival time in blacklist (may be modified based on history) + /// + /// @retval OB_SUCCESS add svr to blacklist success + /// @retval Other error codes Fail + int add_into_blacklist(const common::ObAddr &svr, + const int64_t svr_service_time, + int64_t &survival_time); + + // Determine if the server needs to be switched + // + /// @param [in] cur_svr The fetch log stream where the partition task is currently located - target server + /// + /// @retval true + /// @retval false + bool need_switch_server(const common::ObAddr &cur_svr); + + // Determining if the split is complete + bool is_split_done(const uint64_t split_log_id, const int64_t split_log_ts) const; + + // Source partition split processing complete, advance this partition status + int handle_when_src_split_done(volatile bool &stop_flag); + + // Internal member functions +private: + int dispatch_heartbeat_if_need_(); + // Periodic deletion of history + int clear_blacklist_history_(IBlackList::SvrHistoryArray &clear_svr_array); + int init_locate_req_svr_list_(StartLogIdLocateReq &req, PartSvrList &svr_list); + int launch_heartbeat_request_(IObLogFetcherHeartbeatWorker &heartbeater, + IObLogSvrFinder &svr_finder); + int handle_heartbeat_request_(IObLogSvrFinder &svr_finder); + int dispatch_(volatile bool &stop_flag, int64_t &pending_trans_count); + int handle_offline_partition_log_(const clog::ObLogEntry &log_entry, volatile bool &stop_flag); + int handle_split_src_log_(const clog::ObLogEntry &log_entry, volatile bool &stop_flag); + int prepare_split_(const storage::ObPartitionSplitSourceLog &split_src_log, + bool &split_in_process); + int process_split_(volatile bool &stop_flag); + int mark_split_done_(); + int process_split_dest_part_(volatile bool &stop_flag); + int clear_wait_state_(bool &old_state_mark_wait, bool &old_state_in_split); + int handle_split_dst_log_(const clog::ObLogEntry &log_entry); + int mark_state_wait_from_normal_(); + int check_src_part_split_state_(const storage::ObPartitionSplitDestLog &split_dst_log, + bool &src_part_split_done); + int mark_state_normal_from_wait_(); + +public: + ///////////////////////////////// PartProgress ///////////////////////////////// + // + // At the moment of startup, only the startup timestamp of the partition is known, not the specific log progress, using the following convention. + // 1. set the start timestamp to the global progress: progress + // 2. next_log_id and log_progress are invalid + // 3. wait for the start log id locator to look up the start log id and set it to next_log_id + // 4. start log id may have fallback, during the fallback log, the log_progress is updated, + // since the log progress is less than the global progress, the global progress remains unchanged; but touch_tstamp remains updated + struct PartProgress + { + // Global progress and update timestamps + // Update scenarios. + // 1. 
If the global progress is advanced when the log progress or heartbeat progress is updated, the global progress value should be updated, along with the corresponding update timestamp + // 2. If the global progress is greater than the upper limit, the corresponding update timestamp should be updated in real time to avoid future misconceptions that the progress update has timed out + // + // In general, the progress value is equal to either the log progress or the heartbeat progress; the following cases will cause the progress value to differ from both the log progress and the heartbeat progress + // 1. next_log_id = 8 + // 2. next_served_log_id = 8 + // 3. log_progress = 10:00 + // 4. next_served_tstamp = 11:00 + // 5. progress = 11:00 + // 6. is_log_progress = false + // i.e. the timestamp of log #7 is less than or equal to 10:00 and no logs were written between 10:00 and 11:00 + // The current global progress is 11:00, representing the heartbeat progress + // + // At 11:01 the division wrote log #8 and the individual progress became. + // 1. next_log_id = 8 + // 2. next_served_log_id = 9 + // 3. log_progress = 10:00 + // 4. next_served_tstamp = 11:01 + // 5. progress = 11:00 + // 6. is_log_progress = false + // That is, the log progress remains unchanged, the heartbeat progress advances by one log, and the global progress remains unchanged at 11 points, which is different from both the log and heartbeat progress + int64_t progress_; + int64_t touch_tstamp_; + bool is_log_progress_; // Current progress represents log progress or heartbeat progress + + // Log progress + // 1. log_progress normally refers to the lower bound of the next log timestamp + // 2. log_progress and next_log_id are invalid at startup + uint64_t next_log_id_; // next log id + int64_t log_progress_; // log progress + int64_t log_touch_tstamp_; // Log progress last update time + + // heartbeat progress + uint64_t next_served_log_id_; // The next log ID of the server-side service + int64_t next_served_tstamp_; // Lower bound on the next log timestamp of the server-side service + int64_t hb_touch_tstamp_; // heartbeat progress last update time + + // Lock: Keeping read and write operations atomic + mutable common::ObByteLock lock_; + + PartProgress() { reset(); } + ~PartProgress() { reset(); } + + TO_STRING_KV( + "progress", TS_TO_STR(progress_), + "touch_tstamp", TS_TO_STR(touch_tstamp_), + K_(is_log_progress), + K_(next_log_id), + "log_progress", TS_TO_STR(log_progress_), + "log_touch_tstamp", TS_TO_STR(log_touch_tstamp_), + K_(next_served_log_id), + "next_served_tstamp", TS_TO_STR(next_served_tstamp_), + "hb_touch_tstamp", TS_TO_STR(hb_touch_tstamp_)); + + void reset(); + + // Note: start_log_id may be invalid, but start_tstamp should be valid + void reset(const uint64_t start_log_id, const int64_t start_tstamp); + + uint64_t get_next_log_id() const { return next_log_id_; } + void set_next_log_id(const uint64_t start_log_id) { next_log_id_ = start_log_id; } + uint64_t get_next_served_log_id() const { return next_served_log_id_; } + // Get heartbeat update timestamp + int64_t get_hb_touch_tstamp() const { return hb_touch_tstamp_; } + + // Get current progress + int64_t get_progress() const { return progress_; } + int64_t get_touch_tstamp() const { return touch_tstamp_; } + bool is_log_progress() const { return is_log_progress_; } + + // Copy the entire progress item to ensure atomicity + void atomic_copy(PartProgress &prog) const; + + // Update the touch timestamp if progress is greater than the upper limit + void 
update_touch_tstamp_if_progress_beyond_upper_limit(const int64_t upper_limit); + + // Whether a heartbeat request needs to be initiated + bool need_heartbeat(const int64_t upper_limit, const int64_t hb_interval) const; + + // Update Heartbeat Progress + void update_heartbeat_progress(const uint64_t ns_log_id, const int64_t ns_tstamp); + + // Update log progress + // Update both the log ID and the log progress + // Require log ID to be updated sequentially, otherwise return OB_LOG_NOT_SYNC + // + // Update log progress once for each log parsed to ensure sequential update + int update_log_progress(const uint64_t new_next_log_id, const int64_t log_progress); + + // Update the log heartbeat + // Update log progress only, keeping the log ID unchanged + // Require that the log ID provided matches the log ID saved, otherwise return OB_LOG_NOT_SYNC + // + // The usage scenario is to update the progress when the server side returns the lower bound timestamp of the next log + int update_log_heartbeat(const uint64_t next_log_id, const int64_t log_progress); + + private: + void update_progress_(); + }; + +public: + ///////////////////////////////// FetchModule ///////////////////////////////// + // Module where the partition is located + struct FetchModule + { + enum ModuleName + { + FETCH_MODULE_NONE = 0, // Not part of any module + FETCH_MODULE_IDLE_POOL = 1, // IDLE POOL module + FETCH_MODULE_FETCH_STREAM = 2, // FetchStream module + FETCH_MODULE_DEAD_POOL = 3, // DEAD POOL module + }; + + ModuleName module_; // module name + + // FetchStream info + common::ObAddr svr_; + void *fetch_stream_; // Pointer to FetchStream, object may be invalid, cannot reference content + int64_t start_fetch_tstamp_; + + void reset(); + void reset_to_idle_pool(); + void reset_to_fetch_stream(const common::ObAddr &svr, FetchStream &fs); + void reset_to_dead_pool(); + int64_t to_string(char *buffer, const int64_t size) const; + }; + + ///////////////////////////////// FetchInfo ///////////////////////////////// + // Fetching log stream information + struct FetchInfo + { + FetchModule cur_mod_; // The module to which currently belong to + + FetchModule out_mod_; // module that dispatch out from + const char *out_reason_; // reason for dispatch out + + FetchInfo() { reset(); } + + void reset(); + void dispatch_in_idle_pool(); + void dispatch_in_fetch_stream(const common::ObAddr &svr, FetchStream &fs); + void dispatch_in_dead_pool(); + void dispatch_out(const char *reason); + + // Get the start fetch time of the log on the current server + // Requires FETCH_STREAM for the fetch log module; requiring the server to match + int get_cur_svr_start_fetch_tstamp(const common::ObAddr &svr, + int64_t &svr_start_fetch_ts) const; + + TO_STRING_KV(K_(cur_mod), K_(out_mod), K_(out_reason)); + }; + +private: + // state convert: + // 1. STATE_NORMAL -> STATE_WAIT + // Received "Split target partition" log, entered wait state, blocked sending data + // + // 2. STATE_WAIT -> STATE_NORMAL + // The "split source partition" has been processed and is ready to send data + // + // 3. STATE_WAIT -> STATE_WAIT_AND_SPLIT_IN_PROCESS + // The "split target partition" receives the "split source partition" log in the wait state and enters the split processing state + // + // 4. STATE_WAIT_AND_SPLIT_IN_PROCESS -> STATE_SPLIT_IN_PROCESS + // The "split source partition" has been processed and the partition is officially in the split processing state + // + // 5. 
STATE_NORMAL -> STATE_SPLIT_IN_PROCESS + // Partition receives "Split Source Partition" log and enters split processing state + // + // 6. STATE_SPLIT_IN_PROCESS -> STATE_SPLIT_DONE + // split done + enum FetchState + { + // noral state + STATE_NORMAL = 0, + + // wait state + // Wait for source partition splitting to complete + STATE_WAIT = 1, + + // partition spliting(dealing with partition split) + STATE_SPLIT_IN_PROCESS = 2, + + // Waiting state while entering a split state + // Wait for source partition splitting to complete,Meanwhile split log (self) is ready, ready to split + // + // STATE_WAIT & STATE_SPLIT_IN_PROCESS + STATE_WAIT_AND_SPLIT_IN_PROCESS = 3, + + // splitting completion state + // final state, no further transitions to other states + STATE_SPLIT_DONE = 4, + + STATE_MAX + }; + + // Is it in waiting status + bool is_in_wait_state(const int state) + { + return (STATE_WAIT_AND_SPLIT_IN_PROCESS == state) || (STATE_WAIT == state); + } + + const char *print_state(int state) const + { + const char *str = "UNKNOWN"; + switch (state) { + case STATE_NORMAL: + str = "NORMAL"; + break; + case STATE_WAIT: + str = "STATE_WAIT"; + break; + case STATE_SPLIT_IN_PROCESS: + str = "SPLIT_IN_PROCESS"; + break; + case STATE_WAIT_AND_SPLIT_IN_PROCESS: + str = "WAIT_AND_SPLIT_IN_PROCESS"; + break; + case STATE_SPLIT_DONE: + str = "SPLIT_DONE"; + break; + default: + str = "UNKNOWN"; + break; + } + return str; + } + +public: + TO_STRING_KV("type", "FETCH_TASK", + "stype", print_fetch_stream_type(stype_), + K_(state), + "state_str", print_state(state_), + K_(discarded), + K_(pkey), + K_(serve_info), + K_(progress_id), + KP_(part_fetch_mgr), + KP_(part_trans_resolver), + K_(last_sync_progress), + K_(progress), + K_(fetch_info), + "blacklist_svr_count", blacklist_.count(), + K_(blacklist), + "svr_count", get_server_count(), + "svr_list", get_cur_svr_list(), + K_(svr_list_need_update), + "svr_list_last_update_tstamp", TS_TO_STR(svr_list_last_update_tstamp_), + "svr_find_req", + svr_find_req_.is_state_idle() ? "IDLE" : to_cstring(svr_find_req_), // won't print IDLE state + "start_log_id_locate_req", + start_log_id_locate_req_.is_state_idle() ? "IDLE" : to_cstring(start_log_id_locate_req_), + K_(leader), + K_(has_leader), + "leader_last_update_tstamp", TS_TO_STR(leader_last_update_tstamp_), + "leader_find_req", leader_find_req_.is_state_idle() ? "IDLE" : to_cstring(leader_find_req_), + "heartbeat_req", heartbeat_req_.is_state_idle() ? "IDLE" : to_cstring(heartbeat_req_), + "heartbeat_last_update_tstamp", TS_TO_STR(heartbeat_last_update_tstamp_), + K_(split_dest_array), + KP_(next), + KP_(prev)); + +private: + FetchStreamType stype_; + FetchState state_; + bool discarded_; // partition is deleted or not + + common::ObPartitionKey pkey_; + PartServeInfo serve_info_; + int64_t progress_id_; // Progress Unique Identifier + IObLogPartFetchMgr *part_fetch_mgr_; // PartFetchCtx manager + IObLogPartTransResolver *part_trans_resolver_; // Partitioned transaction resolvers, one for each partition exclusively + + // Last synced progress + int64_t last_sync_progress_ CACHE_ALIGNED; + + // partition progress + PartProgress progress_; + + /// fetch log info + FetchInfo fetch_info_; + + //////////// server list //////////// + // Update timing. + // 1. when the svr list is exhausted, the svr_list_need_update_ flag is set to true, check that the flag is true to require an update + // 2. periodic update: to ensure the svr list is always up to date, it needs to be updated periodically + // 3. 
ensure that updates are not too frequent + // + // Implementation options + // 1. In order to update the server list asynchronously, a dual-server list mechanism is used, with one serving as the official list and the other as a "standby list" to store the updated data; after a successful update, the standby list is switched to the official list atomically + // 2. + // 2. blacklist is a partition-level blacklist of servers that are not available in the partition, the purpose of which is to ensure that each access is filtered for servers that are not currently available. + BlackList blacklist_; + PartSvrList svr_list_[2]; + int64_t cur_svr_list_idx_; + bool svr_list_need_update_; + int64_t svr_list_last_update_tstamp_; + + /// Update server list request + SvrFindReq svr_find_req_; + + /// start log id locator request + StartLogIdLocateReq start_log_id_locate_req_; + + /////////// Leader info //////////// + common::ObAddr leader_; + bool has_leader_; + int64_t leader_last_update_tstamp_; + + /// request to update leader info + LeaderFindReq leader_find_req_; + + /// heartbeat request + HeartbeatRequest heartbeat_req_; + int64_t heartbeat_last_update_tstamp_; + + // partition arrays of Split target + typedef common::ObArray PartArray; + PartArray split_dest_array_; + +private: + DISALLOW_COPY_AND_ASSIGN(PartFetchCtx); +}; + +//////////////////////////////////////// PartFetchInfoForPrint ////////////////////////////////// + +// For printing fetch log information +struct PartFetchInfoForPrint +{ + double tps_; + bool is_discarded_; + common::ObPartitionKey pkey_; + PartFetchCtx::PartProgress progress_; + PartFetchCtx::FetchModule fetch_mod_; + int64_t dispatch_progress_; + PartTransDispatchInfo dispatch_info_; + + PartFetchInfoForPrint(); + int init(PartFetchCtx &ctx); + + // for printing fetch progress + void print_fetch_progress(const char *description, + const int64_t idx, + const int64_t array_cnt, + const int64_t cur_time) const; + + // for printing partition dispatch progress + void print_dispatch_progress(const char *description, + const int64_t idx, + const int64_t array_cnt, + const int64_t cur_time) const; + + int64_t get_progress() const { return progress_.get_progress(); } + int64_t get_dispatch_progress() const { return dispatch_progress_; } + + TO_STRING_KV(K_(tps), K_(is_discarded), K_(pkey), K_(progress), K_(fetch_mod), + K_(dispatch_progress), K_(dispatch_info)); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_OB_LOG_PART_FETCH_CTX_H__ */ diff --git a/src/liboblog/src/ob_log_part_fetch_mgr.cpp b/src/liboblog/src/ob_log_part_fetch_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..91b53730869b6b411b719d6e9f531d973a5beb1f --- /dev/null +++ b/src/liboblog/src/ob_log_part_fetch_mgr.cpp @@ -0,0 +1,539 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_fetch_mgr.h" + +#include "share/ob_errno.h" // OB_SUCCESS, .. 
+#include "lib/oblog/ob_log_module.h" // LOG_* +#include "lib/allocator/ob_mod_define.h" // ObModIds + +#include "ob_log_part_progress_controller.h" // PartProgressController +#include "ob_log_part_trans_resolver_factory.h" // IObLogPartTransResolverFactory +#include + +#define FREE_CTX(ctx, pool) \ + do {\ + int free_ret = OB_SUCCESS; \ + if (NULL != ctx) { \ + ctx->reset(); \ + free_ret = pool.free(ctx); \ + if (OB_SUCCESS != free_ret) { \ + LOG_ERROR("free PartFetchCtx fail", K(free_ret), K(ctx), KPC(ctx)); \ + } else { \ + ctx = NULL; \ + } \ + } \ + } while (0) + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +int64_t ObLogPartFetchMgr::g_print_slowest_part_num = ObLogConfig::default_print_fetcher_slowest_part_num; + +ObLogPartFetchMgr::ObLogPartFetchMgr() : + inited_(false), + progress_controller_(NULL), + part_trans_resolver_factory_(NULL), + ctx_map_(), + ctx_pool_(), + pkey_serialize_allocator_("PkeySerialize"), + start_global_trans_version_(OB_INVALID_TIMESTAMP) +{ +} + +ObLogPartFetchMgr::~ObLogPartFetchMgr() +{ + destroy(); +} + +int ObLogPartFetchMgr::init(const int64_t max_cached_part_fetch_ctx_count, + PartProgressController &progress_controller, + IObLogPartTransResolverFactory &part_trans_resolver_factory) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(max_cached_part_fetch_ctx_count <= 0)) { + LOG_ERROR("invalid argument", K(max_cached_part_fetch_ctx_count)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(ctx_map_.init(ObModIds::OB_LOG_PART_FETCH_CTX_MAP))) { + LOG_ERROR("init PartFetchCtxMap fail", KR(ret)); + } else if (OB_FAIL(ctx_pool_.init(max_cached_part_fetch_ctx_count, + ObModIds::OB_LOG_PART_FETCH_CTX_POOL, + DEFAULT_TENANT_ID, + PART_FETCH_CTX_POOL_BLOCK_SIZE))) { + LOG_ERROR("init PartFetchCtxPool fail", KR(ret), K(max_cached_part_fetch_ctx_count), + LITERAL_K(PART_FETCH_CTX_POOL_BLOCK_SIZE)); + } else { + progress_controller_ = &progress_controller; + part_trans_resolver_factory_ = &part_trans_resolver_factory; + start_global_trans_version_ = OB_INVALID_TIMESTAMP; + inited_ = true; + + LOG_INFO("init part fetch mgr succ", K(max_cached_part_fetch_ctx_count)); + } + return ret; +} + +void ObLogPartFetchMgr::destroy() +{ + // TODO: recycle all task in map + + inited_ = false; + progress_controller_ = NULL; + part_trans_resolver_factory_ = NULL; + (void)ctx_map_.destroy(); + ctx_pool_.destroy(); + pkey_serialize_allocator_.clear(); + start_global_trans_version_ = OB_INVALID_TIMESTAMP; + LOG_INFO("destroy part fetch mgr succ"); +} + +int ObLogPartFetchMgr::add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id) +{ + int ret = OB_SUCCESS; + PartFetchCtx *ctx = NULL; + int64_t progress_id = -1; + IObLogPartTransResolver *part_trans_resolver = NULL; + char *pkey_str = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } + // start timestamp must be valid! 
+ else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_tstamp)) { + LOG_ERROR("invalid start tstamp", K(start_tstamp)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(progress_controller_) + || OB_ISNULL(part_trans_resolver_factory_)) { + LOG_ERROR("invalid handlers", K(progress_controller_), K(part_trans_resolver_factory_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(init_pkey_info_(pkey, pkey_str))) { + LOG_ERROR("init_pkey_info_ fail", KR(ret), K(pkey), K(pkey_str)); + } + // alloc a part trans resolver + else if (OB_FAIL(part_trans_resolver_factory_->alloc(pkey_str, part_trans_resolver))) { + LOG_ERROR("alloc IObLogPartTransResolver fail", KR(ret), K(pkey_str)); + } else if (OB_ISNULL(part_trans_resolver)) { + LOG_ERROR("invalid part_trans_resolver", K(part_trans_resolver)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(part_trans_resolver->init(pkey, start_tstamp, start_global_trans_version_))) { + LOG_ERROR("init part trans resolver fail", KR(ret), K(pkey), K(start_tstamp), + K(start_global_trans_version_)); + } + // alloc a PartFetchCtx + else if (OB_FAIL(ctx_pool_.alloc(ctx))) { + LOG_ERROR("alloc PartFetchCtx fail", KR(ret)); + } else if (OB_ISNULL(ctx)) { + LOG_ERROR("PartFetchCtx is NULL", K(ctx)); + ret = OB_ERR_UNEXPECTED; + } + // alloc a progress id which should be unique for each partition + else if (OB_FAIL(progress_controller_->acquire_progress(progress_id, start_tstamp))) { + LOG_ERROR("acquire_progress fail", KR(ret), K(start_tstamp)); + // init PartFetchCtx + } else if (OB_FAIL(ctx->init(pkey, start_tstamp, start_log_id, progress_id, *part_trans_resolver, *this))) { + LOG_ERROR("ctx init fail", KR(ret), K(pkey), K(start_tstamp), K(start_log_id), K(progress_id)); + } else { + + if (OB_FAIL(ctx_map_.insert(pkey, ctx))) { + if (OB_ENTRY_EXIST == ret) { + LOG_ERROR("partition has been added", KR(ret), K(pkey), K(start_tstamp), K(start_log_id)); + } else { + LOG_ERROR("insert into map fail", KR(ret), K(pkey), K(ctx)); + } + } else { + _LOG_INFO("[STAT] [PartFetchMgr] [ADD_PART] pkey=%s start_log_id=%lu start_tstamp=%ld(%s) " + "progress_id=%ld fetch_task=%p part_trans_resolver=%p", + to_cstring(pkey), start_log_id, start_tstamp, TS_TO_STR(start_tstamp), + progress_id, ctx, part_trans_resolver); + } + } + + if (OB_SUCCESS != ret) { + // recycle progress id, delete from global_progress_controller and should not effect progress of normal partition + int release_ret = OB_SUCCESS; + + if (-1 != progress_id && NULL != progress_controller_) { + if (OB_UNLIKELY(OB_SUCCESS != (release_ret = progress_controller_->release_progress(progress_id)))) { + LOG_ERROR("release progress fail", K(release_ret), K(progress_id), K(pkey), K(ctx), + KPC(ctx)); + } else { + progress_id = -1; + } + } + + if (NULL != ctx) { + FREE_CTX(ctx, ctx_pool_); + } + + if (NULL != part_trans_resolver) { + part_trans_resolver_factory_->free(part_trans_resolver); + part_trans_resolver = NULL; + } + } + + return ret; +} + +int ObLogPartFetchMgr::init_pkey_info_(const common::ObPartitionKey &pkey, + char *&pkey_str) +{ + int ret = OB_SUCCESS; + pkey_str = NULL; + // 1024 is enough for seriailized pkey + static const int64_t PKEY_BUF_SIZE = 1024; + char pkey_str_buf[PKEY_BUF_SIZE]; + int64_t pkey_local_buf_pos = pkey.to_string(pkey_str_buf, PKEY_BUF_SIZE); + + if (OB_UNLIKELY(pkey_local_buf_pos <= 0 || pkey_local_buf_pos >= PKEY_BUF_SIZE)) { + LOG_ERROR("pkey_local_buf_pos is not valid", K(pkey_local_buf_pos), K(pkey)); + ret = OB_ERR_UNEXPECTED; + } else { + const int64_t buf_len = pkey_local_buf_pos + 1; + 
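+    // buf_len reserves one extra byte beyond the serialized pkey length so a trailing '\0' can be appended after the MEMCPY below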
pkey_str = static_cast<char *>(pkey_serialize_allocator_.alloc(buf_len)); + + if (OB_ISNULL(pkey_str)) { + LOG_ERROR("allocator_ alloc for pkey str fail", K(pkey_str), K(buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(pkey_str, pkey_str_buf, pkey_local_buf_pos); + pkey_str[pkey_local_buf_pos] = '\0'; + } + } + + return ret; +} + +bool ObLogPartFetchMgr::CtxRecycleCond::operator() (const common::ObPartitionKey &pkey, + PartFetchCtx *&ctx) +{ + bool bool_ret = false; + if (OB_ISNULL(ctx)) { + LOG_ERROR("invalid part fetch ctx", K(ctx), K(pkey)); + bool_ret = false; + } else { + _LOG_INFO("[STAT] [PartFetchMgr] [RECYCLE_PART] pkey=%s " + "fetch_task=%p fetch_task=%s", + to_cstring(pkey), ctx, to_cstring(*ctx)); + + // mark the partition status as DISCARDED + ctx->set_discarded(); + + bool_ret = true; + } + + return bool_ret; +} + +int ObLogPartFetchMgr::recycle_partition(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + CtxRecycleCond recycle_cond; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(ctx_map_.operate(pkey, recycle_cond))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // as expected + } else { + LOG_ERROR("operate on ctx map fail", KR(ret), K(pkey)); + } + } else { + // succ + } + + return ret; +} + +int ObLogPartFetchMgr::remove_partition(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(progress_controller_) || OB_ISNULL(part_trans_resolver_factory_)) { + LOG_ERROR("invalid progress controller", K(progress_controller_), K(part_trans_resolver_factory_)); + ret = OB_INVALID_ERROR; + } else { + PartFetchCtx *fetch_ctx = NULL; + + // remove node from map first to guarantee the correctness of the concurrent operation on the map + if (OB_FAIL(ctx_map_.erase(pkey, fetch_ctx))) { + LOG_ERROR("erase PartFetchCtx from map fail", KR(ret), K(pkey)); + } else if (OB_ISNULL(fetch_ctx)) { + LOG_ERROR("PartFetchCtx is NULL, unexpected error", K(pkey), K(fetch_ctx)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(! fetch_ctx->is_discarded())) { + LOG_ERROR("partition is not discarded, can not remove", K(pkey), KPC(fetch_ctx)); + ret = OB_ERR_UNEXPECTED; + } else { + IObLogPartTransResolver *ptr = fetch_ctx->get_part_trans_resolver(); + int64_t progress_id = fetch_ctx->get_progress_id(); + + _LOG_INFO("[STAT] [PartFetchMgr] [REMOVE_PART] pkey=%s progress_id=%ld " + "fetch_task=%p part_trans_resolver=%p fetch_task=%s", + to_cstring(pkey), progress_id, fetch_ctx, ptr, to_cstring(*fetch_ctx)); + + // recycle progress id, delete from global progress_controller + int release_ret = progress_controller_->release_progress(progress_id); + if (OB_UNLIKELY(OB_SUCCESS != release_ret)) { + LOG_ERROR("release progress fail", K(release_ret), K(progress_id), K(pkey), K(fetch_ctx), + KPC(fetch_ctx)); + } + + if (NULL != fetch_ctx) { + FREE_CTX(fetch_ctx, ctx_pool_); + } + + if (NULL != ptr) { + part_trans_resolver_factory_->free(ptr); + ptr = NULL; + } + } + } + + return ret; +} + +int ObLogPartFetchMgr::get_part_fetch_ctx(const common::ObPartitionKey &pkey, PartFetchCtx *&ctx) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(ctx_map_.get(pkey, ctx))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // pkey not exist in map + ctx = NULL; + } else { + LOG_ERROR("get PartFetchCtx from map fail", KR(ret), K(pkey)); + } + } else if (OB_ISNULL(ctx)) { + LOG_ERROR("PartFetchCtx is NULL", K(ctx)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + return ret; +} + +int ObLogPartFetchMgr::CtxPartProgressCond::init(const int64_t count) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(part_fetch_info_array_.reserve(count))) { + LOG_ERROR("reserve array fail", KR(ret), K(count)); + } else { + ctx_cnt_ = 0; + } + + return ret; +} + +bool ObLogPartFetchMgr::CtxPartProgressCond::operator() (const common::ObPartitionKey &pkey, + PartFetchCtx *ctx) +{ + int ret = OB_SUCCESS; + + if (NULL != ctx) { + PartFetchInfoForPrint part_fetch_info; + + if (OB_FAIL(part_fetch_info.init(*ctx))) { + LOG_ERROR("init part_fetch_info fail", KR(ret), K(pkey), KPC(ctx)); + } else if(OB_FAIL(part_fetch_info_array_.push_back(part_fetch_info))) { + LOG_ERROR("part_progress_array_ push back fail", KR(ret), K(pkey), KPC(ctx), K(ctx_cnt_)); + } else { + ctx_cnt_++; + } + } + + return OB_SUCCESS == ret; +} + +void ObLogPartFetchMgr::print_k_slowest_partition() +{ + int ret = OB_SUCCESS; + PartFetchInfoArray fetch_slow_array; // array of partitions with slowest log fetch progress + PartFetchInfoArray dispatch_slow_array; // array of partitions with slowest task dispatch progress + int64_t slowest_part_num = ATOMIC_LOAD(&g_print_slowest_part_num); + int64_t part_num = ctx_map_.count(); + CtxPartProgressCond part_progress_cond; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (part_num > 0) { + if (OB_FAIL(part_progress_cond.init(part_num))) { + LOG_ERROR("part progree cond init fail", KR(ret), K(part_num)); + } else if (OB_FAIL(ctx_map_.for_each(part_progress_cond))) { + LOG_ERROR("ctx_map_ for_each fail", KR(ret)); + } else { + const PartFetchInfoArray &part_fetch_info_array = part_progress_cond.part_fetch_info_array_; + FetchProgressCompFunc fetch_progress_comp_func; + DispatchProgressCompFunc dispatch_progress_comp_func; + + // get TOP-K partition with slowest log fetch progress + if (OB_FAIL(top_k(part_fetch_info_array, slowest_part_num, fetch_slow_array, + fetch_progress_comp_func))) { + LOG_ERROR("find the k slowest partition fail", KR(ret), K(slowest_part_num), + K(fetch_slow_array)); + } + // get TOP-K partition with slowest task dispatch progress + else if (OB_FAIL(top_k(part_fetch_info_array, slowest_part_num, dispatch_slow_array, + dispatch_progress_comp_func))) { + LOG_ERROR("find the k slowest partition fail", KR(ret), K(slowest_part_num), + K(dispatch_slow_array)); + } else { + int64_t cur_time = get_timestamp(); + int64_t array_cnt = fetch_slow_array.count(); + + // ************************ Print the K partitions with the slowest fetch log progress + _LOG_INFO("[STAT] slow fetch progress start. part_count=%ld/%ld", array_cnt, + part_fetch_info_array.count()); + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < array_cnt; ++idx) { + fetch_slow_array.at(idx).print_fetch_progress("slow fetch progress", + idx, array_cnt, cur_time); + } + + LOG_INFO("[STAT] slow fetch progress end"); + + // ************************ Print the K partitions with the slowest part dispatch progress + array_cnt = dispatch_slow_array.count(); + _LOG_INFO("[STAT] slow dispatch progress start. 
part_count=%ld/%ld", array_cnt, + part_fetch_info_array.count()); + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < array_cnt; ++idx) { + dispatch_slow_array.at(idx).print_dispatch_progress("slow dispatch progress", + idx, array_cnt, cur_time); + } + + LOG_INFO("[STAT] slow dispatch progress end"); + } + } + } +} + +bool ObLogPartFetchMgr::PartSplitStateChecker::operator() (const common::ObPartitionKey &pkey, + PartFetchCtx *&ctx) +{ + bool bool_ret = false; + if (pkey_ == pkey) { + bool_ret = true; + if (NULL != ctx) { + split_done_ = ctx->is_split_done(split_log_id_, split_log_ts_); + } + } + return bool_ret; +} + +int ObLogPartFetchMgr::check_part_split_state(const common::ObPartitionKey &pkey, + const uint64_t split_log_id, + const int64_t split_log_ts, + bool &split_done) +{ + int ret = OB_SUCCESS; + PartSplitStateChecker checker(pkey, split_log_id, split_log_ts); + split_done = false; + + if (OB_FAIL(ctx_map_.operate(pkey, checker))) { + // consider partition already split if partitin not exist + if (OB_ENTRY_NOT_EXIST == ret) { + split_done = true; + ret = OB_SUCCESS; + LOG_INFO("[STAT] [SPLIT] [CHECK_STATE] partition not exist, " + "it must have been split done", K(pkey), K(split_log_id), K(split_log_ts)); + } else { + LOG_ERROR("operate on ctx map fail", KR(ret), K(pkey)); + } + } else { + split_done = checker.split_done_; + } + return ret; +} + +bool ObLogPartFetchMgr::ActivateSplitDestPartFunc::operator() (const common::ObPartitionKey &pkey, PartFetchCtx *ctx) +{ + int ret = OB_SUCCESS; + err_code_ = OB_SUCCESS; + if (pkey_ == pkey) { + if (NULL != ctx) { + if (OB_FAIL(ctx->handle_when_src_split_done(stop_flag_))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_when_src_split_done fail", KR(ret), K(pkey), KPC(ctx)); + } + } + } + } + err_code_ = ret; + return true; +} + +int ObLogPartFetchMgr::activate_split_dest_part(const ObPartitionKey &pkey, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ActivateSplitDestPartFunc func(pkey, stop_flag); + if (OB_FAIL(ctx_map_.operate(pkey, func))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_INFO("[STAT] [SPLIT] [ACTIVATE_DEST] split dest partition not exist", K(pkey)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("operate on ctx map fail", KR(ret), K(pkey)); + } + } else { + // return error code + ret = func.err_code_; + } + return ret; +} + +int ObLogPartFetchMgr::set_start_global_trans_version(const int64_t start_global_trans_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_global_trans_version)) { + LOG_ERROR("invalid argument", K(start_global_trans_version)); + ret = OB_INVALID_ARGUMENT; + } else { + start_global_trans_version_ = start_global_trans_version; + } + + return ret; +} + +void ObLogPartFetchMgr::configure(const ObLogConfig & config) +{ + int64_t print_slowest_part_num = config.print_fetcher_slowest_part_num; + + ATOMIC_STORE(&g_print_slowest_part_num, print_slowest_part_num); + LOG_INFO("[CONFIG]", K(print_slowest_part_num)); +} + +} +} diff --git a/src/liboblog/src/ob_log_part_fetch_mgr.h b/src/liboblog/src/ob_log_part_fetch_mgr.h new file mode 100644 index 0000000000000000000000000000000000000000..fa562dccaa1f8b28c445476c2c16af0edcb145f0 --- /dev/null +++ b/src/liboblog/src/ob_log_part_fetch_mgr.h @@ -0,0 +1,221 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
* You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_FETCH_MGR_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_FETCH_MGR_H__ + +#include "ob_log_part_fetch_ctx.h" // PartFetchCtx, PartFetchInfoForPrint + +#include "share/ob_define.h" // OB_SERVER_TENANT_ID +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/container/ob_array.h" // ObArray +#include "lib/allocator/ob_safe_arena.h" // ObSafeArena +#include "common/ob_partition_key.h" // ObPartitionKey + +#include "ob_log_config.h" // ObLogConfig + +namespace oceanbase +{ +namespace liboblog +{ + +// Partition fetch manager +class IObLogPartFetchMgr +{ +public: + typedef common::ObArray<PartFetchInfoForPrint> PartFetchInfoArray; + typedef common::ObArray<common::ObPartitionKey> PartArray; + +public: + virtual ~IObLogPartFetchMgr() {} + +public: + /// add a new partition + virtual int add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id) = 0; + + /// recycle a partition + /// mark the partition as deleted and begin recycling its resources + virtual int recycle_partition(const common::ObPartitionKey &pkey) = 0; + + /// remove partition + /// physically delete the partition + virtual int remove_partition(const common::ObPartitionKey &pkey) = 0; + + /// get part fetch context + virtual int get_part_fetch_ctx(const common::ObPartitionKey &pkey, PartFetchCtx *&ctx) = 0; + + /// get the slowest k partitions + virtual void print_k_slowest_partition() = 0; + + // check partition split state + virtual int check_part_split_state(const common::ObPartitionKey &pkey, + const uint64_t split_log_id, + const int64_t split_log_ts, + bool &split_done) = 0; + + // activate the split target partition + // the partition split is done, notify the partition to dispatch data + virtual int activate_split_dest_part(const common::ObPartitionKey &pkey, + volatile bool &stop_flag) = 0; + + virtual int set_start_global_trans_version(const int64_t start_global_trans_version) = 0; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +class PartProgressController; +class IObLogPartTransResolverFactory; +template <typename T> class ObLogTransTaskPool; + +class ObLogPartFetchMgr : public IObLogPartFetchMgr +{ + // static global class variable + static int64_t g_print_slowest_part_num; + + static const int64_t PART_FETCH_CTX_POOL_BLOCK_SIZE = 1L << 24; + static const uint64_t DEFAULT_TENANT_ID = common::OB_SERVER_TENANT_ID; + + typedef common::ObSmallObjPool<PartFetchCtx> PartFetchCtxPool; + typedef common::ObLinearHashMap<common::ObPartitionKey, PartFetchCtx *> PartFetchCtxMap; + +public: + ObLogPartFetchMgr(); + virtual ~ObLogPartFetchMgr(); + +public: + int init(const int64_t max_cached_part_fetch_ctx_count, + PartProgressController &progress_controller, + IObLogPartTransResolverFactory &part_trans_resolver_factory); + void destroy(); + +public: + virtual int add_partition(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id); + virtual int recycle_partition(const common::ObPartitionKey &pkey); + virtual int remove_partition(const common::ObPartitionKey 
&pkey); + virtual int get_part_fetch_ctx(const common::ObPartitionKey &pkey, PartFetchCtx *&ctx); + virtual void print_k_slowest_partition(); + virtual int check_part_split_state(const common::ObPartitionKey &pkey, + const uint64_t split_log_id, + const int64_t split_log_ts, + bool &split_done); + virtual int activate_split_dest_part(const common::ObPartitionKey &pkey, + volatile bool &stop_flag); + virtual int set_start_global_trans_version(const int64_t start_global_trans_version); + + template int for_each_part(Func &func) + { + return ctx_map_.for_each(func); + } + +public: + static void configure(const ObLogConfig & config); + +private: + int init_pkey_info_(const common::ObPartitionKey &pkey, + char *&pkey_str); + struct CtxRecycleCond + { + bool operator() (const common::ObPartitionKey &pkey, PartFetchCtx *&ctx); + }; + + struct CtxPartProgressCond + { + CtxPartProgressCond() : ctx_cnt_(0), part_fetch_info_array_() {} + int init(const int64_t count); + bool operator() (const common::ObPartitionKey &pkey, PartFetchCtx *ctx); + + int64_t ctx_cnt_; + PartFetchInfoArray part_fetch_info_array_; + }; + + // do top-k + int find_k_slowest_partition_(const PartFetchInfoArray &part_fetch_ctx_array, + const int64_t slowest_part_num, + PartFetchInfoArray &slow_part_array); + + class FetchProgressCompFunc + { + public: + bool operator() (const PartFetchInfoForPrint &a, const PartFetchInfoForPrint &b) + { + return a.get_progress() < b.get_progress(); + } + }; + + class DispatchProgressCompFunc + { + public: + bool operator() (const PartFetchInfoForPrint &a, const PartFetchInfoForPrint &b) + { + return a.get_dispatch_progress() < b.get_dispatch_progress(); + } + }; + + struct PartSplitStateChecker + { + bool operator() (const common::ObPartitionKey &pkey, PartFetchCtx *&ctx); + + PartSplitStateChecker(const common::ObPartitionKey &pkey, const uint64_t split_log_id, + const int64_t split_log_ts) : + pkey_(pkey), + split_done_(false), + split_log_id_(split_log_id), + split_log_ts_(split_log_ts) + {} + + const common::ObPartitionKey &pkey_; // partition key of split source partition + bool split_done_; + uint64_t split_log_id_; // log id of partition split log in source partition + int64_t split_log_ts_; // timestamp of source partition(in partition split) + }; + + struct ActivateSplitDestPartFunc + { + bool operator() (const common::ObPartitionKey &pkey, PartFetchCtx *ctx); + ActivateSplitDestPartFunc(const common::ObPartitionKey &pkey, volatile bool &stop_flag) : + err_code_(0), + pkey_(pkey), + stop_flag_(stop_flag) + {} + + int err_code_; + const common::ObPartitionKey &pkey_; // partition key of target partition for partitin split + volatile bool &stop_flag_; + }; + +private: + bool inited_; + PartProgressController *progress_controller_; + IObLogPartTransResolverFactory *part_trans_resolver_factory_; + + PartFetchCtxMap ctx_map_; + PartFetchCtxPool ctx_pool_; + // 1. pkey's serialized string is maintained globally, and cannot be placed in modules such as PartTransDispatcher, + // because after partition deletion, when the partition has been ordered, the partition context may be cleaned up, + // but the Reader still needs to read data based on pkey_str + // 2. 
TODO supports pkey deletion and recycling + common::ObSafeArena pkey_serialize_allocator_; + + int64_t start_global_trans_version_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogPartFetchMgr); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_part_info.cpp b/src/liboblog/src/ob_log_part_info.cpp new file mode 100644 index 0000000000000000000000000000000000000000..236acddc7c3445fa5c57eca974f7efb96542c056 --- /dev/null +++ b/src/liboblog/src/ob_log_part_info.cpp @@ -0,0 +1,230 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_part_info.h" + +#include "ob_log_config.h" // TCONF + +namespace oceanbase +{ +using namespace common; + +namespace liboblog +{ + +void ObLogPartInfo::reset() +{ + ctx_.sv_.state_ = PART_STATE_INVALID; + ctx_.sv_.trans_count_ = 0; + pkey_.reset(); + serve_info_.reset(); +} + +int ObLogPartInfo::init(const ObPartitionKey &pkey, + const bool start_serve_from_create, + const int64_t start_tstamp, + const bool is_served) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! is_invalid())) { + LOG_ERROR("invalid state which is not INVALID", "state", print_state()); + ret = OB_STATE_NOT_MATCH; + } else { + pkey_ = pkey; + serve_info_.reset(start_serve_from_create, start_tstamp); + + // set state to NORMAL + ctx_.sv_.trans_count_ = 0; + ctx_.sv_.state_ = is_served ? PART_STATE_NORMAL : PART_STATE_NOT_SERVED;; + } + return ret; +} + +bool ObLogPartInfo::operator<(const ObLogPartInfo& other) const +{ + return pkey_ < other.pkey_; +} + +bool ObLogPartInfo::offline(int64_t &end_trans_count) +{ + bool bool_ret = false; + Ctx cur_ctx = ctx_; + + while (PART_STATE_OFFLINE != cur_ctx.sv_.state_) { + Ctx old_ctx = cur_ctx; + Ctx new_ctx = cur_ctx; + new_ctx.sv_.state_ = PART_STATE_OFFLINE; // No change in number of transactions, status changed to offline + + cur_ctx.iv_ = ATOMIC_CAS(&ctx_.iv_, old_ctx.iv_, new_ctx.iv_); + + if (old_ctx.iv_ == cur_ctx.iv_) { + bool_ret = true; + end_trans_count = cur_ctx.sv_.trans_count_; + + // CAS is successful, update current value + cur_ctx.iv_ = new_ctx.iv_; + } + } + + return bool_ret; +} + +void ObLogPartInfo::inc_trans_count_on_serving(bool &is_serving) +{ + Ctx cur_ctx = ctx_; + bool done = false; + + is_serving = false; + while (! 
done && is_serving_state_(cur_ctx.sv_.state_)) { + Ctx old_ctx = cur_ctx; + Ctx new_ctx = cur_ctx; + new_ctx.sv_.trans_count_++; // Status unchanged, number of transactions plus one + + cur_ctx.iv_ = ATOMIC_CAS(&ctx_.iv_, old_ctx.iv_, new_ctx.iv_); + + if (cur_ctx.iv_ == old_ctx.iv_) { + done = true; + is_serving = true; + } + } +} + +int ObLogPartInfo::dec_trans_count(bool &need_remove) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_invalid())) { + LOG_ERROR("invalid state", "state", print_state(), "state", ctx_.sv_.state_); + ret = OB_STATE_NOT_MATCH; + } else { + Ctx cur_ctx = ctx_; + bool done = false; + + while (OB_SUCCESS == ret && !done) { + Ctx old_ctx = cur_ctx; + Ctx new_ctx = cur_ctx; + new_ctx.sv_.trans_count_--; // No change in status, number of transactions minus 1 + + if (OB_UNLIKELY(new_ctx.sv_.trans_count_ < 0)) { + LOG_ERROR("transaction count will become invalid, unexcepted", + "state", print_state(), "trans_count", new_ctx.sv_.trans_count_); + ret = OB_ERR_UNEXPECTED; + } else { + cur_ctx.iv_ = ATOMIC_CAS(&ctx_.iv_, old_ctx.iv_, new_ctx.iv_); + + if (old_ctx.iv_ == cur_ctx.iv_) { + done = true; + // If the transaction is offline and the transaction count is 0, then it needs to be deleted + need_remove = ((PART_STATE_OFFLINE == new_ctx.sv_.state_) && (0 == new_ctx.sv_.trans_count_)); + } + } + } + } + return ret; +} + +const char *ObLogPartInfo::print_state() const +{ + const char *ret = "UNKNOWN"; + + switch (ctx_.sv_.state_) { + case PART_STATE_INVALID: { + ret = "INVALID"; + break; + } + case PART_STATE_NORMAL: { + ret = "NORMAL"; + break; + } + case PART_STATE_OFFLINE: { + ret = "OFFLINE"; + break; + } + case PART_STATE_NOT_SERVED: { + ret = "NOT_SERVED"; + break; + } + default: { + ret = "UNKNOWN"; + break; + } + } + + return ret; +} + +///////////////////////////////////////////////////////////////////////////// + +bool PartInfoPrinter::operator()( + const ObPartitionKey& pkey, + ObLogPartInfo* part_info) +{ + if (pkey.get_tenant_id() == tenant_id_) { + if (OB_ISNULL(part_info)) { + LOG_ERROR("part_info is invalid", K(pkey), K(part_info)); + } else if (part_info->is_offline()) { + offline_part_count_++; + } else if (part_info->is_not_serving()) { + not_served_part_count_++; + } else { + serving_part_count_++; + } + + if (TCONF.print_partition_serve_info) { + PART_ISTAT(part_info, "[SERVE_INFO]"); + } + } + return true; +} + +///////////////////////////////////////////////////////////////////////////// +bool PartInfoScannerByTenant::operator()( + const ObPartitionKey& pkey, + ObLogPartInfo* part_info) +{ + int ret = OB_SUCCESS; + + if (pkey.get_tenant_id() == tenant_id_) { + if (OB_FAIL(pkey_array_.push_back(pkey))) { + LOG_ERROR("push partition key into array fail", + KR(ret), K(pkey), KPC(part_info), K(pkey_array_)); + } + } + + return OB_SUCCESS == ret; +} + +PartInfoScannerByTableID::PartInfoScannerByTableID(const uint64_t table_id) +{ + table_id_ = table_id; + pkey_array_.reset(); +} + +bool PartInfoScannerByTableID::operator()( + const ObPartitionKey& pkey, + ObLogPartInfo* part_info) +{ + int ret = OB_SUCCESS; + + if (pkey.get_table_id() == table_id_) { + if (OB_FAIL(pkey_array_.push_back(pkey))) { + LOG_ERROR("push partition key into array fail", + KR(ret), K(pkey), K(table_id_), KPC(part_info), K(pkey_array_)); + } + } + + return OB_SUCCESS == ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_part_info.h b/src/liboblog/src/ob_log_part_info.h new file mode 100644 index 0000000000000000000000000000000000000000..88ba5746d53e00a4b85d32bdcbad0a8f934108ec --- 
/dev/null +++ b/src/liboblog/src/ob_log_part_info.h @@ -0,0 +1,180 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_INFO_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_INFO_H_ + +#include "common/ob_partition_key.h" // ObPartitionKey +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/container/ob_array.h" // ObArray + +#include "ob_easy_hazard_map.h" // ObEasyHazardMap +#include "ob_log_part_serve_info.h" // PartServeInfo + +#define PART_STAT(level, part_info, fmt, arg...) \ + do { \ + if (NULL != part_info) { \ + _OBLOG_LOG(level, "[STAT] [PART_INFO] " fmt " TENANT=%lu PART=%s IS_PG=%d STATE=%s TRANS_COUNT=%ld " \ + "START_TSTAMP=%s START_FROM_CREATE=%d", \ + ##arg, \ + part_info->pkey_.get_tenant_id(), \ + to_cstring(part_info->pkey_), \ + part_info->pkey_.is_pg(), \ + part_info->print_state(), part_info->ctx_.sv_.trans_count_, \ + TS_TO_STR(part_info->serve_info_.start_serve_timestamp_), \ + part_info->serve_info_.start_serve_from_create_); \ + } \ + } while (0) + +#define PART_ISTAT(part_info, fmt, arg...) PART_STAT(INFO, part_info, fmt, ##arg) +#define PART_DSTAT(part_info, fmt, arg...) PART_STAT(DEBUG, part_info, fmt, ##arg) + +#define REVERT_PART_INFO(info, ret) \ + do { \ + if (NULL != info && NULL != map_) { \ + int revert_ret = map_->revert(info); \ + if (OB_SUCCESS != revert_ret) { \ + LOG_ERROR("revert PartInfo fail", K(revert_ret), K(info)); \ + ret = OB_SUCCESS == ret ? revert_ret : ret; \ + } else { \ + info = NULL; \ + } \ + } \ + } while (0) + + +namespace oceanbase +{ +namespace liboblog +{ + +struct ObLogPartInfo +{ + // PART_STATE_NORMAL: normal state + // 1. Enter NORMAL state immediately after the partition is added + // 2. accepting new transaction writes, transaction count allowed to rise + // + // PART_STATE_OFFLINE: Offline state + // 1. If a partition is deleted by a schema, e.g. drop table, drop partition, etc., it enters the OFFLINE state + // 2. If the partition receives a split completion log and all previous data has been output, it enters the OFFLINE state + // 3. if the partition receives the OFFLINE log and all previous data has been output, it enters the OFFLINE state + // 4. 
This state no longer accepts new transaction writes and the number of transactions no longer changes + enum + { + PART_STATE_INVALID = 0, // Invalid state (uninitialized state) + PART_STATE_NORMAL = 1, // normal service state + PART_STATE_OFFLINE = 3, // offline state + PART_STATE_NOT_SERVED = 4 // Partition not serviced state + }; + + // Joint variables to facilitate atomic operations + union Ctx + { + struct + { + int64_t state_:8; // The lower 8 bits are status variables + int64_t trans_count_:56; // The high 56 bits are the number of transactions + } sv_; + + int64_t iv_; // Full 64-bit values + } ctx_; + + common::ObPartitionKey pkey_; + PartServeInfo serve_info_; + + ObLogPartInfo() { reset(); } + ~ObLogPartInfo() { reset(); } + + void reset(); + const char *print_state() const; + bool operator<(const ObLogPartInfo &other) const; + + /// Initialize + /// Set from INVALID state to PART_STATE_NORMAL state if serviced + /// If not in service, set from INVALID state to PART_STATE_NOT_SERVED state + int init(const common::ObPartitionKey &pkey, + const bool start_serve_from_create, // Whether to start the service from the creation of a partition + const int64_t start_tstamp, + const bool is_served); + + /// Goes to the offline state + /// Returns true if this is the state transition achieved by this operation, otherwise false + /// + /// @param [out] end_trans_count The number of transactions at the end, valid when the return value is true + bool offline(int64_t &end_trans_count); + + /// Increase the number of transactions when the status is SERVING + void inc_trans_count_on_serving(bool &is_serving); + + /// Decrement the number of transactions and return whether the partition needs to be deleted + int dec_trans_count(bool &need_remove); + + bool is_invalid() const { return PART_STATE_INVALID == ctx_.sv_.state_; } + bool is_serving() const { return is_serving_state_(ctx_.sv_.state_); } + bool is_offline() const { return PART_STATE_OFFLINE == ctx_.sv_.state_; } + bool is_not_serving() const { return PART_STATE_NOT_SERVED == ctx_.sv_.state_; } + int64_t get_trans_count() const { return ctx_.sv_.trans_count_; } + + bool is_serving_state_(const int64_t state) const + { + // To facilitate the extension of more states in the future, there may be more than one NORMAL state in service + return PART_STATE_NORMAL == state; + } + + TO_STRING_KV("state", print_state(), + "trans_count", ctx_.sv_.trans_count_, + K_(pkey), K_(serve_info)); +}; + +// Print partition information by tenant +struct PartInfoPrinter +{ + uint64_t tenant_id_; + int64_t serving_part_count_; + int64_t offline_part_count_; + int64_t not_served_part_count_; + + explicit PartInfoPrinter(const uint64_t tenant_id) : + tenant_id_(tenant_id), + serving_part_count_(0), + offline_part_count_(0), + not_served_part_count_(0) + {} + bool operator()(const common::ObPartitionKey &pkey, ObLogPartInfo *part_info); +}; + +struct PartInfoScannerByTenant +{ + uint64_t tenant_id_; + common::ObArray pkey_array_; + + explicit PartInfoScannerByTenant(const uint64_t tenant_id) : + tenant_id_(tenant_id), + pkey_array_() + {} + bool operator()(const common::ObPartitionKey &pkey, ObLogPartInfo *part_info); +}; + +struct PartInfoScannerByTableID +{ + uint64_t table_id_; + common::ObArray pkey_array_; + + explicit PartInfoScannerByTableID(const uint64_t table_id); + bool operator()(const common::ObPartitionKey &pkey, ObLogPartInfo *part_info); +}; + +typedef ObEasyHazardMap PartInfoMap; +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_mgr.cpp 
b/src/liboblog/src/ob_log_part_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..14ad654ad25d2980146d4e00cf0cc2dfb2fe44bd --- /dev/null +++ b/src/liboblog/src/ob_log_part_mgr.cpp @@ -0,0 +1,3848 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_part_mgr.h" + +#include <fnmatch.h> // FNM_CASEFOLD +#include "share/schema/ob_schema_struct.h" // USER_TABLE +#include "share/inner_table/ob_inner_table_schema.h" // OB_ALL_DDL_OPERATION_TID +#include "share/schema/ob_part_mgr_util.h" // ObTablePartitionKeyIter + +#include "ob_log_schema_getter.h" // IObLogSchemaGetter, ObLogSchemaGuard +#include "ob_log_utils.h" // is_ddl_table +#include "ob_log_config.h" // TCONF +#include "ob_log_instance.h" // TCTX +#include "ob_log_table_matcher.h" // IObLogTableMatcher +#include "ob_log_tenant.h" // ObLogTenant + +#define _STAT(level, fmt, args...) _OBLOG_LOG(level, "[STAT] [PartMgr] " fmt, ##args) +#define STAT(level, fmt, args...) OBLOG_LOG(level, "[STAT] [PartMgr] " fmt, ##args) +#define _ISTAT(fmt, args...) _STAT(INFO, fmt, ##args) +#define ISTAT(fmt, args...) STAT(INFO, fmt, ##args) +#define _DSTAT(fmt, args...) _STAT(DEBUG, fmt, ##args) +#define DSTAT(fmt, args...) STAT(DEBUG, fmt, ##args) + +#define CHECK_SCHEMA_VERSION(check_schema_version, fmt, arg...) 
\ + do { \ + if (OB_UNLIKELY(check_schema_version < ATOMIC_LOAD(&cur_schema_version_))) { \ + LOG_ERROR(fmt, K(tenant_id_), K(cur_schema_version_), K(check_schema_version), ##arg); \ + if (!TCONF.skip_reversed_schema_verison) { \ + ret = OB_INVALID_ARGUMENT; \ + } \ + } \ + } while (0) + +#define PROXY_INFO_TABLE_NAME "ob_all_proxy" +#define PROXY_CONFIG_TABLE_OLD_NAME "ob_all_proxy_config" +#define PROXY_CONFIG_TABLE_NAME "ob_all_proxy_app_config" +#define PROXY_STAT_TABLE_NAME "ob_all_proxy_stat" +#define PROXY_KV_TABLE_NAME "ob_all_proxy_kv_table" +#define PROXY_VIP_TENANT_TABLE_NAME "ob_all_proxy_vip_tenant" +#define PROXY_VIP_TENANT_TABLE_OLD_NAME "ob_all_proxy_vip_tenant_table" + +namespace oceanbase +{ + +using namespace common; +using namespace share; +using namespace share::schema; + +namespace liboblog +{ +ObLogPartMgr::ObLogPartMgr(ObLogTenant &tenant) : host_(tenant) +{ + reset(); +} + +ObLogPartMgr::~ObLogPartMgr() +{ + reset(); +} + +int ObLogPartMgr::init(const uint64_t tenant_id, + const int64_t start_schema_version, + const bool enable_oracle_mode_match_case_sensitive, + PartInfoMap &map, + GIndexCache &gi_cache, + TableIDCache &table_id_cache, + PartCBArray &part_add_cb_array, + PartCBArray &part_rc_cb_array) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(schema_cond_.init(common::ObWaitEventIds::OBLOG_PART_MGR_SCHEMA_VERSION_WAIT))) { + LOG_ERROR("schema_cond_ init fail", KR(ret)); + } else { + tenant_id_ = tenant_id; + map_ = &map; + global_normal_index_table_cache_ = &gi_cache; + table_id_cache_ = &table_id_cache; + part_add_cb_array_ = &part_add_cb_array; + part_rc_cb_array_ = &part_rc_cb_array; + cur_schema_version_ = start_schema_version; + enable_oracle_mode_match_case_sensitive_ = enable_oracle_mode_match_case_sensitive; + + inited_ = true; + LOG_INFO("init PartMgr succ", K(tenant_id), K(start_schema_version)); + } + + return ret; +} + +void ObLogPartMgr::reset() +{ + inited_ = false; + tenant_id_ = OB_INVALID_ID; + map_ = NULL; + global_normal_index_table_cache_ = NULL; + table_id_cache_ = NULL; + part_add_cb_array_ = NULL; + part_rc_cb_array_ = NULL; + cur_schema_version_ = OB_INVALID_VERSION; + enable_oracle_mode_match_case_sensitive_ = false; + schema_cond_.destroy(); +} + +int ObLogPartMgr::add_table( + const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_serve_tstamp, + const bool is_create_partition, + bool &is_table_should_ignore_in_committer, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool table_is_ignored = false; + is_table_should_ignore_in_committer = false; + const ObSimpleTableSchemaV2 *table_schema = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(start_schema_version <= 0) + || OB_UNLIKELY(start_serve_tstamp <= 0) + || OB_UNLIKELY(extract_tenant_id(table_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(start_schema_version), K(start_serve_tstamp), K(table_id), + K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_schema_guard_and_schemas_(table_id, start_schema_version, timeout, + table_is_ignored, schema_guard, table_schema, tenant_name, db_name))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get_schema_guard_and_schemas_ fail", KR(ret), K(table_id), K(start_schema_version)); + } + } else if (table_is_ignored) { + // table ignored + if (table_schema->is_tmp_table()) { + LOG_INFO("add table ddl is ignored in part mgr, and also should be ignored in committer output", + "table_id", table_id, + "table_name", table_schema->get_table_name(), + "is_tmp_table", table_schema->is_tmp_table()); + is_table_should_ignore_in_committer = true; + } + } else if (OB_ISNULL(table_schema) || OB_ISNULL(tenant_name) || OB_ISNULL(db_name)) { + LOG_ERROR("invalid schema", K(table_schema), K(tenant_name), K(db_name)); + ret = OB_ERR_UNEXPECTED; + } else { + // Requires adding tables in order, encountering a Schema version reversal case, + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(start_schema_version, "add-table schema version reversed", + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name()); + + if (TCONF.test_mode_on) { + int64_t block_time_us = TCONF.test_mode_block_create_table_ddl_sec * _SEC_; + if (block_time_us > 0) { + ISTAT("[ADD_TABLE] [TEST_MODE_ON] block to create table", + K_(tenant_id), K(table_id), K(block_time_us)); + usleep((useconds_t)block_time_us); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(add_table_(start_serve_tstamp, is_create_partition, table_schema, tenant_name, + db_name, timeout))) { + LOG_ERROR("add table fail", KR(ret), K(table_id), K(tenant_name), K(db_name), + "table_name", table_schema->get_table_name(), K(start_serve_tstamp), + K(is_create_partition)); + } else { + // success + } + } + } + + return ret; +} + +bool ObLogPartMgr::has_physical_part_(const ObSimpleTableSchemaV2 &table_schema) +{ + // Normal tables have physical partitions when not binding to a tablegroup + return (! table_schema.get_binding()); +} + +bool ObLogPartMgr::has_physical_part_(const ObTablegroupSchema &tg_schema) +{ + // tablegroup has physical partitions when the binding attribute is in effect + return (tg_schema.get_binding()); +} + +int ObLogPartMgr::alter_table(const uint64_t table_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &old_schema_guard, + ObLogSchemaGuard &new_schema_guard, + const char *&old_tenant_name, + const char *&old_db_name, + const char *event, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // In order to support alter table add/drop parition, only need to get the corresponding + // table_schema based on the old schema version, not the tenant/database schema based on the old version. + bool table_is_ignored = false; + const ObSimpleTableSchemaV2 *old_table_schema = NULL; + const ObSimpleTableSchemaV2 *new_table_schema = NULL; + // get tenant mode: MYSQL or ORACLE + // 1. oracle database/table matc needs to be case sensitive + // 2. 
mysql match don't needs to be case sensitive + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(schema_version_before_alter <= 0) + || OB_UNLIKELY(schema_version_after_alter <= 0) + || OB_UNLIKELY(extract_tenant_id(table_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(schema_version_before_alter), K(schema_version_after_alter), + K(table_id), K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } + // Get the old version of schema + else if (OB_FAIL(get_schema_guard_and_schemas_(table_id, schema_version_before_alter, timeout, + table_is_ignored, old_schema_guard, old_table_schema, old_tenant_name, old_db_name))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get old schemas fail", KR(ret), K(table_id), K(schema_version_before_alter)); + } + } else if (table_is_ignored) { + // table is ignored + } + // get new schema + else if (OB_FAIL(get_schema_guard_and_table_schema_(table_id, schema_version_after_alter, + timeout, new_schema_guard, new_table_schema))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get schemas fail", KR(ret), K(table_id), K(schema_version_after_alter)); + } + } else if (OB_ISNULL(old_tenant_name) || OB_ISNULL(old_db_name)) { + LOG_ERROR("invalid schema", K(old_tenant_name), K(old_db_name)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(get_tenant_compat_mode(tenant_id_, compat_mode, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id_), + "compat_mode", print_compat_mode(compat_mode), KPC(new_table_schema)); + } + } else { + // Require sequential DDL, encounter Schema version reversal, + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(schema_version_after_alter, "alter-table schema version reversed", + "table_id", new_table_schema->get_table_id(), + "table_name", new_table_schema->get_table_name()); + + if (OB_SUCC(ret)) { + bool table_is_chosen = false; + const bool is_tablegroup = false; + const bool has_physical_part = has_physical_part_(*new_table_schema); + bool is_primary_table_chosen = false; + + if (TCONF.test_mode_on) { + int64_t block_time_us = TCONF.test_mode_block_alter_table_ddl_sec * _SEC_; + if (block_time_us > 0) { + ISTAT("[ALTER_TABLE] [TEST_MODE_ON] block to alter table", + K_(tenant_id), K(table_id), K(block_time_us)); + usleep((useconds_t)block_time_us); + } + } + + // Filtering tables to operate only on whitelisted tables + // Use the old TENANT.DB.TABLE to filter + // + // In fact filtering with both the new and old names here is "problematic", as long as we whitelist to the DB level or table level. + // Both RENAME and ALTER TABLE operations will have problems, for example, if a table that was initially + // served is not served after RENAME, or if a table that is not served is not served after RENAME. + // RENAME is serviced, neither of which is currently supported and will have correctness issues. + if (OB_FAIL(filter_table_(old_table_schema, old_tenant_name, old_db_name, compat_mode, + table_is_chosen, is_primary_table_chosen))) { + LOG_ERROR("filter table fail", KR(ret), K(table_id), + "compat_mode", print_compat_mode(compat_mode), K(old_tenant_name), K(old_db_name)); + } else if (! 
table_is_chosen) { + LOG_INFO("table is not served, alter table DDL is filtered", K(table_is_chosen), + "table_id", old_table_schema->get_table_id(), + "table_name", old_table_schema->get_table_name(), + K(old_db_name), + K(old_tenant_name)); + } + // Add or delete partitions if the table is selected + else if (OB_FAIL(alter_table_add_or_drop_partition_( + is_tablegroup, + has_physical_part, + start_serve_timestamp, + old_table_schema, + new_table_schema, + new_table_schema->get_database_id(), + event))) { + LOG_ERROR("alter table add or drop partition fail", KR(ret), K(is_tablegroup), + K(has_physical_part), K(start_serve_timestamp)); + } else { + // succ + } + } + } + + return ret; +} + +int ObLogPartMgr::split_table(const uint64_t table_id, + const int64_t new_schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &new_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool table_is_ignored = false; + const ObSimpleTableSchemaV2 *new_table_schema = NULL; + // get tenant mode: MYSQL or ORACLE + // 1. oracle database/table matc needs to be case sensitive + // 2. mysql match don't needs to be case sensitive + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == table_id) + || OB_UNLIKELY(new_schema_version <= 0) + || OB_UNLIKELY(start_serve_timestamp <= 0) + || OB_UNLIKELY(extract_tenant_id(table_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(table_id), K(new_schema_version), K(start_serve_timestamp), + K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_schema_guard_and_schemas_(table_id, new_schema_version, timeout, + table_is_ignored, new_schema_guard, new_table_schema, tenant_name, db_name))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get schemas fail", KR(ret), K(table_id), K(new_schema_version)); + } + } else if (table_is_ignored) { + // table is ignored + } else if (OB_ISNULL(new_table_schema) || OB_ISNULL(tenant_name) || OB_ISNULL(db_name)) { + LOG_ERROR("invalid schema", K(new_table_schema), K(tenant_name), K(db_name)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(get_tenant_compat_mode(tenant_id_, compat_mode, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id_), + "compat_mode", print_compat_mode(compat_mode), KPC(new_table_schema)); + } + } else { + // Require sequential DDL, encounter Schema version reversal, + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(new_schema_version, "table-split DDL schema version reversed", + "table_id", new_table_schema->get_table_id(), + "table_name", new_table_schema->get_table_name()); + + if (OB_SUCC(ret)) { + if (OB_FAIL(split_table_(new_table_schema, tenant_name, db_name, start_serve_timestamp, compat_mode))) { + LOG_ERROR("split_table_ fail", KR(ret), K(table_id), K(start_serve_timestamp), + "compat_mode", print_compat_mode(compat_mode), K(tenant_name), K(db_name)); + } else { + // success + } + } + } + + return ret; +} + +int ObLogPartMgr::drop_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + bool &is_table_should_ignore_in_committer, + ObLogSchemaGuard &old_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t 
timeout) +{ + int ret = OB_SUCCESS; + const ObSimpleTableSchemaV2 *table_schema = NULL; + bool table_is_ignored = false; + is_table_should_ignore_in_committer = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(schema_version_before_drop <= 0) + || OB_UNLIKELY(schema_version_after_drop <= 0) + || OB_UNLIKELY(schema_version_before_drop > schema_version_after_drop) + || OB_UNLIKELY(extract_tenant_id(table_id) != tenant_id_)) { + LOG_ERROR("invalid arguments", K(schema_version_before_drop), + K(schema_version_after_drop), K(cur_schema_version_), K(tenant_id_), K(table_id)); + ret = OB_INVALID_ARGUMENT; + } + // TODO: Currently you need to fetch the Schema every time you add a table, this process is time consuming and should consider not fetching the Schema afterwards + else if (OB_FAIL(get_schema_guard_and_schemas_(table_id, schema_version_before_drop, timeout, + table_is_ignored, old_schema_guard, table_schema, tenant_name, db_name))) { + if (OB_TENANT_HAS_BEEN_DROPPED != ret && OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_and_schemas_ fail", KR(ret), K(table_id), K(schema_version_before_drop)); + } + } else if (table_is_ignored) { + // table is ignored + if (table_schema->is_tmp_table()) { + LOG_INFO("drop table ddl is ignored in part mgr, and also should be ignored in committer output", + "table_id", table_id, + "table_name", table_schema->get_table_name(), + "is_tmp_table", table_schema->is_tmp_table()); + is_table_should_ignore_in_committer = true; + } + } else { + // Ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(schema_version_after_drop, "drop-table schema version reversed", + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name()); + + if (OB_SUCC(ret)) { + if (OB_FAIL(drop_table_(table_schema))) { + LOG_ERROR("drop table fail", KR(ret), K(table_id)); + } else { + // succ + } + } + } + + return ret; +} + +int ObLogPartMgr::add_index_table(const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_serve_tstamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool table_is_ignored = false; + const ObSimpleTableSchemaV2 *index_table_schema = NULL; + const ObSimpleTableSchemaV2 *primary_table_schema = NULL; + uint64_t primary_table_id = OB_INVALID_ID; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(start_schema_version <= 0) + || OB_UNLIKELY(start_serve_tstamp <= 0) + || OB_UNLIKELY(extract_tenant_id(table_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(start_schema_version), K(start_serve_tstamp), K(table_id), + K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } + // TODO: Currently you need to fetch the Schema every time you add a table, this process is time consuming and should consider not fetching the Schema afterwards + else if (OB_FAIL(get_schema_guard_and_schemas_(table_id, start_schema_version, timeout, + table_is_ignored, schema_guard, index_table_schema, tenant_name, db_name))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get schemas fail", KR(ret), K(table_id), K(start_schema_version)); + } + } else if (table_is_ignored) { + // table is ignored + } else if (OB_ISNULL(index_table_schema) || OB_ISNULL(tenant_name) || OB_ISNULL(db_name)) { + LOG_ERROR("invalid schema", K(index_table_schema), K(tenant_name), K(db_name)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(OB_INVALID_ID == (primary_table_id = index_table_schema->get_data_table_id()))) { + LOG_ERROR("primary_table_id is not valid", K(primary_table_id), KPC(index_table_schema)); + ret = OB_ERR_UNEXPECTED; + // Get the global index table corresponding to the main table schema + // Get table_schema based on the global index table schema, whitelist filter based on the master table, + // If the master table matches, the global index table also matches; otherwise it does not match + } else if (OB_FAIL(get_simple_table_schema_(primary_table_id, timeout, schema_guard, primary_table_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), + "index_table_id", index_table_schema->get_table_id(), + "index_table_name", index_table_schema->get_table_name(), + K(primary_table_id), "primary_table_name", primary_table_schema->get_table_name()); + } + } else if (OB_ISNULL(primary_table_schema)) { + LOG_ERROR("invalid schema", K(primary_table_schema)); + ret = OB_ERR_UNEXPECTED; + } else { + // Requires adding tables in order, encountering a Schema version reversal case, + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(start_schema_version, "add-index-table schema version reversed", + "table_id", index_table_schema->get_table_id(), + "table_name", index_table_schema->get_table_name()); + + if (OB_SUCC(ret)) { + const bool is_create_partition = true; + if (OB_FAIL(add_table_(start_serve_tstamp, is_create_partition, index_table_schema, + tenant_name, db_name, timeout, primary_table_schema))) { + LOG_ERROR("add table fail", KR(ret), + "index_table_id", table_id, + "index_table_name", index_table_schema->get_table_name(), + K(tenant_name), + K(db_name), + "is_global_normal_index_table", index_table_schema->is_global_normal_index_table(), + "is_global_unique_index_table", index_table_schema->is_global_unique_index_table(), + K(primary_table_id), "primary_table_name", primary_table_schema->get_table_name(), + K(start_schema_version), K(start_serve_tstamp), + K(is_create_partition)); + } else { + // succ + } + } + } + + return ret; +} + +int ObLogPartMgr::drop_index_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + ObLogSchemaGuard &old_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) +{ + int ret = 
OB_SUCCESS; + const ObSimpleTableSchemaV2 *table_schema = NULL; + bool table_is_ignored = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(schema_version_before_drop <= 0) + || OB_UNLIKELY(schema_version_after_drop <= 0) + || OB_UNLIKELY(schema_version_before_drop > schema_version_after_drop) + || OB_UNLIKELY(extract_tenant_id(table_id) != tenant_id_)) { + LOG_ERROR("invalid arguments", K(schema_version_before_drop), + K(schema_version_after_drop), K(cur_schema_version_), K(table_id), K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } + // TODO: Currently you need to fetch the Schema every time you add a table, this process is time consuming and should consider not fetching the Schema afterwards + else if (OB_FAIL(get_schema_guard_and_schemas_(table_id, schema_version_before_drop, timeout, + table_is_ignored, old_schema_guard, table_schema, tenant_name, db_name))) { + if (OB_TENANT_HAS_BEEN_DROPPED != ret && OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_and_schemas_ fail", KR(ret), K(table_id), K(schema_version_before_drop)); + } + } else if (table_is_ignored) { + // table is ignored + } else { + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(schema_version_after_drop, "drop-index-table schema version reversed", + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name()); + + // drop_table_ supports handling of global general indexes and globally unique indexed tables + // 1. for globally unique indexes, perform delete logic + // 2. For global common indexes, clear the cache + if (OB_SUCC(ret)) { + if (OB_FAIL(drop_table_(table_schema))) { + LOG_ERROR("drop table fail", KR(ret), "index_table_id", table_id, + "index_table_name", table_schema->get_table_name(), + "is_global_normal_index_table", table_schema->is_global_normal_index_table(), + "is_global_unique_index_table", table_schema->is_global_unique_index_table()); + } else { + // succ + } + } + } + + return ret; +} + +int ObLogPartMgr::add_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool is_create_partition = true; // add new create partition + const ObTablegroupSchema *tg_schema = NULL; + TenantSchemaInfo tenant_schema_info; + const uint64_t tenant_id = extract_tenant_id(tablegroup_id); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == tablegroup_id) + || OB_UNLIKELY(schema_version <= 0) + || OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_serve_timestamp) + || OB_UNLIKELY(extract_tenant_id(tablegroup_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(tablegroup_id), K(schema_version), K(start_serve_timestamp), + K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_lazy_schema_guard_and_tablegroup_schema_(tablegroup_id, schema_version, + timeout, schema_guard, tg_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard_and_tablegroup_schema_ fail", KR(ret), K(tablegroup_id), + K(schema_version)); + } + } else if (OB_FAIL(schema_guard.get_tenant_schema_info(tenant_id, tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_schema_info fail", KR(ret), K(tenant_id), K(tablegroup_id)); + } + } else { + // set tenant name + tenant_name = tenant_schema_info.name_; + + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(schema_version, "add-tablegroup schema version reversed", K(tablegroup_id)); + + if (OB_FAIL(ret)) { + // fail + } else if (OB_FAIL(add_tablegroup_partition_( + tablegroup_id, + *tg_schema, + start_serve_timestamp, + is_create_partition, + tenant_name, + timeout))) { + LOG_ERROR("add_tablegroup_partition_ fail", KR(ret), + K(tablegroup_id), + K(tg_schema), + K(schema_version), + K(start_serve_timestamp), + K(tenant_name), + K(is_create_partition)); + } else { + // succ + } + } + + return ret; +} + +int ObLogPartMgr::add_tablegroup_partition_( + const uint64_t tablegroup_id, + const ObTablegroupSchema &tg_schema, + const int64_t start_serve_timestamp, + const bool is_create_partition, + const char *tenant_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + int64_t served_part_count = 0; + const uint64_t db_id = OB_INVALID_ID; + // tablegroup bind indicates is PG, with entity partition + const bool has_physical_part = has_physical_part_(tg_schema); + bool check_dropped_schema = false; + ObTablegroupPartitionKeyIter pkey_iter(tg_schema, check_dropped_schema); + const char *tablegroup_name = tg_schema.get_tablegroup_name_str(); + const bool is_tablegroup = true; + // get tenant mode: MYSQL or ORACLE + // 1. oracle database/table matc needs to be case sensitive + // 2. mysql match don't needs to be case sensitive + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + bool tablegroup_is_chosen = false; + + // TABLEGROUP whitelist filtering based on tablegroup + if (OB_FAIL(get_tenant_compat_mode(tenant_id_, compat_mode, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id_)); + } + } else if (OB_FAIL(filter_tablegroup_(&tg_schema, tenant_name, compat_mode, tablegroup_is_chosen))) { + LOG_ERROR("filter_tablegroup_ fail", KR(ret), K(tablegroup_id), + "tablegroup_name", tg_schema.get_tablegroup_name_str(), + K(tenant_id_), K(tenant_name), K(tablegroup_is_chosen)); + } else if (! 
tablegroup_is_chosen) { + // tablegroup is filtered and no longer processed + LOG_INFO("tablegroup is not served, tablegroup add DDL is filtered", K(tablegroup_id), + "tablegroup_name", tg_schema.get_tablegroup_name_str(), + K(tenant_id_), K(tenant_name), K(tablegroup_is_chosen)); + } else if (OB_FAIL(add_table_or_tablegroup_( + is_tablegroup, + tablegroup_id, + tablegroup_id, + db_id, + has_physical_part, + is_create_partition, + start_serve_timestamp, + pkey_iter, + tg_schema, + served_part_count))) { + LOG_ERROR("add_part_ fail", KR(ret), + K(is_tablegroup), + K(tablegroup_id), + K(db_id), + K(has_physical_part), + K(is_create_partition), + K(start_serve_timestamp), + K(tg_schema)); + } else { + _ISTAT("[DDL] [ADD_TABLEGROUP] TENANT=%lu TABLEGROUP=%s(%ld) HAS_PHY_PART=%d " + "START_TSTAMP=%ld SERVED_PART_COUNT=%ld", + tenant_id_, tablegroup_name, tablegroup_id, + has_physical_part, start_serve_timestamp, served_part_count); + } + } + + return ret; +} + +int ObLogPartMgr::drop_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const ObTablegroupSchema *tg_schema = NULL; + TenantSchemaInfo tenant_schema_info; + const uint64_t tenant_id = extract_tenant_id(tablegroup_id); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == tablegroup_id) + || OB_UNLIKELY(schema_version_after_drop <= 0) + || OB_UNLIKELY(schema_version_before_drop > schema_version_after_drop) + || OB_UNLIKELY(extract_tenant_id(tablegroup_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(tablegroup_id), K(tenant_id_), + K(schema_version_before_drop), K(schema_version_after_drop)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_lazy_schema_guard_and_tablegroup_schema_(tablegroup_id, + schema_version_before_drop, timeout, schema_guard, tg_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard_and_tablegroup_schema_ fail", KR(ret), K(tablegroup_id), + K(schema_version_before_drop)); + } + } else if (OB_FAIL(schema_guard.get_tenant_schema_info(tenant_id, tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_schema_info fail", KR(ret), K(tenant_id), K(tablegroup_id)); + } + } else { + tenant_name = tenant_schema_info.name_; + + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(schema_version_after_drop, "drop-tablegroup schema version reversed", K(tablegroup_id)); + + if (OB_FAIL(ret)) { + // fail + } else if (OB_FAIL(drop_tablegroup_partition_(tablegroup_id, *tg_schema))) { + LOG_ERROR("drop_tablegroup_partition_ fail", KR(ret), + K(tablegroup_id), "tablegroup_name", tg_schema->get_tablegroup_name_str()); + } + } + + return ret; +} + +// TODO +// Now when tablegroup is deleted, PG is deleted immediately, because now the table does not support PG migration, so this is no problem, +// in the future, after supporting PG migration, PG can not be deleted directly, to rely on OFFLINE log, because the data must be all processed, otherwise data will be lost +int ObLogPartMgr::drop_tablegroup_partition_( + const uint64_t tablegroup_id, + const ObTablegroupSchema &tg_schema) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + const char *tablegroup_name = tg_schema.get_tablegroup_name_str(); + int64_t served_part_count = 0; + const bool is_tablegroup = true; + + if (OB_FAIL(drop_table_or_tablegroup_( + is_tablegroup, + tablegroup_id, + tablegroup_name, + tg_schema, + served_part_count))) { + LOG_ERROR("drop_table_or_tablegroup_ fail", KR(ret), + K(is_tablegroup), + K(tablegroup_id), + K(tablegroup_name)); + } else { + _ISTAT("[DDL] [DROP_TABLEGROUP] [END] TENANT=%lu TABLEGROUP=%s(%ld) HAS_PHY_PART=%d " + "SERVED_PART_COUNT=%ld TOTAL_PART_COUNT=%ld", + tenant_id_, tg_schema.get_tablegroup_name_str(), tablegroup_id, + tg_schema.get_binding(), served_part_count, map_->get_valid_count()); + } + } + + return ret; +} + +template +int ObLogPartMgr::drop_table_or_tablegroup_( + const bool is_tablegroup, + const uint64_t table_id, + const char *table_name, + PartitionSchema &table_schema, + int64_t &served_part_count) +{ + int ret = OB_SUCCESS; + served_part_count = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + const uint64_t tenant_id = extract_tenant_id(table_id); + const bool is_binding = table_schema.get_binding(); + const char *drop_part_type_str = is_tablegroup ? "TABLEGROUP" : "TABLE"; + + // Iterate through the partitions being served and delete the partitions in the corresponding table + PartInfoScannerByTableID scanner_by_table(table_id); + if (OB_FAIL(map_->for_each(scanner_by_table))) { + LOG_ERROR("scan map by table id fail", KR(ret), K(table_id)); + } else { + _ISTAT("[DDL] [DROP_%s] [BEGIN] TENANT=%lu %s=%s(%lu) IS_BINDING=%d PART_COUNT_IN_%s=%ld/%ld", + drop_part_type_str, tenant_id, drop_part_type_str, + table_name, table_id, is_binding, drop_part_type_str, + scanner_by_table.pkey_array_.count(), + map_->get_valid_count()); + + for (int64_t idx = 0; OB_SUCC(ret) && idx < scanner_by_table.pkey_array_.count(); idx++) { + const ObPartitionKey &pkey = scanner_by_table.pkey_array_.at(idx); + + ret = offline_partition_(pkey); + + if (OB_ENTRY_NOT_EXIST == ret) { + DSTAT("[DDL] [DROP_TABLE] partition not served", K(pkey), K(is_tablegroup)); + ret = OB_SUCCESS; + } else if (OB_SUCCESS != ret) { + LOG_ERROR("offline partition fail", KR(ret), K(pkey), K(is_tablegroup)); + } else { + served_part_count++; + } + } // for + } + } + + return ret; +} + +int ObLogPartMgr::split_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t new_schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const ObTablegroupSchema *tg_schema = NULL; + // get tenant mode: MYSQL or ORACLE + // 1. oracle database/table matc needs to be case sensitive + // 2. mysql match don't needs to be case sensitive + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + bool tablegroup_is_chosen = false; + TenantSchemaInfo tenant_schema_info; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == tablegroup_id) + || OB_UNLIKELY(new_schema_version <= 0) + || OB_UNLIKELY(start_serve_timestamp <= 0) + || OB_UNLIKELY(extract_tenant_id(tablegroup_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(tablegroup_id), K(new_schema_version), K(start_serve_timestamp), + K(tenant_id_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_lazy_schema_guard_and_tablegroup_schema_(tablegroup_id, new_schema_version, + timeout, schema_guard, tg_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard_and_tablegroup_schema_ fail", KR(ret), K(tablegroup_id), + K(new_schema_version)); + } + } else if (OB_FAIL(schema_guard.get_tenant_schema_info(tenant_id_, tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_schema_info fail", KR(ret), K(tenant_id_), K(tablegroup_id)); + } + } else if (OB_FAIL(get_tenant_compat_mode(tenant_id_, compat_mode, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id_)); + } + } else if (OB_FAIL(filter_tablegroup_(tg_schema, tenant_schema_info.name_, compat_mode, tablegroup_is_chosen))) { + LOG_ERROR("filter_tablegroup_ fail", KR(ret), K(tablegroup_id), + "tablegroup_name", tg_schema->get_tablegroup_name_str(), + K(tenant_id_), K(tenant_schema_info), + K(tablegroup_is_chosen)); + } else if (! tablegroup_is_chosen) { + // tablegroup filtered + LOG_INFO("tablegroup is not served, tablegroup split DDL is filtered", K(tablegroup_is_chosen), + K(tablegroup_id), + "tablegroup_name", tg_schema->get_tablegroup_name_str(), + K(tenant_id_), K(tenant_schema_info)); + } else { + tenant_name = tenant_schema_info.name_; + + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(new_schema_version, "split-tablegroup schema version reversed", K(tablegroup_id)); + + if (OB_FAIL(ret)) { + // fail + } else if (OB_FAIL(split_tablegroup_partition_(tablegroup_id, *tg_schema, + start_serve_timestamp))) { + LOG_ERROR("split_tablegroup_partition_ fail", KR(ret), K(tablegroup_id), + K(tg_schema), K(new_schema_version), K(start_serve_timestamp)); + } else { + // succ + } + } + + return ret; +} + +int ObLogPartMgr::split_tablegroup_partition_( + const uint64_t tablegroup_id, + const ObTablegroupSchema &tg_schema, + const int64_t start_serve_timestamp) +{ + int ret = OB_SUCCESS; + const uint64_t db_id = OB_INVALID_ID; + const bool has_physical_part = has_physical_part_(tg_schema); + bool check_dropped_schema = false; + ObTablegroupPartitionKeyIter pkey_iter(tg_schema, check_dropped_schema); + const bool is_tablegroup = true; + + if (OB_FAIL(split_table_or_tablegroup_( + is_tablegroup, + tablegroup_id, + tablegroup_id, + db_id, + has_physical_part, + start_serve_timestamp, + pkey_iter, + tg_schema))) { + LOG_ERROR("split_table_or_tablegroup_ fail", KR(ret), + K(tenant_id_), + K(is_tablegroup), + K(tablegroup_id), + K(db_id), + K(has_physical_part), + K(start_serve_timestamp), + K(tg_schema)); + } else { + _ISTAT("[DDL] [SPLIT_TABLEGROUP] TENANT=%lu TABLEGROUP=%s(%ld) HAS_PHY_PART=%d START_TSTAMP=%ld", + extract_tenant_id(tablegroup_id), + tg_schema.get_tablegroup_name_str(), tablegroup_id, + has_physical_part, start_serve_timestamp); + } + + return ret; +} + +template +int ObLogPartMgr::split_table_or_tablegroup_( + const bool is_tablegroup, + const uint64_t table_id, + const uint64_t tablegroup_id, + const uint64_t db_id, + const bool 
has_physical_part, + const int64_t start_serve_timestamp, + PartitionKeyIter &pkey_iter, + PartitionSchema &table_schema) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (! need_to_support_split_when_in_multi_instance_()) { + LOG_ERROR("split table under multi instance NOT SUPPORTED", + "instance_index", (int64_t)(TCONF.instance_index), + "instance_num", (int64_t)(TCONF.instance_num), + "table_id", table_schema.get_table_id(), + K(is_tablegroup), K(has_physical_part)); + ret = OB_NOT_SUPPORTED; + } else if (! table_schema.is_in_logical_split()) { + // When a user modifies a partition rule for a table that has no partition rule and keeps the partition count at 1, it does not enter the split state + LOG_INFO("table is not in splitting, maybe its partition number is not modified", + K_(tenant_id), K(is_tablegroup), K(table_id), + "partition_status", table_schema.get_partition_status(), + "schema_version", table_schema.get_schema_version(), + K(has_physical_part)); + } else { + ObPartitionKey dst_pkey; + bool is_served = false; + // The split new partition is a newly created partition that synchronises data from scratch + const bool is_create_partition = true; + + // Iterate over each split partition, get its split source partition, and add the split partition if the split source partition is in service + // Note that. + // 1. This will only affect scenarios with multiple liboblog instances, if there is only one liboblog instance, all partitions are served + // 2. This does not support multiple consecutive split scenarios, requiring all liboblog instances to be restarted and the data redistributed before the next split + // 3. The purpose of this rule is to ensure that in a multiple instance scenario, data is not misplaced during the splitting process, + // and that all instances must be restarted after the split is complete to redistribute the data between instances + // + // For example, if p0 and p1 are split into p3, p4, p5 and p6, and according to the instance hash rule, this instance only serves p0, not p1, + // then p3 and p4 are served by p0, while p5 and p6 are not served by p1. + while (OB_SUCC(ret) && OB_SUCC(pkey_iter.next_partition_key_v2(dst_pkey))) { + // Get the partition before the split + ObPartitionKey src_pkey; + // get_split_source_partition_key guarantees that. + // 1. when dst_pkey is a split partition, source_part_key returns the split source partition + // 2. when dst_pkey is not a split partition, source_part_key returns its own + if (OB_FAIL(table_schema.get_split_source_partition_key(dst_pkey, src_pkey))) { + LOG_ERROR("get_split_source_partition_key fail", KR(ret), K(tenant_id_), K(dst_pkey), + K(table_id), K(is_tablegroup)); + } + // Check if the partitions are the same, if they are, then this partition is not split + else if (src_pkey == dst_pkey) { + LOG_INFO("partition does not split, need not add new partition", K(is_tablegroup), + K(table_id), K(has_physical_part), K(dst_pkey)); + } + // Determining whether a split partition will be serviced based on the partition before split + else if (OB_FAIL(add_served_partition_( + dst_pkey, + src_pkey, + start_serve_timestamp, + is_create_partition, + has_physical_part, + tablegroup_id, + db_id, + is_served))) { + LOG_ERROR("add_served_partition_ fail", KR(ret), + K(dst_pkey), + K(src_pkey), + K(start_serve_timestamp), + K(is_create_partition), + K(has_physical_part), + K(tablegroup_id), + K(db_id)); + } else if (! 
is_served) { + LOG_INFO("split source partition is not served, ignore split dst partition", K_(tenant_id), + K(table_id), K(is_tablegroup), K(has_physical_part), K(src_pkey), K(dst_pkey), + K(db_id), K(tablegroup_id)); + } + + dst_pkey.reset(); + } // while + + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + + return ret; +} + +int ObLogPartMgr::alter_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &old_schema_guard, + ObLogSchemaGuard &new_schema_guard, + const char *&tenant_name, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const ObTablegroupSchema *old_tg_schema = NULL; + const ObTablegroupSchema *new_tg_schema = NULL; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + // get tenant mode: MYSQL or ORACLE + // 1. oracle database/table matc needs to be case sensitive + // 2. mysql match don't needs to be case sensitive + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + TenantSchemaInfo tenant_schema_info; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == tablegroup_id) + || OB_UNLIKELY(schema_version_before_alter <= 0) + || OB_UNLIKELY(schema_version_after_alter <= 0) + || OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_serve_timestamp) + || OB_ISNULL(schema_getter) + || OB_UNLIKELY(extract_tenant_id(tablegroup_id) != tenant_id_)) { + LOG_ERROR("invalid argument", K(tenant_id_), K(tablegroup_id), + K(schema_version_before_alter), K(schema_version_after_alter), K(start_serve_timestamp), + K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_tenant_compat_mode(tenant_id_, compat_mode, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id_), + "compat_mode", print_compat_mode(compat_mode)); + } + } else if (OB_FAIL(schema_getter->get_lazy_schema_guard(tenant_id_, schema_version_before_alter, + timeout, old_schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema fail", KR(ret), K(tenant_id_), K(schema_version_before_alter)); + } + } else if (OB_FAIL(schema_getter->get_lazy_schema_guard(tenant_id_, schema_version_after_alter, + timeout, new_schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema fail", KR(ret), K(tenant_id_), K(schema_version_after_alter)); + } + } else if (OB_FAIL(old_schema_guard.get_tablegroup_schema(tablegroup_id, old_tg_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get tablegroup schema fail", KR(ret), K(tenant_id_), K(tablegroup_id)); + } + } else if (OB_ISNULL(old_tg_schema)) { + LOG_WARN("schema error: tablegroup does not exist in target schema", K(tenant_id_), + K(tablegroup_id), "schema_version", schema_version_before_alter); + // TODO Is it appropriate to replace the error code with OB_TENANT_HAS_BEEN_DROPPED? 
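+ // As with the other schema-fetch paths in this file, OB_TENANT_HAS_BEEN_DROPPED is the error
+ // code that callers are expected to tolerate when the tenant/database/table no longer exists at
+ // the requested schema version (see the @retval notes on add_all_tables below), so a missing
+ // tablegroup schema is mapped to the same code here; a tolerant caller would typically just
+ // downgrade it, e.g. if (OB_TENANT_HAS_BEEN_DROPPED == ret) { ret = OB_SUCCESS; }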
+ ret = OB_TENANT_HAS_BEEN_DROPPED; + } else if (OB_FAIL(new_schema_guard.get_tablegroup_schema(tablegroup_id, new_tg_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get tablegroup schema fail", KR(ret), K(tenant_id_), K(tablegroup_id)); + } + } else if (OB_ISNULL(new_tg_schema)) { + LOG_WARN("schema error: tablegroup does not exist in target schema", K(tenant_id_), K(tablegroup_id), + "schema_version", schema_version_after_alter); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else if (OB_FAIL(old_schema_guard.get_tenant_schema_info(tenant_id_, tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_schema_info fail", KR(ret), K(tenant_id_), K(tablegroup_id)); + } + } else { + tenant_name = tenant_schema_info.name_; + + const bool has_physical_part = has_physical_part_(*new_tg_schema); + const int64_t new_database_id = OB_INVALID_ID; // tablegroup DB invalid + const bool is_tablegroup = true; + const char *tablegroup_name = new_tg_schema->get_tablegroup_name_str(); + + _ISTAT("[DDL] [ALTER_TABLEGROUP] [BEGIN] TENANT=%s(%lu) TABLEGROUP=%s(%lu) HAS_PHY_PART=%d", + tenant_name, tenant_id_, old_tg_schema->get_tablegroup_name_str(), tablegroup_id, + has_physical_part); + + // ignore if skip_reversed_schema_version_=true, otherwise exit with an error + CHECK_SCHEMA_VERSION(schema_version_after_alter, "alter-tablegroup schema version reversed", + K(tablegroup_id)); + + if (OB_FAIL(ret)) { + // fail + } else if (has_physical_part) { + // PG Dynamic add/drop partition + bool tablegroup_is_chosen = false; + if (OB_FAIL(filter_tablegroup_(new_tg_schema, tenant_name, compat_mode, tablegroup_is_chosen))) { + LOG_ERROR("filter_tablegroup_ fail", KR(ret), K(tablegroup_id), K(tablegroup_name), + K(tenant_id_), K(tenant_name), K(tablegroup_is_chosen)); + } else if (! 
tablegroup_is_chosen) { + // tablegroup is filtered and no longer processed + } else if (OB_FAIL(alter_table_add_or_drop_partition_( + is_tablegroup, + has_physical_part, + start_serve_timestamp, + old_tg_schema, + new_tg_schema, + new_database_id, + "alter_tablegroup_partition"))) { + LOG_ERROR("alter table add or drop partition fail", KR(ret), K(is_tablegroup), + K(start_serve_timestamp), K(new_database_id), K(has_physical_part)); + } else { + // succ + } + + _ISTAT("[DDL] [ALTER_TABLEGROUP] RET=%d TENANT=%s(%lu) TABLEGROUP=%s(%ld) IS_SERVED=%d HAS_PHY_PART=%d " + "START_TSTAMP=%ld", + ret, tenant_name, tenant_id_, tablegroup_name, tablegroup_id, + tablegroup_is_chosen, has_physical_part, start_serve_timestamp); + } else { + if (OB_FAIL(alter_tablegroup_partition_when_is_not_binding_( + tablegroup_id, + schema_version_before_alter, + old_schema_guard, + schema_version_after_alter, + new_schema_guard, + start_serve_timestamp, + compat_mode, + timeout))) { + LOG_ERROR("alter_tablegroup_partition_when_is_not_binding_ fail", KR(ret), K(tablegroup_id), + K(schema_version_before_alter), K(schema_version_after_alter), K(start_serve_timestamp)); + } + } + } + + return ret; +} + +int ObLogPartMgr::get_table_ids_in_tablegroup_(const uint64_t tenant_id, + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t timeout, + ObArray &table_id_array) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_ISNULL(schema_getter)) { + LOG_ERROR("schema getter is NULL", K(schema_getter)); + ret = OB_ERR_UNEXPECTED; + } + /// A fallback schema guard is needed to get all the tables under a tablegroup + else if (OB_FAIL(schema_getter->get_fallback_schema_guard(tenant_id, schema_version, timeout, + schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_fallback_schema_guard fail", KR(ret), K(tenant_id), K(schema_version), K(tablegroup_id)); + } + } else if (OB_FAIL(schema_guard.get_table_ids_in_tablegroup(tenant_id, tablegroup_id, + table_id_array, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_table_ids_in_tablegroup fail", KR(ret), K(tenant_id), + K(tablegroup_id), K(table_id_array)); + } + } else { + LOG_INFO("get_table_ids_in_tablegroup by fallback schema mode succ", K(tenant_id), + K(tablegroup_id), K(schema_version), K(table_id_array)); + } + + return ret; +} + +int ObLogPartMgr::alter_tablegroup_partition_when_is_not_binding_( + const uint64_t tablegroup_id, + const int64_t schema_version_before_alter, + ObLogSchemaGuard &old_schema_guard, + const int64_t schema_version_after_alter, + ObLogSchemaGuard &new_schema_guard, + const int64_t start_serve_timestamp, + const share::ObWorker::CompatMode &compat_mode, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + ObArray table_id_array; + table_id_array.reset(); + + // The get_table_ids_in_tablegroup() interface can only use the fallback schema guard + // The new schema version is used here to get the table ids in the tablegroup + if (OB_FAIL(get_table_ids_in_tablegroup_(tenant_id_, + tablegroup_id, + schema_version_after_alter, + timeout, + table_id_array))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_table_ids_in_tablegroup fail", KR(ret), K(tenant_id_), + K(tablegroup_id), K(schema_version_after_alter), K(table_id_array)); + } + } else { + const int64_t table_count_in_tablegroup = table_id_array.count(); + ISTAT("[DDL] [ALTER_TABLEGROUP_NO_BINDING]", K_(tenant_id), K(tablegroup_id), + K(table_count_in_tablegroup), 
K(schema_version_before_alter), K(schema_version_after_alter), + K(start_serve_timestamp)); + + // To ensure that functions can be called repeatedly, get the full schema first + // Get the Full Table Schema here, Lazy Mode + ObArray new_tb_schema_array; + ObArray old_tb_schema_array; + // The tenant_name of all tables in a tablegroup is constant, but get_schema_info_based_on_table_schema_ will initially reset the tenant_name/db_name to NULL each time + // So it is not safe to use a global tenant_name when the tablegroup contains tables that need to be filtered out, e.g. including local indexes + ObArray tenant_name_array; + ObArray db_name_array; + ObArray table_is_ignored_array; + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < table_count_in_tablegroup; idx++) { + const uint64_t table_id = table_id_array.at(idx); + const ObSimpleTableSchemaV2 *new_tb_schema = NULL; + const ObSimpleTableSchemaV2 *old_tb_schema = NULL; + bool table_is_ignored = false; + const char *tenant_name = NULL; + const char *db_name = NULL; + + if (OB_FAIL(new_schema_guard.get_table_schema(table_id, new_tb_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), K(table_id)); + } + } else if (OB_ISNULL(new_tb_schema)) { + LOG_WARN("schema error: table does not exist in target schema", K(table_id), + "schema_version", schema_version_after_alter); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else if (OB_FAIL(get_schema_info_based_on_table_schema_(new_tb_schema, new_schema_guard, + timeout, table_is_ignored, tenant_name, db_name))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get schemas fail", KR(ret), "table_id", new_tb_schema->get_table_id(), + "schema_version", schema_version_after_alter); + } + } else if (OB_FAIL(old_schema_guard.get_table_schema(table_id, old_tb_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), K(table_id)); + } + } else if (OB_ISNULL(old_tb_schema)) { + LOG_WARN("schema error: table does not exist in target schema", K(table_id), + "schema_version", schema_version_before_alter); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else { + if (OB_FAIL(new_tb_schema_array.push_back(new_tb_schema))) { + LOG_ERROR("new_tb_schema_array push_back fail", KR(ret), K(table_id), KPC(new_tb_schema)); + } else if (OB_FAIL(old_tb_schema_array.push_back(old_tb_schema))) { + LOG_ERROR("old_tb_schema_array push_back fail", KR(ret), K(table_id), KPC(old_tb_schema)); + } else if (OB_FAIL(tenant_name_array.push_back(tenant_name))) { + LOG_ERROR("tenant_name_array push_back fail", KR(ret), K(table_id), K(tenant_name)); + } else if (OB_FAIL(db_name_array.push_back(db_name))) { + LOG_ERROR("db_name_array push_back fail", KR(ret), K(table_id), K(db_name)); + } else if (OB_FAIL(table_is_ignored_array.push_back(table_is_ignored))) { + LOG_ERROR("table_is_ignored_array push_back fail", KR(ret), K(table_id), K(table_is_ignored)); + } + } + } // for + + // Processing logic + if (OB_SUCC(ret)) { + for (int64_t idx = 0; OB_SUCCESS == ret && idx < table_count_in_tablegroup; idx++) { + const uint64_t table_id = table_id_array.at(idx); + bool table_is_ignored = table_is_ignored_array.at(idx); + const ObSimpleTableSchemaV2 *new_tb_schema = new_tb_schema_array.at(idx); + const ObSimpleTableSchemaV2 *old_tb_schema = old_tb_schema_array.at(idx); + const char *tenant_name = tenant_name_array.at(idx); + const char *db_name = db_name_array.at(idx); + + if (table_is_ignored) { + // do nothing + // Filter first to avoid invalid 
db_name/tenant_name of index table + } else if (OB_ISNULL(new_tb_schema) || OB_ISNULL(old_tb_schema) || OB_ISNULL(db_name) + || OB_ISNULL(tenant_name)) { + LOG_ERROR("new_tb_schema or old_tb_schema or db_name or tenant_name is null", + K(table_id), K(new_tb_schema), K(old_tb_schema), K(db_name), K(tenant_name)); + ret = OB_ERR_UNEXPECTED; + } else if (new_tb_schema->is_in_logical_split()) { + // tablegroup split + // split_table_ performs the filter table logic, so the upper level does not judge + if (OB_FAIL(split_table_(new_tb_schema, tenant_name, db_name, start_serve_timestamp, compat_mode))) { + // split_table_ performs the filter table logic, so the upper level does not judge + LOG_ERROR("split_table_ fail", KR(ret), K(tenant_id_), K(tablegroup_id), + "table_id", new_tb_schema->get_table_id(), + "schema_version", schema_version_after_alter, + K(start_serve_timestamp), "compat_mode", print_compat_mode(compat_mode)); + } + } else { + // tablegroup dynamically adds and removes partitions + // Only whitelisted tables are manipulated + bool table_is_chosen = false; + bool is_primary_table_chosen = false; + const bool is_tablegroup = false; + const bool has_physical_part = has_physical_part_(*new_tb_schema); + + if (OB_FAIL(filter_table_(new_tb_schema, tenant_name, db_name, compat_mode, table_is_chosen, is_primary_table_chosen))) { + LOG_ERROR("filter table fail", KR(ret)); + } else if (! table_is_chosen) { + // do nothing + } else { + if (OB_FAIL(alter_table_add_or_drop_partition_( + is_tablegroup, + has_physical_part, + start_serve_timestamp, + old_tb_schema, + new_tb_schema, + new_tb_schema->get_database_id(), + "alter_tablegroup_partition_when_is_not_binding"))) { + LOG_ERROR("alter table add or drop partition fail", KR(ret), + K(is_tablegroup), + K(has_physical_part), + K(start_serve_timestamp), + K(old_tb_schema), + K(new_tb_schema)); + } else { + // succ + } + } + } + } // for + } + } + + return ret; +} + +int ObLogPartMgr::add_inner_tables(const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + const int64_t served_part_count_before = NULL != map_ ? map_->get_valid_count() : 0; + const bool enable_backup_mode = (TCONF.enable_backup_mode != 0); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! enable_backup_mode)) { + LOG_ERROR("inner tables can only be added on backup mode", K(enable_backup_mode), K(tenant_id_), + K(start_serve_tstamp), K(start_schema_version)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(start_serve_tstamp <= 0) + || OB_UNLIKELY(start_schema_version <= 0) + || OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid argument", K(start_serve_tstamp), K(start_schema_version), K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } + // Get schema guard based on tenant_id + // use fallback mode to refresh schema + // Because the following is to get the full schema, some interfaces only support fallback mode, e.g. 
get_table_schemas_in_tenant() + else if (OB_FAIL(schema_getter->get_fallback_schema_guard(tenant_id_, start_schema_version, + timeout, schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_fallback_schema_guard fail", KR(ret), K(tenant_id_), K(start_schema_version), + K(start_serve_tstamp)); + } + } + // Add internal tables for normal tenants required in backup mode + // TODO: Only all_sequence_value table is currently available in this mode + else if (OB_FAIL(do_add_inner_tables_( + schema_guard, + start_serve_tstamp, + start_schema_version, + timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("add inner tables on backup mode fail", KR(ret), K(tenant_id_), + K(start_serve_tstamp), K(start_schema_version)); + } + } else { + const int64_t total_served_part_count = map_->get_valid_count(); + ISTAT("[ADD_INNER_TABLES_ON_BACKUP_MODE]", K_(tenant_id), + K(start_serve_tstamp), + K(start_schema_version), + "tenant_served_inner_table_part_count", total_served_part_count - served_part_count_before, + K(total_served_part_count)); + } + + return ret; +} + +// add all tables of current tenant +// +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED caller should ignore if tenant/database not exist +// @retval other error code fail +int ObLogPartMgr::add_all_tables( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + const int64_t served_part_count_before = NULL != map_ ? map_->get_valid_count() : 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(start_serve_tstamp <= 0) + || OB_UNLIKELY(start_schema_version <= 0) + || OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid argument", K(start_serve_tstamp), K(start_schema_version), K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } + // Get schema guard based on tenant_id + // use fallback mode to refresh schema + // Because the following is to get the full schema, some interfaces only support fallback mode, e.g. 
get_table_schemas_in_tenant() + else if (OB_FAIL(schema_getter->get_fallback_schema_guard(tenant_id_, start_schema_version, + timeout, schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_fallback_schema_guard fail", KR(ret), K(tenant_id_), K(start_schema_version), + K(start_serve_tstamp)); + } + } + // add all tables + else if (OB_FAIL(do_add_all_tables_( + schema_guard, + start_serve_tstamp, + start_schema_version, + timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("add all tables fail", KR(ret), K(tenant_id_), K(start_serve_tstamp), + K(start_schema_version)); + } + } + // add all tablegroups + else if (OB_FAIL(do_add_all_tablegroups_( + schema_guard, + start_serve_tstamp, + start_schema_version, + timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("add all tablegroups fail", KR(ret), K(tenant_id_), K(start_serve_tstamp), + K(start_schema_version)); + } + } else { + const int64_t total_served_part_count = map_->get_valid_count(); + ISTAT("[ADD_ALL_TABLES_AND_TABLEGROUPS]", K_(tenant_id), + K(start_serve_tstamp), + K(start_schema_version), + "tenant_served_part_count", total_served_part_count - served_part_count_before, + K(total_served_part_count)); + } + + return ret; +} + +int ObLogPartMgr::do_add_all_tablegroups_( + ObLogSchemaGuard &schema_guard, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const bool is_create_partition = false; + ObArray tg_schemas; + TenantSchemaInfo tenant_schema_info; + + // get tenant schema info + if (OB_FAIL(schema_guard.get_tenant_schema_info(tenant_id_, tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_schema_info fail", KR(ret), K(tenant_id_)); + } + } + // get all tablegroup schema + else if (OB_FAIL(schema_guard.get_tablegroup_schemas_in_tenant(tenant_id_, tg_schemas, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tablegroup_schemas_in_tenant fail", KR(ret), K(tenant_id_), K(tg_schemas)); + } + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < tg_schemas.count(); ++idx) { + const ObTablegroupSchema *tg_schema = tg_schemas.at(idx); + + if (OB_ISNULL(tg_schema)) { + LOG_ERROR("tg_schema is NULL", K(idx), K(tg_schema)); + ret = OB_ERR_UNEXPECTED; + } else { + const int64_t tablegroup_id = tg_schema->get_tablegroup_id(); + + if (OB_FAIL(add_tablegroup_partition_( + tablegroup_id, + *tg_schema, + start_serve_tstamp, + is_create_partition, + tenant_schema_info.name_, + timeout))) { + LOG_ERROR("add_tablegroup_partition_ fail", KR(ret), K(tablegroup_id), + K(start_serve_tstamp), K(is_create_partition), K(tenant_schema_info)); + } + } + } // for + } + + ISTAT("[ADD_ALL_TABLEGROUPS]", KR(ret), "tablegroup_count", tg_schemas.count(), + K(start_serve_tstamp), K(start_schema_version), K(tenant_schema_info)); + + return ret; +} + +int ObLogPartMgr::do_add_inner_tables_( + ObLogSchemaGuard &schema_guard, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + ObArray table_schemas; + + // get_table_schemas_in_tenant will fetch all table schema at this time, including primary tables, index tables + if (OB_FAIL(schema_guard.get_table_schemas_in_tenant(tenant_id_, table_schemas, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schemas in tenant fail", KR(ret), K(tenant_id_), K(start_schema_version)); + } + } else { + const int64_t table_schema_count = table_schemas.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < table_schema_count; i++) { + 
const ObSimpleTableSchemaV2 *table_schema = table_schemas.at(i); + const char *tenant_name = NULL; + const char *db_name = NULL; + bool table_is_ignored = false; + bool is_create_partition = false; + + if (OB_ISNULL(table_schema)) { + LOG_ERROR("table_schema is null", K(i), K(tenant_id_), K(table_schemas)); + ret = OB_ERR_UNEXPECTED; + } + // filter some table if in backup mode + else if (! BackupTableHelper::is_sys_table_exist_on_backup_mode( + table_schema->is_sys_table(), table_schema->get_table_id())) { + // skip + } + // get tenant、db schema + else if (OB_FAIL(get_schema_info_based_on_table_schema_(table_schema, schema_guard, timeout, + table_is_ignored, tenant_name, db_name))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get schemas fail", KR(ret), "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), K(start_schema_version)); + } + } else if (OB_UNLIKELY(table_is_ignored)) { + LOG_ERROR("table should not be ignored by get_schema_info_based_on_table_schema_", + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + K(start_schema_version), K(table_is_ignored)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(add_table_(start_serve_tstamp, is_create_partition, table_schema, + tenant_name, db_name, timeout))) { + LOG_ERROR("add table fail", KR(ret), "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), K(start_serve_tstamp), + K(is_create_partition), K(tenant_name), K(db_name)); + } else { + // succ + } + } + + if (OB_SUCC(ret)) { + ISTAT("[DO_ADD_INNER_TABLES]", K(tenant_id_), + K(start_serve_tstamp), K(start_schema_version)); + } + } + + return ret; +} + +// add normal user tables +int ObLogPartMgr::do_add_all_tables_( + ObLogSchemaGuard &schema_guard, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const bool is_create_partition = false; + ObArray table_schemas; + + // get_table_schemas_in_tenant will fetch all table schema at this time, including primary tables, index tables + if (OB_FAIL(schema_guard.get_table_schemas_in_tenant(tenant_id_, table_schemas, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schemas in tenant fail", KR(ret), K(tenant_id_), K(start_schema_version)); + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < table_schemas.count(); i++) { + const ObSimpleTableSchemaV2 *table_schema = table_schemas.at(i); + bool table_is_ignored = false; + const char *tenant_name = NULL; + const char *db_name = NULL; + + if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("table_schema is null", K(i), K(table_schemas), K(tenant_id_)); + } else if (table_schema->is_sys_table()) { + // skip + } + // get tenant、db schema + else if (OB_FAIL(get_schema_info_based_on_table_schema_(table_schema, schema_guard, timeout, + table_is_ignored, tenant_name, db_name))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get schemas fail", KR(ret), "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), K(start_schema_version)); + } + } else if (table_is_ignored) { + // Tables are ignored + // Globally indexed tables are not ignored + // Uniquely indexed tables are not ignored + // 1. 
The get_schemas_based_on_table_schema_ function does not filter global index tables, for consistency and correctness, because currently + // get_table_schemas returns the table schema array, ensuring that the main table comes first and the index tables come second, in ascending order by table id, + // 2. Here we do not rely on that schema interface guarantee: for a global index table, first get the corresponding main table, then complete the whitelist filtering based on the main table + // 3. The get_schema_guard_and_table_schema_ function does not filter unique index tables, it is used to add TableIDCache + } else if (table_schema->is_global_index_table() || table_schema->is_unique_index()) { + uint64_t primary_table_id = table_schema->get_data_table_id(); + // When handling a global index table / unique index table, the primary table schema needs to be fetched + const ObSimpleTableSchemaV2 *primary_table_schema = NULL; + + if (OB_UNLIKELY(OB_INVALID_ID == primary_table_id)) { + LOG_ERROR("primary_table_id is not valid", K(primary_table_id), KPC(table_schema)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(get_simple_table_schema_(primary_table_id, timeout, schema_guard, primary_table_schema))) { + if (OB_TIMEOUT != ret) { + // primary_table_schema is not available on this failure path, so it must not be dereferenced here + LOG_ERROR("get table schema fail", KR(ret), + "index_table_id", table_schema->get_table_id(), + "index_table_name", table_schema->get_table_name(), + K(primary_table_id)); + } + } else if (OB_ISNULL(primary_table_schema)) { + LOG_ERROR("invalid schema", K(primary_table_schema)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(add_table_(start_serve_tstamp, is_create_partition, table_schema, + tenant_name, db_name, timeout, primary_table_schema))) { + LOG_ERROR("add table fail", KR(ret), "index_table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), K(start_serve_tstamp), + K(is_create_partition), K(tenant_name), K(db_name)); + } + } else if (OB_FAIL(add_table_(start_serve_tstamp, is_create_partition, table_schema, + tenant_name, db_name, timeout))) { + LOG_ERROR("add table fail", KR(ret), "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), K(start_serve_tstamp), + K(is_create_partition), K(tenant_name), K(db_name)); + } else { + // add table success + } + + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // When a schema error is encountered (database schema, tenant schema not fetched) + // simply ignore the table and do not add + LOG_WARN("schema error when add table, ignore table", KR(ret), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name()); + ret = OB_SUCCESS; + } + } // for + + ISTAT("[ADD_ALL_TABLES]", KR(ret), K_(tenant_id), "table_count", table_schemas.count(), + K(start_serve_tstamp), K(start_schema_version)); + + return ret; +} + +int ObLogPartMgr::get_ddl_pkey_(const uint64_t tenant_id, const int64_t schema_version, + ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + const uint64_t table_id = combine_id(tenant_id, OB_ALL_DDL_OPERATION_TID); + if (OB_SYS_TENANT_ID == tenant_id) { + // The schema must be queried for the sys tenant's DDL partition, as the cluster may have been upgraded from an old version and the partition count may be 1 + // Adding a DDL partition for the sys tenant can only happen at start-up, so the timeout here is hardcoded + ObLogSchemaGuard schema_guard; + int64_t timeout = GET_SCHEMA_TIMEOUT_ON_START_UP; + const ObSimpleTableSchemaV2 *tb_schema = NULL; + + // The __all_ddl_operation table schema for the sys tenant should not fail to 
fetch, or report an error if it does + if (OB_FAIL(get_schema_guard_and_table_schema_(table_id, schema_version, timeout, + schema_guard, tb_schema))) { + LOG_ERROR("get_schema_guard_and_table_schema_ fail", KR(ret), K(table_id), K(schema_version)); + } else if (OB_ISNULL(tb_schema)) { + LOG_ERROR("table schema is NULL", K(table_id), K(schema_version)); + ret = OB_ERR_UNEXPECTED; + } else { + bool check_dropped_schema = false; + ObTablePartitionKeyIter pkey_iter(*tb_schema, check_dropped_schema); + if (OB_FAIL(pkey_iter.next_partition_key_v2(pkey))) { + LOG_ERROR("iterate pkey fail", KR(ret), K(table_id), K(pkey)); + } + // Only one DDL partition is supported + else if (OB_UNLIKELY(1 != pkey_iter.get_partition_num())) { + LOG_ERROR("partition number of DDL partition is not 1, not supported", K(tenant_id), + K(schema_version), K(pkey_iter.get_partition_num()), K(pkey)); + ret = OB_NOT_SUPPORTED; + } else { + // success + } + } + } else { + // DDL partitioning is fixed for common tenants + if (OB_FAIL(pkey.init(table_id, 0, 0))) { + LOG_ERROR("partition key init fail", KR(ret), K(table_id), K(tenant_id)); + } + } + + LOG_INFO("get_ddl_pkey", KR(ret), K(tenant_id), K(pkey), K(schema_version)); + return ret; +} + +// add all_ddl_operation table for tenant +int ObLogPartMgr::add_ddl_table( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const bool is_create_tenant) +{ + int ret = OB_SUCCESS; + ObPartitionKey ddl_pkey; + + if (OB_UNLIKELY(! inited_)) { + ret = OB_NOT_INIT; + LOG_ERROR("not init", K(inited_)); + } else if (OB_FAIL(get_ddl_pkey_(tenant_id_, start_schema_version, ddl_pkey))) { + LOG_ERROR("get_ddl_pkey_ fail", KR(ret), K(tenant_id_), K(start_schema_version), K(ddl_pkey)); + } else { + bool add_succ = false; + uint64_t tg_id = 0; + uint64_t db_id = 0; + const bool has_physical_part = true; // Physical entities exist by default in DDL partitions + + // DDL partition adding process without tg_id and db_id + if (OB_FAIL(add_served_partition_( + ddl_pkey, + ddl_pkey, + start_serve_tstamp, + is_create_tenant, + has_physical_part, + tg_id, + db_id, + add_succ))) { + LOG_ERROR("add partition fail", KR(ret), K(ddl_pkey), K(start_serve_tstamp), + K(is_create_tenant), K(has_physical_part), K(add_succ), K(tg_id), K(db_id)); + } else if (add_succ) { + ISTAT("[ADD_DDL_TABLE]", K_(tenant_id), K(ddl_pkey), + "is_schema_split_mode", TCTX.is_schema_split_mode_, K(start_schema_version), + K(start_serve_tstamp), K(is_create_tenant)); + } else { + LOG_ERROR("DDL partition add fail", K_(tenant_id), K(ddl_pkey), + "is_schema_split_mode", TCTX.is_schema_split_mode_, K(start_schema_version), + K(start_serve_tstamp), K(is_create_tenant)); + ret = OB_ERR_UNEXPECTED; + } + } + + return ret; +} + +int ObLogPartMgr::inc_part_trans_count_on_serving(bool &is_serving, + const ObPartitionKey &key, + const uint64_t prepare_log_id, + const int64_t prepare_log_timestamp, + const bool print_partition_not_serve_info, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + // TODO: verify prepare_log_id + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! 
key.is_valid()) + || OB_UNLIKELY(prepare_log_timestamp <= 0) + || OB_UNLIKELY(OB_INVALID_ID == prepare_log_id)) { + LOG_ERROR("invalid argument", K(key), K(prepare_log_timestamp), K(prepare_log_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(prepare_log_timestamp < TCTX.start_tstamp_)) { + // If the Prepare log timestamp is less than the start timestamp, it must not be served + is_serving = false; + _ISTAT("[INC_TRANS_COUNT] [PART_NOT_SERVE] LOG_TSTAMP(%ld) <= START_TSTAMP(%ld) " + "PART=%s LOG_ID=%ld TENATN_ID=%lu", + prepare_log_timestamp, TCTX.start_tstamp_, to_cstring(key), prepare_log_id, tenant_id_); + } else { + // In test mode, determine if want to block the participant list confirmation process, and if so, wait for a period of time + if (TCONF.test_mode_on) { + int64_t block_time_us = TCONF.test_mode_block_verify_participants_time_sec * _SEC_; + if (block_time_us > 0) { + ISTAT("[INC_TRANS_COUNT] [TEST_MODE_ON] block to verify participants", + K_(tenant_id), K(block_time_us)); + usleep((useconds_t)block_time_us); + } + } + // First save the current version of Schema + int64_t schema_version = ATOMIC_LOAD(&cur_schema_version_); + + // Then determine if the partitioned transaction is serviced, and if so, increase the number of transactions + ret = inc_trans_count_on_serving_(is_serving, key, print_partition_not_serve_info); + + // Handle cases where partitions do not exist + // Special treatment is needed here for normal global index partitions, they exist for a long time but don't pull + // their logs, they need to be filtered out, here a cache of normal global index tables should be added to maintain their lifecycle as normal tables are maintained. + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + + // Check if it is a global normal index partition + bool is_exist = false; + const bool is_global_normal_index = true; + if (OB_FAIL(is_exist_table_id_cache_(key.get_table_id(), is_global_normal_index, is_exist))) { + LOG_ERROR("is_exist_table_id_cache_ fail", KR(ret), K(key), K(is_global_normal_index), K(is_exist)); + } else if (is_exist) { + // Filtering global general index partitions + is_serving = false; + + if (print_partition_not_serve_info) { + _ISTAT("[INC_TRANS_COUNT] [PART_NOT_SERVE] [GLOBAL_NORMAL_INDEX_TABLE] " + "TENANT=%lu PART=%s LOG_ID=%ld LOG_TSTAMP=%ld SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(key), prepare_log_id, prepare_log_timestamp, schema_version); + } else if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL)) { + _ISTAT("[INC_TRANS_COUNT] [PART_NOT_SERVE] [GLOBAL_NORMAL_INDEX_TABLE] " + "TENANT=%lu PART=%s LOG_ID=%ld LOG_TSTAMP=%ld SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(key), prepare_log_id, prepare_log_timestamp, schema_version); + } else { + // do nothing + } + } else { + // Determine the status of the partition based on the previously saved schema + PartitionStatus part_status = PART_STATUS_INVALID; + ret = check_part_status_(key, schema_version, timeout, part_status); + + if (OB_SUCCESS != ret) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("check_part_status_ fail", KR(ret), K(key), K(schema_version)); + } + } + // handle future partition + else if (PART_NOT_CREATE == part_status) { + if (OB_FAIL(handle_future_part_when_inc_trans_count_on_serving_(is_serving, + key, + print_partition_not_serve_info, + schema_version, + timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("handle_future_part_when_inc_trans_count_on_serving_ fail", + KR(ret), K(key), K(schema_version)); + } + } + } else { + // If it is not a future partition, the partition 
exists or the partition is deleted, both cases indicating that the partition is not serviced + is_serving = false; + + if (print_partition_not_serve_info) { + _ISTAT("[INC_TRANS_COUNT] [PART_NOT_SERVE] TENANT=%lu " + "PART=%s STATUS=%s LOG_ID=%ld LOG_TSTAMP=%ld SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(key), print_part_status(part_status), prepare_log_id, + prepare_log_timestamp, schema_version); + } else if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL)) { + _ISTAT("[INC_TRANS_COUNT] [PART_NOT_SERVE] TENANT=%lu " + "PART=%s STATUS=%s LOG_ID=%ld LOG_TSTAMP=%ld SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(key), print_part_status(part_status), prepare_log_id, + prepare_log_timestamp, schema_version); + } else { + // do nothing + } + } + } + } else { + // do nothing + } + } + + return ret; +} + +int ObLogPartMgr::inc_trans_count_on_serving_(bool &is_serving, + const ObPartitionKey &key, + const bool print_partition_not_serve_info) +{ + int ret = OB_SUCCESS; + ObLogPartInfo *info = NULL; + bool enable_create = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(map_->get(key, info, enable_create))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get PartInfo from map fail", KR(ret), K(key)); + } else { + // not exist + } + } else if (OB_ISNULL(info)) { + LOG_ERROR("get PartInfo from map fail, PartInfo is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + + info->inc_trans_count_on_serving(is_serving); + + if (! is_serving) { + if (print_partition_not_serve_info) { + PART_ISTAT(info, "[INC_TRANS_COUNT] [PART_NOT_SERVE]"); + } else if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL)) { + PART_ISTAT(info, "[INC_TRANS_COUNT] [PART_NOT_SERVE]"); + } else { + // do nothing + } + } else { + PART_DSTAT(info, "[INC_TRANS_COUNT]"); + } + } + + REVERT_PART_INFO(info, ret); + return ret; +} + +int ObLogPartMgr::check_part_status_(const common::ObPartitionKey &pkey, + const int64_t schema_version, + const int64_t timeout, + PartitionStatus &part_status) +{ + int ret = OB_SUCCESS; + ObLogSchemaGuard schema_guard; + part_status = PART_STATUS_INVALID; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid schema getter", K(schema_getter)); + ret = OB_NOT_INIT; + } else if (OB_FAIL(schema_getter->get_lazy_schema_guard(pkey.get_tenant_id(), schema_version, + timeout, schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard fail", "tenant_id", pkey.get_tenant_id(), KR(ret), + K(schema_version)); + } + } else if (OB_FAIL(schema_guard.query_partition_status(pkey, part_status, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("query_partition_status fail", KR(ret), K(pkey), K(part_status)); + } + } else { + } + + // Partition is considered deleted if the tenant does not exist + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + part_status = PART_DELETED; + ISTAT("[INC_TRANS_COUNT] [CHECK_PART_STATUS] tenant has been dropped, " + "partition status set to PART_DELETED", + KR(ret), K(tenant_id_), K(pkey), K(part_status), K(schema_version)); + ret = OB_SUCCESS; + } + + if (OB_SUCCESS == ret) { + _ISTAT("[INC_TRANS_COUNT] [CHECK_PART_STATUS] TENANT=%lu PKEY=%s STATUS=%s SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(pkey), print_part_status(part_status), schema_version); + } + + return ret; +} + +int ObLogPartMgr::handle_future_part_when_inc_trans_count_on_serving_(bool &is_serving, + const ObPartitionKey &key, + const bool print_partition_not_serve_info, + const int64_t 
base_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(base_schema_version <= 0) + || OB_UNLIKELY(base_schema_version > cur_schema_version_)) { + LOG_ERROR("invalid argument", K(base_schema_version), K(cur_schema_version_)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t start_time = get_timestamp(); + int64_t end_time = start_time + timeout; + int64_t schema_version = base_schema_version; + // default to future partition + PartitionStatus part_status = PART_NOT_CREATE; + + _ISTAT("[INC_TRANS_COUNT] [HANDLE_FUTURE_PART] [BEGIN] TENANT=%lu PART=%s " + "BASE_SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(key), base_schema_version); + + // Wait for Schema version to advance until the target partition is no longer a future partition + while (OB_SUCCESS == ret && PART_NOT_CREATE == part_status) { + // wait until schema version update to desired value + if (OB_FAIL(check_cur_schema_version_when_handle_future_part_(schema_version, end_time))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("check_cur_schema_version_when_handle_future_part_ fail", KR(ret), K(key), K(schema_version)); + } + } + + if (OB_SUCCESS == ret) { + int64_t left_time = end_time - get_timestamp(); + schema_version = cur_schema_version_; + + part_status = PART_STATUS_INVALID; + + // Check if the partition is still a future partition + if (OB_FAIL(check_part_status_(key, + schema_version, + left_time, + part_status))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("check_part_status_ fail", KR(ret), K(key), K(schema_version)); + } + } else { + // success + } + } + } + + if (OB_SUCCESS == ret) { + // Guaranteed no more future paritition + if (OB_UNLIKELY(PART_NOT_CREATE == part_status)) { + LOG_ERROR("partition is still future partition", K(part_status), + K(print_part_status(part_status)), K(key), + K(schema_version), K(cur_schema_version_), + K(base_schema_version)); + ret = OB_ERR_UNEXPECTED; + } else { + // If Schema advances to after partition creation, re-inc_trans_count + ret = inc_trans_count_on_serving_(is_serving, key, print_partition_not_serve_info); + + // If the partition information does not exist, the partition is deleted, or the partition exists but is not serviced + if (OB_ENTRY_NOT_EXIST == ret) { + is_serving = false; + ret = OB_SUCCESS; + + _ISTAT("[INC_TRANS_COUNT] [PART_NOT_SERVE] TENANT=%lu PART=%s STATUS=%s SCHEMA_VERSION=%ld", + tenant_id_, to_cstring(key), print_part_status(part_status), schema_version); + } + } + } + + _ISTAT("[INC_TRANS_COUNT] [HANDLE_FUTURE_PART] [END] TENANT=%lu RET=%d PART=%s STATUS=%s " + "IS_SERVING=%d END_SCHEMA_VERSION=%ld INTERVAL=%ld", + tenant_id_, ret, to_cstring(key), print_part_status(part_status), is_serving, schema_version, + get_timestamp() - start_time); + } + + return ret; +} + +int ObLogPartMgr::check_cur_schema_version_when_handle_future_part_(const int64_t schema_version, + const int64_t end_time) +{ + int ret = OB_SUCCESS; + ObThreadCondGuard guard(schema_cond_); + + // Wait until Schema version upgrade + while (OB_SUCC(ret) && schema_version >= ATOMIC_LOAD(&cur_schema_version_)) { + int64_t left_time = end_time - get_timestamp(); + + if (left_time <= 0) { + ret = OB_TIMEOUT; + break; + } + + schema_cond_.wait_us(left_time); + } + + return ret; +} + +int ObLogPartMgr::check_cur_schema_version_when_handle_future_table_(const int64_t schema_version, + const int64_t end_time) +{ + int ret = OB_SUCCESS; + ObThreadCondGuard 
guard(schema_cond_); + + // Wait until Schema version upgrade + // Parsing row data, e.g. table_version=100, the current PartMgr is processing to version 90, + // so you need to wait for the schema version to advance to a version greater than or equal to 100 + while (OB_SUCC(ret) && schema_version > ATOMIC_LOAD(&cur_schema_version_)) { + int64_t left_time = end_time - get_timestamp(); + + if (left_time <= 0) { + ret = OB_TIMEOUT; + break; + } + + schema_cond_.wait_us(left_time); + } + + return ret; +} + +int ObLogPartMgr::dec_part_trans_count(const ObPartitionKey &key) +{ + int ret = OB_SUCCESS; + bool need_remove = false; + ObLogPartInfo *info = NULL; + bool enable_create = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(map_->get(key, info, enable_create))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_ERROR("PartInfo does not exist", K(key)); + ret = OB_ENTRY_NOT_EXIST; + } else { + LOG_ERROR("get PartInfo from map fail", KR(ret), K(key)); + } + } else if (OB_ISNULL(info)) { + LOG_ERROR("get PartInfo from map fail, PartInfo is NULL", KR(ret), K(info)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(info->dec_trans_count(need_remove))) { + LOG_ERROR("dec_trans_count fail", KR(ret), K(*info)); + } else { + PART_DSTAT(info, "[DEC_TRANS_COUNT]"); + + if (need_remove) { + if (OB_FAIL(recycle_partition_(key, info))) { + LOG_ERROR("recycle_partition_ fail", KR(ret), K(key), K(info)); + } + } + } + + REVERT_PART_INFO(info, ret); + + return ret; +} + +bool ObLogPartMgr::is_partition_served_(const ObPartitionKey &pkey, + const uint64_t tablegroup_id, + const uint64_t database_id) const +{ + bool bool_ret = false; + + if (! inited_) { + bool_ret = false; + } + // DDL partition must serve + else if (is_ddl_table(pkey.get_table_id())) { + bool_ret = true; + } else { + uint64_t hash_v = 0; + + if (0 != TCONF.enable_new_partition_hash_algorithm) { + // Allowing the use of the new calculation + // table_id + partition_id to divide tasks + hash_v = pkey.get_table_id() + pkey.get_partition_id(); + } else { + uint64_t mod_key = (OB_INVALID_ID == tablegroup_id) ? database_id : tablegroup_id; + hash_v = murmurhash(&mod_key, sizeof(mod_key), 0); + uint64_t part_id = pkey.get_partition_id(); + hash_v = murmurhash(&part_id, sizeof(part_id), hash_v); + } + + bool_ret = ((hash_v % TCONF.instance_num) == TCONF.instance_index); + } + + return bool_ret; +} + +void ObLogPartMgr::print_part_info(int64_t &serving_part_count, + int64_t &offline_part_count, + int64_t ¬_served_part_count) +{ + int ret = OB_SUCCESS; + + if (inited_) { + // print PartInfo + PartInfoPrinter part_info_printer(tenant_id_); + if (OB_FAIL(map_->for_each(part_info_printer))) { + LOG_ERROR("PartInfo map foreach fail", KR(ret)); + } else { + // success + } + + if (OB_SUCCESS == ret) { + serving_part_count = part_info_printer.serving_part_count_; + offline_part_count = part_info_printer.offline_part_count_; + not_served_part_count = part_info_printer.not_served_part_count_; + } + } +} + +int ObLogPartMgr::update_schema_version(const int64_t schema_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + CHECK_SCHEMA_VERSION(schema_version, "update-schema-version schema version reversed", + K(schema_version), K_(cur_schema_version)); + + if (OB_SUCC(ret)) { + _ISTAT("[DDL] [UPDATE_SCHEMA] TENANT=%lu NEW_VERSION=%ld OLD_VERSION=%ld DELTA=%ld", + tenant_id_, schema_version, cur_schema_version_, schema_version - cur_schema_version_); + ObThreadCondGuard guard(schema_cond_); + + cur_schema_version_ = std::max(cur_schema_version_, schema_version); + // Filtering data within PG: In filtering row data, multiple threads may encounter future table data, + // at which point a uniform wake-up call is required via the broadcast mechanism + schema_cond_.broadcast(); + } + } + + return ret; +} + +int ObLogPartMgr::offline_partition(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + bool ensure_recycled_when_offlined = false; + + if (OB_FAIL(offline_partition_(pkey, ensure_recycled_when_offlined))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // partition not exist + ISTAT("offline partition, but not served", K(pkey)); + } else { + LOG_ERROR("offline_partition_ fail", K(pkey)); + } + } else { + ISTAT("offline and recycle partition success", K_(tenant_id), K(pkey)); + } + return ret; +} + +int ObLogPartMgr::offline_and_recycle_partition(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + bool ensure_recycled_when_offlined = true; + + if (OB_FAIL(offline_partition_(pkey, ensure_recycled_when_offlined))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // partition not exist + ISTAT("offline and recycle partition, but not served", K(pkey)); + } else { + LOG_ERROR("offline_partition_ fail", K(pkey)); + } + } else { + ISTAT("offline and recycle partition success", K_(tenant_id), K(pkey)); + } + return ret; +} + +int ObLogPartMgr::is_exist_table_id_cache(const uint64_t table_id, + bool &is_exist) +{ + const bool is_global_normal_index = false; + is_exist = false; + + return is_exist_table_id_cache_(table_id, is_global_normal_index, is_exist); +} + +int ObLogPartMgr::handle_future_table(const uint64_t table_id, + const int64_t table_version, + const int64_t timeout, + bool &is_exist) +{ + int ret = OB_SUCCESS; + is_exist = false; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + int64_t start_time = get_timestamp(); + int64_t end_time = start_time + timeout; + int64_t cur_schema_version = ATOMIC_LOAD(&cur_schema_version_); + + _ISTAT("[HANDLE_FUTURE_TABLE] [BEGIN] TENANT=%lu TABLE=%ld " + "TABLE_VERSION=%ld CUR_SCHEMA_VERSION=%ld DELTA=%ld", + tenant_id_, table_id, table_version, cur_schema_version, table_version - cur_schema_version); + + // Wait until the schema version has been upgraded + if (OB_FAIL(check_cur_schema_version_when_handle_future_table_(table_version, end_time))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("check_cur_schema_version_when_handle_future_table_ fail", KR(ret), K(table_id), K(table_version)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(is_exist_table_id_cache(table_id, is_exist))) { + LOG_ERROR("is_exist_table_id_cache fail", KR(ret), K(table_id), K(is_exist)); + } + } + + cur_schema_version = ATOMIC_LOAD(&cur_schema_version_); + _ISTAT("[HANDLE_FUTURE_TABLE] [END] RET=%d TENANT=%lu TABLE=%ld " + "TABLE_VERSION=%ld IS_EXIST=%d CUR_SCHEMA_VERSION=%ld DELTA=%ld INTERVAL=%ld", + ret, tenant_id_, table_id, table_version, is_exist, cur_schema_version, table_version - cur_schema_version, get_timestamp() - start_time); + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED caller should ignore error code if schema error like tenant/database not exist +// @retval other error code fail +int ObLogPartMgr::get_schema_info_based_on_table_schema_(const ObSimpleTableSchemaV2 *tb_schema, + ObLogSchemaGuard &schema_guard, + const int64_t timeout, + bool &table_is_ignored, + const char *&tenant_name, + const char *&db_name) +{ + int ret = OB_SUCCESS; + table_is_ignored = false; + tenant_name = NULL; + db_name = NULL; + + if (OB_ISNULL(tb_schema)) { + LOG_ERROR("invalid table schema", K(tb_schema)); + ret = OB_INVALID_ARGUMENT; + } + // 1. Due to a defect in the schema implementation, if a tenant has been dropped, the dropped tenant's + // "oceanbase" DB schema cannot be built when fetching a historical schema. However, some system tables under the oceanbase DB, + // such as __all_dummy, can still be built, i.e. the table schema exists but the DB schema + // does not. Since liboblog does not need to synchronize system tables, the missing DB schema case + // is worked around here by filtering out system tables. + // + // 2. Make sure global index tables are not filtered out + // + // 3. Make sure unique index tables are not filtered out, they are used to maintain the TableIDCache + // Note: the is_unique_index interface and is_global_index_table overlap: global unique indexes + // + // 4. DDL tables are filtered out by default + // + // 5. The specified tables are not filtered out in backup mode + // + // 6. Temporary tables are not user tables / system tables / unique indexes / global indexes, so this function also filters out temporary tables + else if (! tb_schema->is_user_table() + && ! BackupTableHelper::is_sys_table_exist_on_backup_mode( + tb_schema->is_sys_table(), tb_schema->get_table_id()) + && ! tb_schema->is_global_index_table() + && ! 
tb_schema->is_unique_index()) { + LOG_INFO("ignore tmp table or non-user, sys table but not on backup mode, " + "non-global-index and non-unique-index table", + "table_name", tb_schema->get_table_name(), + "table_id", tb_schema->get_table_id(), + "is_tmp_table", tb_schema->is_tmp_table(), + "is_user_table", tb_schema->is_user_table(), + "is_sys_table", tb_schema->is_sys_table(), + "is_backup_mode", is_backup_mode(), + "is_index_table", tb_schema->is_index_table(), + "is_unique_index_table", tb_schema->is_unique_index(), + "id_ddl_table", is_ddl_table(tb_schema->get_table_id()), + "is_global_index_table", tb_schema->is_global_index_table()); + // filter out + table_is_ignored = true; + } else { + uint64_t tenant_id = tb_schema->get_tenant_id(); + uint64_t db_id = tb_schema->get_database_id(); + DBSchemaInfo db_schema_info; + TenantSchemaInfo tenant_schema_info; + + table_is_ignored = false; + + if (OB_FAIL(schema_guard.get_tenant_schema_info(tenant_id, tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get tenant schema info fail", KR(ret), K(tenant_id)); + } + } else if (OB_FAIL(schema_guard.get_database_schema_info(db_id, db_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get database schema info fail", KR(ret), K(db_id)); + } + } else { + tenant_name = tenant_schema_info.name_; + db_name = db_schema_info.name_; + } + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED caller should ignore error code if schema error like tenant/database not exist +// @retval other error code fail +int ObLogPartMgr::get_schema_guard_and_table_schema_(const uint64_t table_id, + const int64_t schema_version, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&tb_schema) +{ + int ret = OB_SUCCESS; + + tb_schema = NULL; + const uint64_t tenant_id = extract_tenant_id(table_id); + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid schema getter", K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(schema_getter->get_schema_guard_and_table_schema(table_id, + schema_version, + timeout, + schema_guard, + tb_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema fail", KR(ret), K(tenant_id), K(schema_version)); + } + } else if (OB_ISNULL(tb_schema)) { + LOG_ERROR("table schema is NULL, tenant may be dropped", K(table_id), K(schema_version)); + // TODO review this !!!! 
+ ret = OB_TENANT_HAS_BEEN_DROPPED; + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED caller should ignore if tenant/database not exist +// @retval other error code fail +int ObLogPartMgr::get_lazy_schema_guard_and_tablegroup_schema_( + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObTablegroupSchema *&tg_schema) +{ + int ret = OB_SUCCESS; + + tg_schema = NULL; + const uint64_t tenant_id = extract_tenant_id(tablegroup_id); + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid schema getter", K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(schema_getter->get_lazy_schema_guard(tenant_id, schema_version, timeout, + schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard fail", KR(ret), K(tenant_id), K(schema_version)); + } + } else if (OB_FAIL(schema_guard.get_tablegroup_schema(tablegroup_id, tg_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get tablegroup schema fail", KR(ret), K(tenant_id), K(tablegroup_id), KPC(tg_schema)); + } + } else if (OB_ISNULL(tg_schema)) { + LOG_WARN("schema error: tablegroup does not exist in target schema", K(tenant_id), K(tablegroup_id), + "schema_version", schema_version); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } + + return ret; +} + +// Fetch the Simple Table Schema +// Note: the Full Table Schema is used instead, because only the Full Table Schema can be obtained under lazy mode +int ObLogPartMgr::get_simple_table_schema_(const uint64_t table_id, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&tb_schema) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = extract_tenant_id(table_id); + int64_t schema_version = OB_INVALID_TIMESTAMP; + tb_schema = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(schema_guard.get_table_schema(table_id, tb_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), K(table_id)); + } + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, schema_version))) { + LOG_ERROR("schema_guard get_schema_version fail", KR(ret), K(tenant_id), K(schema_version)); + } else if (OB_ISNULL(tb_schema)) { + ret = OB_TENANT_HAS_BEEN_DROPPED; + LOG_WARN("schema error: table does not exist in target schema", K(table_id), + "schema_version", schema_version); + } + + return ret; +} + +// Fetch the Full Table Schema +// +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED caller should ignore if tenant not exist +// @retval other error code fail +int ObLogPartMgr::get_full_table_schema_(const uint64_t table_id, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObTableSchema *&tb_schema) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = extract_tenant_id(table_id); + int64_t schema_version = OB_INVALID_TIMESTAMP; + tb_schema = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(schema_guard.get_table_schema(table_id, tb_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), K(table_id)); + } + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, schema_version))) { + LOG_ERROR("schema_gurard get_schema_version fail", KR(ret), K(tenant_id), K(schema_version)); + } else if (OB_ISNULL(tb_schema)) { + ret = OB_TENANT_HAS_BEEN_DROPPED; + LOG_WARN("schema error: table does not exist in target schema", K(table_id), + "schema_version", schema_version); + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED caller should ignore if tenant not exist +// @retval other error code fail +int ObLogPartMgr::get_schema_guard_and_schemas_(const uint64_t table_id, + const int64_t schema_version, + const int64_t timeout, + bool &table_is_ignored, + ObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&tb_schema, + const char *&tenant_name, + const char *&db_name) +{ + int ret = OB_SUCCESS; + + table_is_ignored = false; + tb_schema = NULL; + tenant_name = NULL; + db_name = NULL; + + if (OB_FAIL(get_schema_guard_and_table_schema_(table_id, schema_version, timeout, + schema_guard, tb_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), K(table_id), K(schema_version), KPC(tb_schema)); + } + } else if (OB_FAIL(get_schema_info_based_on_table_schema_(tb_schema, schema_guard, + timeout, table_is_ignored, tenant_name, db_name))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get_schema_info_based_on_table_schema_ fail", KR(ret), K(table_id), + K(schema_version)); + } + } else { + // success + } + + if (OB_SUCCESS != ret) { + tenant_name = NULL; + db_name = NULL; + } + return ret; +} + +int ObLogPartMgr::alter_table_add_or_drop_partition_( + const bool is_tablegroup, + const bool has_physical_part, + const int64_t start_serve_timestamp, + const ObPartitionSchema *old_tb_schema, + const ObPartitionSchema *new_tb_schema, + const int64_t new_database_id, + const char *event) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(old_tb_schema) || OB_ISNULL(new_tb_schema)) { + LOG_ERROR("invalid schema", K(old_tb_schema), K(new_tb_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + ObArray add_part_ids; + ObArray drop_part_ids; + + uint64_t old_table_id = old_tb_schema->get_table_id(); + int64_t old_table_part_cnt = old_tb_schema->get_partition_cnt(); + uint64_t new_table_id = new_tb_schema->get_table_id(); + // partition count field of the PKEY + int64_t new_table_part_cnt = new_tb_schema->get_partition_cnt(); + + if (OB_UNLIKELY(old_table_id != new_table_id) + || OB_UNLIKELY(old_table_part_cnt != new_table_part_cnt)) { + LOG_ERROR("table_id or table_part_cnt is not equal in old/new table schema", + K(old_table_id), K(new_table_id), K(old_table_part_cnt), K(new_table_part_cnt)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(ObPartMgrUtils::get_part_diff(*old_tb_schema, *new_tb_schema, + drop_part_ids, add_part_ids))) { + LOG_ERROR("get_part_diff fail", KR(ret), K(old_tb_schema), K(new_tb_schema), + K(drop_part_ids), K(add_part_ids)); + } else { + if (OB_FAIL(alter_table_drop_partition_( + is_tablegroup, + old_table_id, + drop_part_ids, + old_table_part_cnt))) { + LOG_ERROR("alter table drop partition fail", KR(ret), + K(is_tablegroup), + K(old_table_id), + K(drop_part_ids), + K(old_table_part_cnt)); + } else if (OB_FAIL(alter_table_add_partition_( + is_tablegroup, + has_physical_part, + new_table_id, + add_part_ids, + new_table_part_cnt, + start_serve_timestamp, + new_tb_schema->get_tablegroup_id(), + new_database_id))) { + LOG_ERROR("alter table add partition fail", KR(ret), + K(is_tablegroup), + K(has_physical_part), + K(new_table_id), + K(add_part_ids), + K(new_table_part_cnt), + K(start_serve_timestamp), + K(new_database_id)); + } else { + // do nothing + } + + ISTAT("[DDL] [ALTER_TABLE_ADD_OR_DROP_PART]", KR(ret), K(event), K(is_tablegroup), + K(has_physical_part), K_(tenant_id), K(new_table_id), + "drop_cnt", drop_part_ids.count(), K(drop_part_ids), + "add_cnt", add_part_ids.count(), K(add_part_ids)); + } + } + + return ret; +} + +int ObLogPartMgr::alter_table_drop_partition_( + const bool is_tablegroup, + const uint64_t table_id, + const common::ObArray &drop_part_ids, + const int64_t partition_cnt) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + // Drop the partitions + for (int64_t idx = 0; OB_SUCC(ret) && idx < drop_part_ids.count(); ++idx) { + int64_t partition_id = drop_part_ids.at(idx); + ObPartitionKey pkey(table_id, partition_id, partition_cnt); + + if (OB_FAIL(offline_partition_(pkey))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ISTAT("[DDL] [ALTER_TABLE_DROP_PART] partition not served", K(pkey)); + ret = OB_SUCCESS; + } + } else { + ISTAT("[DDL] [ALTER_TABLE_DROP_PART]", K(is_tablegroup), K_(tenant_id), K(pkey), + K(table_id), K(idx), "drop_part_count", drop_part_ids.count()); + } + } // for + } + + return ret; +} + +int ObLogPartMgr::alter_table_add_partition_( + const bool is_tablegroup, + const bool has_physical_part, + const uint64_t table_id, + const common::ObArray &add_part_ids, + const int64_t partition_cnt, // partition count field of the PKEY + const int64_t start_serve_timestamp, + const uint64_t tablegroup_id, + const uint64_t database_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + // Dynamically added partitions + const bool is_create_partition = true; + + // Add the partitions + for (int64_t idx = 0; OB_SUCC(ret) && idx < add_part_ids.count(); ++idx) { + int64_t partition_id = add_part_ids.at(idx); + ObPartitionKey pkey(table_id, partition_id, partition_cnt); + bool add_succ = false; + + if (OB_FAIL(add_served_partition_( + pkey, + pkey, + start_serve_timestamp, + is_create_partition, + has_physical_part, + tablegroup_id, + database_id, + add_succ))) { + LOG_ERROR("alter table add partition fail", KR(ret), K(pkey), K(start_serve_timestamp), + K(is_create_partition), K(has_physical_part), K(add_succ)); + } else if (add_succ) { + ISTAT("[DDL] [ALTER_TABLE_ADD_PART]", K_(tenant_id), K(table_id), K(is_tablegroup), + K(has_physical_part), K(pkey), K(add_succ)); + } else { + // do nothing + } + } // for + } + + return ret; +} + +int ObLogPartMgr::split_table_(const ObSimpleTableSchemaV2 *tb_schema, + const char *tenant_name, + const char *db_name, + const int64_t start_serve_timestamp, + const share::ObWorker::CompatMode &compat_mode) +{ + int ret = OB_SUCCESS; + bool table_is_chosen = false; + bool is_primary_table_chosen = false; + + if (OB_ISNULL(tb_schema) || OB_ISNULL(tenant_name) || OB_ISNULL(db_name)) { + LOG_ERROR("invalid schema", K(tb_schema), K(tenant_name), K(db_name)); + ret = OB_INVALID_ARGUMENT; + } + // Filter the table, only tables in the whitelist are operated on + else if (OB_FAIL(filter_table_(tb_schema, tenant_name, db_name, compat_mode, table_is_chosen, is_primary_table_chosen))) { + LOG_ERROR("filter table fail", KR(ret)); + } else if (! table_is_chosen) { + // table is ignored
 + LOG_INFO("table is not served, table split DDL is filtered", K(table_is_chosen), + "table_id", tb_schema->get_table_id(), + "table_name", tb_schema->get_table_name(), + K(db_name), + K(tenant_name)); + } else { + const bool is_tablegroup = false; + const uint64_t table_id = tb_schema->get_table_id(); + const uint64_t tg_id = tb_schema->get_tablegroup_id(); + const uint64_t db_id = tb_schema->get_database_id(); + // The table is not bound; only a non-PG table has physical partitions + const bool has_physical_part = has_physical_part_(*tb_schema); + bool check_dropped_schema = false; + ObTablePartitionKeyIter pkey_iter(*tb_schema, check_dropped_schema); + + if (OB_FAIL(split_table_or_tablegroup_( + is_tablegroup, + table_id, + tg_id, + db_id, + has_physical_part, + start_serve_timestamp, + pkey_iter, + *tb_schema))) { + LOG_ERROR("split_table_or_tablegroup_ fail", KR(ret), + K(is_tablegroup), + K(table_id), + K(tg_id), + K(db_id), + K(has_physical_part), + K(start_serve_timestamp)); + } else { + _ISTAT("[DDL] [SPLIT_TABLE] TENANT=%s(%lu) TABLE=%s.%s.%s(%ld) START_TSTAMP=%ld HAS_PHY_PART=%d", + tenant_name, tenant_id_, tenant_name, db_name, + tb_schema->get_table_name(), tb_schema->get_table_id(), + start_serve_timestamp, has_physical_part); + } + } + + return ret; +} + +int ObLogPartMgr::add_table_(const int64_t start_serve_tstamp, + const bool is_create_partition, + const ObSimpleTableSchemaV2 *tb_schema, + const char *tenant_name, + const char *db_name, + const int64_t timeout, + const ObSimpleTableSchemaV2 *primary_table_schema) +{ + int ret = OB_SUCCESS; + bool table_is_chosen = false; + bool is_primary_table_chosen = false; + // get tenant mode: MYSQL or ORACLE + // 1. Oracle database/table matching needs to be case sensitive + // 2. 
mysql match don't needs to be case sensitive + share::ObWorker::CompatMode compat_mode = share::ObWorker::CompatMode::INVALID; + // The primary table schema is invalid, indicating that it is the primary table itself + const uint64_t primary_table_id = + (NULL == primary_table_schema) ? tb_schema->get_table_id() : primary_table_schema->get_table_id(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_tenant_compat_mode(tenant_id_, compat_mode, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_compat_mode fail", KR(ret), K(tenant_id_)); + } + // The filter_table_ function checks the validity of the schema, so current function no longer checks + } else if (OB_FAIL(filter_table_(tb_schema, tenant_name, db_name, compat_mode, table_is_chosen, + is_primary_table_chosen, primary_table_schema))) { + LOG_ERROR("filter table fail", KR(ret), K(db_name), K(tenant_name), K(compat_mode)); + } else if (! table_is_chosen) { + // table is ignored + // If the primary table matches: + // 1. global common index, then add global common index cache + // 2. unique index (not global unique index), then add TableIDCache directly + + if (is_primary_table_chosen) { + if (tb_schema->is_global_normal_index_table() + || is_unique_index_table_but_expect_global_unqiue_index_(*tb_schema)) { + // Handling global general indexes + if (OB_FAIL(add_table_id_into_cache_(*tb_schema, db_name, primary_table_id))) { + LOG_ERROR("add_table_id_into_cache_ fail", KR(ret), + "table_id", tb_schema->get_table_id(), + "table_name", tb_schema->get_table_name(), + K(db_name), + K(primary_table_id), + "is_global_normal_index_table", tb_schema->is_global_normal_index_table(), + "is_unique_index", tb_schema->is_unique_index()); + } else { + // succ + } + } + } + } else { + const bool is_tablegroup = false; + const uint64_t tb_id = tb_schema->get_table_id(); + const uint64_t tg_id = tb_schema->get_tablegroup_id(); + const uint64_t db_id = tb_schema->get_database_id(); + // table is not bind, only non-PG has entity partition + const bool has_physical_part = has_physical_part_(*tb_schema); + bool check_dropped_schema = false; + ObTablePartitionKeyIter pkey_iter(*tb_schema, check_dropped_schema); + int64_t served_part_count = 0; + + if (TCONF.enable_hbase_mode && ! tb_schema->is_in_recyclebin()) { + const char *tb_name = tb_schema->get_table_name(); + const int64_t schema_version = tb_schema->get_schema_version(); + // If the table name contains $, then it is probably an hbase model table, get ObTableSchema + ObString tb_name_str(tb_name); + if (NULL != tb_name_str.find('$')) { + // HBase mode: If you encounter an HBase table, you need to get the ObTableSchema, cannot use force lazy mode at this time. 
+ const ObTableSchema *full_table_schema = NULL; + ObLogSchemaGuard schema_guard_for_full_table_schema; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid schema getter", K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(schema_getter->get_schema_guard_and_full_table_schema(tb_id, schema_version, timeout, + schema_guard_for_full_table_schema, full_table_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_and_full_table_schema fail", KR(ret), K(tb_id), KPC(full_table_schema)); + } + } else if (OB_ISNULL(full_table_schema)) { + LOG_ERROR("full_table_schema is NULL", K(tb_id), K(schema_version), K(full_table_schema)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(TCTX.hbase_util_.add_hbase_table_id(*full_table_schema))) { + LOG_ERROR("hbase_util_ add_hbase_table_id", KR(ret), K(tb_id), K(tb_name)); + } else { + // succ + } + } + } + + if (OB_FAIL(ret)) { + // fail + } else if (OB_FAIL(add_table_id_into_cache_(*tb_schema, db_name, primary_table_id))) { + // Adding primary tables and globally unique index tables to TableIDCache + LOG_ERROR("add_table_id_into_cache_ fail", KR(ret), + "table_id", tb_schema->get_table_id(), + "table_name", tb_schema->get_table_name(), + K(db_name), + K(primary_table_id), + K(db_id), + "is_global_unique_index_table", tb_schema->is_global_unique_index_table()); + } else if (OB_FAIL(add_table_or_tablegroup_( + is_tablegroup, + tb_id, + tg_id, + db_id, + has_physical_part, + is_create_partition, + start_serve_tstamp, + pkey_iter, + *tb_schema, + served_part_count))) { + LOG_ERROR("add_table_or_tablegroup_ fail", KR(ret), + K(is_tablegroup), + K(tb_id), + K(tg_id), + K(db_id), + K(has_physical_part), + K(is_create_partition), + K(start_serve_tstamp), + K(tb_schema)); + } else { + _ISTAT("[DDL] [ADD_TABLE] TENANT=%s(%ld) TABLE=%s.%s.%s(%ld) HAS_PHY_PART=%d " + "SERVED_PART_COUNT=%ld TABLE_GROUP=%ld DATABASE=%ld " + "START_TSTAMP=%ld IS_CREATE=%d IS_GLOBAL_UNIQUE_INDEX=%d.", + tenant_name, tenant_id_, tenant_name, db_name, + tb_schema->get_table_name(), tb_id, has_physical_part, + served_part_count, tg_id, db_id, start_serve_tstamp, is_create_partition, + tb_schema->is_global_unique_index_table()); + } + } + + return ret; +} + +template +int ObLogPartMgr::add_table_or_tablegroup_( + const bool is_tablegroup, + const uint64_t table_id, + const uint64_t tablegroup_id, + const uint64_t db_id, + const bool has_physical_part, + const bool is_create_partition, + const int64_t start_serve_tstamp, + PartitionKeyIter &pkey_iter, + PartitionSchema &table_schema, + int64_t &served_part_count) +{ + int ret = OB_SUCCESS; + served_part_count = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + ObPartitionKey iter_pkey; + const bool is_in_split = table_schema.is_in_logical_split(); + int64_t table_schema_version = table_schema.get_schema_version(); + // The start service time of the split source partition selects the time at which the split begins + // + // The purpose is: to ensure that the split source partition must be able to pull the split log so that the split source partition + // can end itself based on the split log. 
Otherwise, if the split source partition's start timestamp is greater than the split log timestamp, + // the split source partition will not be able to pull the split log + // Since OB has no mechanism to guarantee that the ilog of the partition's OFFLINE log must fall off the disk, there may be scenarios + // where the OFFLINE log is never pulled, so that the OFFLINE log cannot be relied upon to end the partition, and the split source partition + // still relies on the split log to end itself. + // + // If a table is in split, the schema version of the table is the start time of the split (not the exact point in time, + // it may be larger than the split log timestamp), here the minimum value of the schema version and the start service timestamp is chosen + // as the source partition start timestamp. + int64_t split_src_start_serve_tstamp = std::min(table_schema_version, start_serve_tstamp); + + // Iterate through all partitions + while (OB_SUCCESS == ret && OB_SUCC(pkey_iter.next_partition_key_v2(iter_pkey))) { + // The default is to decide for self whether to serve or not + ObPartitionKey check_serve_info_pkey = iter_pkey; + ObPartitionKey src_pkey; + + // Dealing with split situations + if (is_in_split) { + bool src_part_add_succ = false; + + // Get split source partition + if (OB_FAIL(table_schema.get_split_source_partition_key(iter_pkey, src_pkey))) { + LOG_ERROR("get_split_source_partition_key fail", KR(ret), K(iter_pkey), K(table_id), + K(is_tablegroup)); + } else { + // A partition is in the process of splitting and whether or not it is served should depend on whether or not its split source partition is served + check_serve_info_pkey = src_pkey; + + if (src_pkey == iter_pkey) { + // Partition not split, not processed + } + // Add the split source partition, it decides for itself whether to service it or not + // Note: the same split source partition will be added more than once, here it returns success + else if (OB_FAIL(add_served_partition_( + src_pkey, + src_pkey, + split_src_start_serve_tstamp, + is_create_partition, + has_physical_part, + tablegroup_id, + db_id, + src_part_add_succ))) { + LOG_ERROR("add_served_partition_ fail", KR(ret), + K(src_pkey), + K(split_src_start_serve_tstamp), + K(is_create_partition), + K(has_physical_part), + K(tablegroup_id), K(db_id)); + } else if (src_part_add_succ) { + ISTAT("[DDL] [ADD_TABLE_OR_TABLEGROUP] [ADD_SPLIT_SRC_PART]", + K_(tenant_id), K(is_tablegroup), K(has_physical_part), + K(src_pkey), "dst_pkey", iter_pkey, + "split_src_start_serve_tstamp", TS_TO_STR(split_src_start_serve_tstamp), + "tablegroup_schema_version", TS_TO_STR(table_schema_version), + "start_serve_tstamp", TS_TO_STR(start_serve_tstamp), K(is_create_partition)); + } + } + } + + if (OB_SUCC(ret)) { + bool add_succ = false; + if (OB_FAIL(add_served_partition_( + iter_pkey, + check_serve_info_pkey, + start_serve_tstamp, + is_create_partition, + has_physical_part, + tablegroup_id, + db_id, + add_succ))) { + LOG_ERROR("add served partition fail", KR(ret), + K(iter_pkey), + K(start_serve_tstamp), + K(is_create_partition), + K(has_physical_part), + K(add_succ), + K(tablegroup_id), + K(db_id)); + } else if (add_succ) { + served_part_count++; + } else { + // do nothing + } + } + + DSTAT("[DDL] [ADD_TABLE_OR_TABLEGROUP] FOR_EACH_PART", KR(ret), K(is_tablegroup), K(is_in_split), + K_(tenant_id), K(iter_pkey), K(src_pkey)); + + iter_pkey.reset(); + } // while + + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + + return ret; +} + +int ObLogPartMgr::drop_table_(const 
ObSimpleTableSchemaV2 *table_schema) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else if (table_schema->is_global_normal_index_table() + || is_unique_index_table_but_expect_global_unqiue_index_(*table_schema)) { + if (OB_FAIL(remove_table_id_from_cache_(*table_schema))) { + LOG_ERROR("remove_table_id_from_cache_ fail", KR(ret), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "is_global_normal_index_table", table_schema->is_global_normal_index_table(), + "is_unique_index", table_schema->is_unique_index()); + } + } else { + ObTableType table_type = table_schema->get_table_type(); + const uint64_t table_id = table_schema->get_table_id(); + const char *table_name = table_schema->get_table_name(); + int64_t served_part_count = 0; + const bool is_tablegroup = false; + + // Delete only user tables and globally unique index tables + if ((share::schema::USER_TABLE == table_type) + || (table_schema->is_global_unique_index_table())) { + if (OB_FAIL(remove_table_id_from_cache_(*table_schema))) { + LOG_ERROR("remove_table_id_from_cache_ fail", KR(ret), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "is_global_unique_index_table", table_schema->is_global_unique_index_table()); + } else if (OB_FAIL(drop_table_or_tablegroup_( + is_tablegroup, + table_id, + table_name, + *table_schema, + served_part_count))) { + LOG_ERROR("drop_table_or_tablegroup_ fail", KR(ret), + K(is_tablegroup), + K(table_id), + K(table_name)); + } else { + _ISTAT("[DDL] [DROP_TABLE] [END] TENANT=%lu TABLE=%s(%ld) " + "IS_GLOBAL_UNIQUE_INDEX=%d SERVED_PART_COUNT=%ld TOTAL_PART_COUNT=%ld", + tenant_id_, + table_schema->get_table_name(), + table_schema->get_table_id(), + table_schema->is_global_unique_index_table(), + served_part_count, + map_->get_valid_count()); + } + } + } + + return ret; +} + +int ObLogPartMgr::filter_table_(const ObSimpleTableSchemaV2 *table_schema, + const char *tenant_name, + const char *db_name, + const share::ObWorker::CompatMode &compat_mode, + bool &chosen, + bool &is_primary_table_chosen, + const ObSimpleTableSchemaV2 *primary_table_schema) +{ + int ret = OB_SUCCESS; + is_primary_table_chosen = false; + // 1. filter_table_ only matches based on the primary table + // 2. The current table is a global index table and needs to use the corresponding primary table + const ObSimpleTableSchemaV2 *target_table_schema = NULL; + IObLogTableMatcher *tb_matcher = TCTX.tb_matcher_; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(table_schema) || OB_ISNULL(db_name) || OB_ISNULL(tenant_name) || OB_ISNULL(tb_matcher)) { + LOG_ERROR("invalid argument", K(table_schema), K(db_name), K(tenant_name), K(tb_matcher)); + ret = OB_INVALID_ARGUMENT; + } else if (table_schema->is_global_index_table() || table_schema->is_unique_index()) { + // 1. When manipulating a global index table/unique index table, the primary schema should be valid + // 2. 
The global index table/unique index table is filtered based on the primary table, if the primary table matches, the global index table also matches + if (OB_ISNULL(primary_table_schema)) { + LOG_ERROR("invalid argument", K(primary_table_schema), KPC(table_schema), + K(db_name), K(tenant_name)); + ret = OB_INVALID_ARGUMENT; + } else { + target_table_schema = primary_table_schema; + } + } else { + target_table_schema = table_schema; + } + + if (OB_SUCC(ret)) { + // match primary table + ObTableType table_type = target_table_schema->get_table_type(); + const char *tb_name = target_table_schema->get_table_name(); + uint64_t table_id = target_table_schema->get_table_id(); + // Default mysql and oracle mode are both case-insensitive + // when configured with enable_oracle_mode_match_case_sensitive=1, oracle is case sensitive + int fnmatch_flags = FNM_CASEFOLD; + if (compat_mode == share::ObWorker::CompatMode::ORACLE + && enable_oracle_mode_match_case_sensitive_) { + fnmatch_flags = FNM_NOESCAPE; + } + + chosen = false; + + if (OB_UNLIKELY(is_ddl_table(table_id))) { + // No need to process DDL tables, DDL table partitions are added independently + chosen = false; + LOG_INFO("filter_table: DDL table is filtered", K_(tenant_id), K(table_id), K(tb_name), + K(db_name), K(tenant_name)); + } else if (BackupTableHelper::is_sys_table_exist_on_backup_mode( + target_table_schema->is_sys_table(), + table_id)) { + // Internal tables that need to be included in the backup schema must not be filtered + chosen = true; + LOG_INFO("do not filter inner tables on backup mode", K_(tenant_id), K(table_id), K(tb_name), + K(db_name), K(tenant_name)); + } else if (OB_UNLIKELY(share::schema::USER_TABLE != table_type)) { + // Synchronise only user tables + chosen = false; + } + // Asynchronous PROXY table + else if (OB_UNLIKELY(is_proxy_table(tenant_name, db_name, tb_name))) { + chosen = false; + } else if (OB_FAIL(tb_matcher->table_match(tenant_name, db_name, tb_name, chosen, fnmatch_flags))) { + LOG_ERROR("match table fail", KR(ret), "table_name", target_table_schema->get_table_name(), + K(db_name), K(tenant_name)); + } else { + // succ + } + } + + if (OB_SUCC(ret)) { + if (table_schema->is_global_index_table()) { + // Primary Table Matching + if (chosen) { + is_primary_table_chosen = true; + } + + if (table_schema->is_global_normal_index_table()) { + // Global general indexes do not care about partition changes + chosen = false; + } + + LOG_DEBUG("filter_global_index_table_ succ", "index_table_id", table_schema->get_table_id(), + "index_table", table_schema->get_table_name(), + "table_name", primary_table_schema->get_table_name(), + K(chosen), K(is_primary_table_chosen)); + } else if (is_unique_index_table_but_expect_global_unqiue_index_(*table_schema)) { + // Primary Table Matching + if (chosen) { + is_primary_table_chosen = true; + } + // Unique index tables (not global unique index tables) do not care about partition changes and are only used to add TableIDCache + chosen = false; + } else { + // succ + } + } + + if (OB_SUCC(ret)) { + // If you are going to add a globally unique index table, check if it is a multi-instance scenario, which does not support globally unique indexes + if (chosen + && table_schema->is_global_unique_index_table() + && TCONF.instance_num > SINGLE_INSTANCE_NUMBER + && ! 
TCONF.enable_global_unique_index_belong_to_multi_instance) { + ret = OB_NOT_SUPPORTED; + LOG_ERROR("global unique index table under multi-instance NOT SUPPORTED", + "instance_num", (int64_t)(TCONF.instance_num), + "table_name", table_schema->get_table_name(), + "table_id", table_schema->get_table_id(), + K(primary_table_schema), + "primary_table_name", primary_table_schema ? primary_table_schema->get_table_name() : "NULL", + "primary_table_id", primary_table_schema ? primary_table_schema->get_table_id() : 0, + K(is_primary_table_chosen), K(chosen)); + } + } + + return ret; +} + +bool ObLogPartMgr::is_unique_index_table_but_expect_global_unqiue_index_(const ObSimpleTableSchemaV2 &table_schema) const +{ + bool bool_ret = false; + + bool_ret = (table_schema.is_unique_index()) && (! table_schema.is_global_unique_index_table()); + + return bool_ret; +} + +int ObLogPartMgr::filter_tablegroup_(const ObTablegroupSchema *tg_schema, + const char *tenant_name, + const share::ObWorker::CompatMode &compat_mode, + bool &chosen) +{ + int ret = OB_SUCCESS; + IObLogTableMatcher *tb_matcher = TCTX.tb_matcher_; + chosen = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tg_schema) || OB_ISNULL(tenant_name) || OB_ISNULL(tb_matcher)) { + LOG_ERROR("invalid argument", K(tg_schema), K(tenant_name), K(tb_matcher)); + ret = OB_INVALID_ARGUMENT; + } else { + const char *tablegroup_name = tg_schema->get_tablegroup_name_str(); + + // Default mysql and oracle mode are both case-insensitive + // when configured with enable_oracle_mode_match_case_sensitive=1, oracle is case sensitive + int fnmatch_flags = FNM_CASEFOLD; + if (compat_mode == share::ObWorker::CompatMode::ORACLE + && enable_oracle_mode_match_case_sensitive_) { + fnmatch_flags = FNM_NOESCAPE; + } + + if (OB_FAIL(tb_matcher->tablegroup_match(tenant_name, tablegroup_name, chosen, fnmatch_flags))) { + LOG_ERROR("match table fail", KR(ret), K(tablegroup_name), K(tenant_name)); + } else { + // succ + } + } + + return ret; +} + +// check_serve_info_pkey: partition key used to check whether the partition is served, normally the partition itself, +// feels itself served or not, in split scenarios the partition key before the split is used to determine whether the partition key after the split is served +int ObLogPartMgr::add_served_partition_(const ObPartitionKey &pkey, + const ObPartitionKey &check_serve_info_pkey, + const int64_t start_serve_tstamp, + const bool is_create_partition, + const bool has_physical_part, + const uint64_t tablegroup_id, + const uint64_t db_id, + bool &add_succ) +{ + int ret = OB_SUCCESS; + add_succ = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (! 
has_physical_part) { + // No physical partition, no treatment + ISTAT("[ADD_SERVED_PART] no physical partition, need not add", K(pkey), K(has_physical_part), + K(tablegroup_id), K(db_id), K(is_create_partition), K(start_serve_tstamp)); + } else { + // Check if the partition is serviced + bool is_served = is_partition_served_(check_serve_info_pkey, tablegroup_id, db_id); + + // Add partitions + // Include partitions that are serviced and partitions that are not serviced + if (OB_FAIL(add_partition_(pkey, start_serve_tstamp, is_create_partition, is_served))) { + if (OB_ENTRY_EXIST == ret) { + // partition already exist + ret = OB_SUCCESS; + } else { + LOG_ERROR("add_partition_ fail", KR(ret), K(pkey), K(start_serve_tstamp), + K(is_create_partition), K(tablegroup_id), K(db_id), K(is_served)); + } + } else { + if (is_served) { + add_succ = true; + } + } + } + + return ret; +} + +// Pre-check before adding a service partition +int ObLogPartMgr::add_served_part_pre_check_(const ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + bool is_tenant_serving = false; + // Request a partition slot from the tenant structure and if the tenant is no longer in service, it cannot be added further + if (OB_FAIL(host_.inc_part_count_on_serving(pkey, is_tenant_serving))) { + LOG_ERROR("inc_part_count_on_serving fail", KR(ret), K(pkey), K_(host)); + } else if (OB_UNLIKELY(! is_tenant_serving)) { + // The tenant is not in service + // The current implementation, when a tenant is not in service, does not perform DDL tasks for that tenant, so that cannot happen here + LOG_ERROR("add partition when tenant is not serving, unexpected", + K(is_tenant_serving), K(host_), K(pkey)); + ret = OB_ERR_UNEXPECTED; + } + return ret; +} + +// Note: add_partition is not thread-safe +// The caller has to ensure that adding and deleting partitions are executed serially under lock protection +int ObLogPartMgr::add_partition_(const ObPartitionKey& pkey, + const int64_t start_serve_tstamp, + const bool is_create_partition, + const bool is_served) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (start_serve_tstamp <= 0) { + LOG_ERROR("invalid argument", K(start_serve_tstamp)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ENTRY_EXIST == (ret = map_->contains_key(pkey))) { + // add repeatly + LOG_INFO("partition has been added", KR(ret), K_(tenant_id), K(pkey), K(start_serve_tstamp), + K(is_create_partition), K(is_served)); + } else if (OB_UNLIKELY(OB_ENTRY_NOT_EXIST != ret)) { + LOG_ERROR("check partition exist fail", KR(ret), K(pkey), K(start_serve_tstamp), + K(is_create_partition)); + ret = OB_SUCCESS == ret ? OB_ERR_UNEXPECTED : ret; + } else { + // If the element does not exist, continue adding + ret = OB_SUCCESS; + + ObLogPartInfo *info = NULL; + uint64_t start_log_id = is_create_partition ? 
1 : OB_INVALID_ID; + + // If partitioning services, perform preflight checks + if (is_served && OB_FAIL(add_served_part_pre_check_(pkey))) { + LOG_ERROR("add_served_part_pre_check_ fail", KR(ret), K(pkey), K(is_served)); + } + // Dynamic assignment of an element + else if (OB_ISNULL(info = map_->alloc())) { + LOG_ERROR("alloc part info fail", K(info), K(pkey)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } + // Perform initialization + else if (OB_FAIL(info->init(pkey, is_create_partition, start_serve_tstamp, is_served))) { + LOG_ERROR("init part info fail", KR(ret), K(pkey), K(start_serve_tstamp), + K(is_create_partition), K(is_served)); + map_->free(info); + info = NULL; + } else { + // Print before performing map insertion, as the insertion will be seen by others afterwards and there is a concurrent modification scenario + if (is_served) { + PART_ISTAT(info, "[DDL] [ADD_PART]"); + } else { + PART_ISTAT(info, "[DDL] [ADD_NOT_SERVED_PART]"); + } + + // Inserting elements, the insert interface does not have get() semantics and does not require revert + if (OB_FAIL(map_->insert(pkey, info))) { + LOG_ERROR("insert part into map fail", KR(ret), K(pkey), KPC(info)); + } else { + // The info structure cannot be reapplied afterwards and may be deleted at any time + info = NULL; + + // For partitioning of services, execute the add-partition callback + if (is_served + && OB_FAIL(call_add_partition_callbacks_(pkey, start_serve_tstamp, start_log_id))) { + // The add partition callback should not add partitions repeatedly, here it returns OB_ERR_UNEXPECTED, an error exits + if (OB_ENTRY_EXIST == ret) { + LOG_ERROR("call add-partition callbacks fail, add repeated partition", + KR(ret), K(pkey), K(start_serve_tstamp), K(start_log_id)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_ERROR("call add-partition callbacks fail", KR(ret), K(pkey), K(start_serve_tstamp), + K(start_log_id)); + } + } + } + } + } + + return ret; +} + +// ensure_recycled_when_offlined: Whether recycling is guaranteed to be successful in the event of going offline, +// i.e. whether the number of transactions left behind is 0 +int ObLogPartMgr::offline_partition_(const ObPartitionKey &pkey, + const bool ensure_recycled_when_offlined) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + ObLogPartInfo *info = NULL; + bool enable_create = false; + int64_t end_trans_count = 0; + + ret = map_->get(pkey, info, enable_create); + if (OB_ENTRY_NOT_EXIST == ret) { + // not exist + } else if (OB_SUCCESS != ret) { + LOG_ERROR("get PartInfo from map fail", KR(ret), K(pkey)); + } else if (OB_ISNULL(info)) { + LOG_ERROR("get PartInfo from map fail, PartInfo is NULL", KR(ret), K(pkey)); + ret = OB_ERR_UNEXPECTED; + } else if (info->offline(end_trans_count)) { // Atomic switch to OFFLINE state + ISTAT("[OFFLINE_PART] switch offline state success", K(pkey), K(end_trans_count), + K(ensure_recycled_when_offlined), KPC(info)); + + // Recycle the partition if there are no ongoing transactions on it + if (0 == end_trans_count) { + if (OB_FAIL(recycle_partition_(pkey, info))) { + LOG_ERROR("recycle_partition_ fail", KR(ret), K(pkey), K(info)); + } + } + // An error is reported if a recycle operation must be performed when asked to go offline, + // indicating that in this case there should be no residual transactions for the external guarantee + else if (ensure_recycled_when_offlined) { + LOG_ERROR("there are still transactions waited, can not recycle", K(pkey), + K(end_trans_count), KPC(info)); + ret = OB_INVALID_DATA; + } + } else { + PART_ISTAT(info, "[OFFLINE_PART] partition has been in offline state"); + } + + REVERT_PART_INFO(info, ret); + } + + return ret; +} + +int ObLogPartMgr::recycle_partition_(const ObPartitionKey &pkey, ObLogPartInfo *info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(info)) { + LOG_ERROR("invalid argument", K(info)); + ret = OB_INVALID_ARGUMENT; + } else { + PART_ISTAT(info, "[RECYCLE_PART]"); + + // Notify the modules that the partition is being recycled + if (OB_FAIL(call_recycle_partition_callbacks_(pkey))) { + LOG_ERROR("call recycle-partition callbacks fail", KR(ret), K(pkey)); + } else if (OB_FAIL(map_->remove(pkey))) { // Deleting records from Map + LOG_ERROR("remove PartInfo from map fail", KR(ret), K(pkey)); + } else { + // succ + } + } + return ret; +} + +int ObLogPartMgr::call_add_partition_callbacks_(const ObPartitionKey &pkey, + const int64_t start_serve_tstamp, + const uint64_t start_log_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + for (int64_t index = 0; OB_SUCCESS == ret && index < part_add_cb_array_->count(); index++) { + PartAddCallback *cb = NULL; + if (OB_FAIL(part_add_cb_array_->at(index, reinterpret_cast(cb)))) { + LOG_ERROR("get callback from array fail", KR(ret), K(index)); + } else if (OB_ISNULL(cb)) { + LOG_ERROR("get callback from array fail, callback is NULL", KR(ret), K(index), K(cb)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(cb->add_partition(pkey, start_serve_tstamp, start_log_id))) { + LOG_ERROR("add_partition fail", KR(ret), K(pkey), K(start_serve_tstamp), K(cb), + K(start_log_id)); + } else { + // succ + } + } + } + + return ret; +} + +int ObLogPartMgr::call_recycle_partition_callbacks_(const ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + for (int64_t index = 0; OB_SUCCESS == ret && index < part_rc_cb_array_->count(); index++) { + PartRecycleCallback *cb = NULL; + if (OB_FAIL(part_rc_cb_array_->at(index, (int64_t &)cb))) { + LOG_ERROR("get callback from array fail", KR(ret), K(index)); + } else if (OB_ISNULL(cb)) { + LOG_ERROR("get callback from array fail, callback is NULL", KR(ret), K(index), K(cb)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(cb->recycle_partition(pkey))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // partition not exist as expected + ret = OB_SUCCESS; + } else { + LOG_ERROR("recycle_partition fail", KR(ret), K(pkey), K(cb)); + } + } else { + // succ + } + } + } + + return ret; +} + +bool ObLogPartMgr::is_proxy_table(const char *tenant_name, const char *db_name, const char *tb_name) +{ + bool bool_ret = false; + + // TODO: configure proxy tenant and database + if (OB_ISNULL(tenant_name) || OB_ISNULL(db_name) || OB_ISNULL(tb_name)) { + bool_ret = false; + } else if (0 != STRCMP(OB_SYS_TENANT_NAME, tenant_name)) { + bool_ret = false; + } else if (0 != STRCMP(OB_SYS_DATABASE_NAME, db_name)) { + bool_ret = false; + } else { + bool_ret = (0 == STRCMP(PROXY_INFO_TABLE_NAME, tb_name)); + bool_ret = bool_ret || (0 == STRCMP(PROXY_CONFIG_TABLE_NAME, tb_name)); + bool_ret = bool_ret || (0 == STRCMP(PROXY_CONFIG_TABLE_OLD_NAME, tb_name)); + bool_ret = bool_ret || (0 == STRCMP(PROXY_STAT_TABLE_NAME, tb_name)); + bool_ret = bool_ret || (0 == STRCMP(PROXY_KV_TABLE_NAME, tb_name)); + bool_ret = bool_ret || (0 == STRCMP(PROXY_VIP_TENANT_TABLE_NAME, tb_name)); + bool_ret = bool_ret || (0 == STRCMP(PROXY_VIP_TENANT_TABLE_OLD_NAME, tb_name)); + } + + return bool_ret; +} + +bool ObLogPartMgr::need_to_support_split_when_in_multi_instance_() const +{ + bool bool_ret = false; + + if (TCONF.instance_num > 1) { + bool_ret = false; + } else { + bool_ret = true; + } + + return bool_ret; +} + +int ObLogPartMgr::drop_all_tables() +{ + int ret = OB_SUCCESS; + PartInfoScannerByTenant scanner(tenant_id_); + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited", K(inited_)); + ret = OB_NOT_INIT; + } + // Iterate through the partitions being served by the current tenant + else if (OB_FAIL(map_->for_each(scanner))) { + LOG_ERROR("scan map fail", KR(ret), K(tenant_id_)); + } else { + _ISTAT("[DDL] [DROP_TENANT] [DROP_ALL_TABLES] [BEGIN] TENANT=%ld TENANT_PART_COUNT=%ld " + "TOTAL_PART_COUNT=%ld", + tenant_id_, scanner.pkey_array_.count(), map_->get_valid_count()); + + for (int64_t index = 0; OB_SUCCESS == ret && index < scanner.pkey_array_.count(); index++) { + const ObPartitionKey &pkey = scanner.pkey_array_.at(index); + ret = offline_partition_(pkey); + + if (OB_ENTRY_NOT_EXIST == ret) { + DSTAT("[DDL] [DROP_TENANT] partition not served", K(pkey)); + ret = OB_SUCCESS; + } else if (OB_SUCCESS != ret) { + LOG_ERROR("offline partition fail", KR(ret), K(pkey)); + } else { + // succ + } + } + + if (OB_SUCCESS == ret) { + if (OB_FAIL(clean_table_id_cache_())) { + LOG_ERROR("clean_table_id_cache_ fail", KR(ret), K(tenant_id_)); + } + } + + _ISTAT("[DDL] [DROP_TENANT] [DROP_ALL_TABLES] [END] TENANT=%ld TOTAL_PART_COUNT=%ld", + tenant_id_, map_->get_valid_count()); + } + return ret; +} + +int ObLogPartMgr::clean_table_id_cache_() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else { + TableInfoEraserByTenant gidx_tb_eraser_by_tenant(tenant_id_, true/*is_global_normal_index*/); + TableInfoEraserByTenant tableid_tb_eraser_by_tenant(tenant_id_, false/*is_global_normal_index*/); + + if (OB_FAIL(global_normal_index_table_cache_->remove_if(gidx_tb_eraser_by_tenant))) { + LOG_ERROR("global_normal_index_table_cache_ remove_if fail", KR(ret), K(tenant_id_)); + } else if (OB_FAIL(table_id_cache_->remove_if(tableid_tb_eraser_by_tenant))) { + LOG_ERROR("table_id_cache_ remove_if fail", KR(ret), K(tenant_id_)); + } else { + // do nothing + } + } + + return ret; +} + +int ObLogPartMgr::add_table_id_into_cache_(const ObSimpleTableSchemaV2 &tb_schema, + const char *db_name, + const uint64_t primary_table_id) +{ + int ret = OB_SUCCESS; + const uint64_t table_id = tb_schema.get_table_id(); + const uint64_t db_id = tb_schema.get_database_id(); + TableID table_id_key(table_id); + TableInfo tb_info; + const bool is_global_normal_index = tb_schema.is_global_normal_index_table(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == table_id) || OB_UNLIKELY(OB_INVALID_ID == primary_table_id)) { + LOG_ERROR("invalid argument", K(table_id), K(primary_table_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(tb_info.init(primary_table_id))) { + LOG_ERROR("tb_info init fail", KR(ret), K(table_id), K(primary_table_id)); + } else { + if (is_global_normal_index) { + if (OB_FAIL(global_normal_index_table_cache_->insert(table_id_key, tb_info))) { + if (OB_ENTRY_EXIST == ret) { + // cache ready exist + ret = OB_SUCCESS; + } else { + LOG_ERROR("global_normal_index_table_cache_ insert fail", KR(ret), + K(table_id_key), K(tb_info), + K(table_id), "index_table_name", tb_schema.get_table_name(), + "is_global_normal_index_table", tb_schema.is_global_normal_index_table(), + K(db_id)); + } + } else { + LOG_INFO("[GLOBAL_NORMAL_INDEX_TBALE] [ADD]", K(table_id_key), K(tb_info), + K(table_id), "index_table_name", tb_schema.get_table_name(), + "is_global_normal_index_table", tb_schema.is_global_normal_index_table(), + K(db_id), K(db_name)); + } + } else { + if (OB_FAIL(table_id_cache_->insert(table_id_key, tb_info))) { + if (OB_ENTRY_EXIST == ret) { + // cache ready exist + ret = OB_SUCCESS; + } else { + LOG_ERROR("table_id_cache_ insert fail", KR(ret), + K(table_id_key), K(tb_info), + K(table_id), "table_name", tb_schema.get_table_name(), + "is_unique_index", tb_schema.is_unique_index(), + "is_global_unique_index_table", tb_schema.is_global_unique_index_table(), + K(db_id), K(db_name)); + } + } else { + LOG_INFO("[SERVED_TABLE_ID_CACHE] [ADD]", K(table_id_key), K(tb_info), + K(table_id), "table_name", tb_schema.get_table_name(), + "is_unique_index", tb_schema.is_unique_index(), + "is_global_unique_index_table", tb_schema.is_global_unique_index_table(), + K(db_id), K(db_name)); + } + } + } + + return ret; +} + +int ObLogPartMgr::remove_table_id_from_cache_(const ObSimpleTableSchemaV2 &tb_schema) +{ + int ret = OB_SUCCESS; + const uint64_t table_id = tb_schema.get_table_id(); + TableID table_id_key(table_id); + const bool is_global_normal_index = tb_schema.is_global_normal_index_table(); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == table_id)) { + LOG_ERROR("invalid argument", K(table_id)); + ret = OB_INVALID_ARGUMENT; + } else { + if (is_global_normal_index) { + // Global common index, operate global common index cache + if (OB_FAIL(global_normal_index_table_cache_->erase(table_id_key))) { + // Partition may not exist, normal + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("global_normal_index_table_cache_ erase fail", KR(ret), K(table_id_key)); + } + } else { + LOG_INFO("[GLOBAL_NORMAL_INDEX_TBALE] [REMOVE]", K(table_id_key), + K(table_id), "index_table_name", tb_schema.get_table_name(), + "is_global_normal_index_table", tb_schema.is_global_normal_index_table()); + } + } else { + if (OB_FAIL(table_id_cache_->erase(table_id_key))) { + // Partition may not exist, normal + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("table_id_cache erase fail", KR(ret), K(table_id_key)); + } + } else { + LOG_INFO("[SERVED_TABLE_ID_CACHE] [REMOVE]", K(table_id_key), + K(table_id), "table_name", tb_schema.get_table_name(), + "is_unique_index", tb_schema.is_unique_index(), + "is_global_unique_index_table", tb_schema.is_global_unique_index_table()); + } + } + } + + return ret; +} + +int ObLogPartMgr::is_exist_table_id_cache_(const uint64_t table_id, + const bool is_global_normal_index, + bool &is_exist) +{ + int ret = OB_SUCCESS; + TableID table_id_key(table_id); + TableInfo info; + is_exist = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == table_id)) { + LOG_ERROR("invalid argument", K(table_id)); + ret = OB_INVALID_ARGUMENT; + } else { + if (is_global_normal_index) { + if (OB_FAIL(global_normal_index_table_cache_->get(table_id_key, info))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + is_exist = false; + } else { + LOG_ERROR("global_normal_index_table_cache_ get fail", KR(ret), K(table_id_key)); + } + } else { + is_exist = true; + } + } else { + if (OB_FAIL(table_id_cache_->get(table_id_key, info))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + is_exist = false; + } else { + LOG_ERROR("table_id_cache_ get fail", KR(ret), K(table_id_key)); + } + } else { + is_exist = true; + } + + LOG_DEBUG("[SERVED_TABLE_ID_CACHE] [IS_EXIST]", K(tenant_id_), K(table_id), K(is_exist)); + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_part_mgr.h b/src/liboblog/src/ob_log_part_mgr.h new file mode 100644 index 0000000000000000000000000000000000000000..0931e6f1d4c8829054341ca3cf7418c7b4b0b0d2 --- /dev/null +++ b/src/liboblog/src/ob_log_part_mgr.h @@ -0,0 +1,883 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_MGR_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_MGR_H_ + +#include "common/ob_partition_key.h" // ObPartitionKey +#include "lib/lock/ob_thread_cond.h" // ObThreadCond +#include "share/schema/ob_schema_struct.h" // PartitionStatus + +#include "ob_log_part_info.h" // ObLogPartInfo, PartInfoMap +#include "ob_log_table_id_cache.h" // GIndexCache, TableIDCache +#include "ob_log_part_callback.h" // PartCBArray + +namespace oceanbase +{ +namespace share +{ +namespace schema { +class ObPartitionSchema; +class ObSimpleTableSchemaV2; +class ObTableSchema; +class ObTablegroupSchema; +} // namespace schema +} // namespace share + +using share::schema::ObPartitionSchema; +using share::schema::ObSimpleTableSchemaV2; +using share::schema::ObTableSchema; +using share::schema::ObTablegroupSchema; + +namespace liboblog +{ +class ObLogSchemaGuard; + +//////////////////////////////////////////////////////////////////////////////////////// + +class IObLogPartMgr +{ +public: + virtual ~IObLogPartMgr() {} + +public: + /// Add a table + /// @note must be called by a single thread in order according to the Schema version, should not concurrently add table in a random order + /// + /// @param table_id Table ID + /// @param start_schema_version The Schema version of the start service + /// @param start_serve_tstamp The timestamp of the start service + /// @param is_create_partition whether it is a newly created partition + /// @param [out] is_table_should_ignore_in_committer Whether to filter the DDL of this added table in the committer + /// @param [out] schema_guard schema guard + /// @param [out] tenant_name Returned tenant name + /// @param [out] db_name DB name returned + /// @param timeout timeout time + //// + /// TODO Consider the specific meaning of table/db/tenant schema when it is NULL, currently unified as TENANT_HAS_BEEN_DROPPED, which has a problematic meaning, if the caller encounters this error code and ignores it, the result is fine + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int add_table(const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_serve_tstamp, + const bool is_create_partition, + bool &is_table_should_ignore_in_committer, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) = 0; + + /// Add a global unique index table, create index table scenario + /// @note must be called by a single thread in order by Schema version, not concurrently and in random order + //// + /// Support for handling OB_DDL_CREATE_GLOBAL_INDEX global normal indexes and global unique index tables + /// Support for handling OB_DDL_CREATE_INDEX The process of refreshing the schema will filter out the normal indexes and keep the unique indexes + /// 1. For globally unique indexes, add partitions if they match, and add TableIDCache + /// 2. For global common indexes, add global common index cache + /// 3. 
Add TableIDCache for unique indexes (not global) + /// + /// @param table_id Table ID + /// @param start_schema_version The Schema version of the start service + /// @param start_serve_tstamp Timestamp of the start service + /// @param [out] schema_guard schema guard + /// @param [out] tenant_name The name of the tenant returned + /// @param [out] db_name DB name returned + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int add_index_table(const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_serve_tstamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) = 0; + + /// alter table + /// @note This function must be called in order by a single thread according to the schema version, + /// can not concurrently added in random order + /// + /// @param table_id tableId + /// @param schema_version_before_alter old Schema version + /// @param schema_version_after_alter new Schema version + /// @param [out] old_schema_guard old schema guard + /// @param [out] new_schema_guard new schema guard + /// @param [out] old_tenant_name return old tenant name + /// @param [out] old_db_name return db name + /// @param timeout Timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int alter_table(const uint64_t table_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &old_schema_guard, + ObLogSchemaGuard &new_schema_guard, + const char *&old_tenant_name, + const char *&old_db_name, + const char *event, + const int64_t timeout) = 0; + + /// Table splitting + /// This call corresponds to the "start split" DDL, the main purpose of which is to add the split partition + /// + /// @param table_id Table ID + /// @param new_schema_version The version of the schema after the split + /// @param start_serve_timestamp The starting service time of the new partition + /// @param [out] new_schema_guard new schema guard + /// @param [out] tenant_name The new tenant name returned + /// @param [out] db_name new DB name returned + /// @param timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int split_table(const uint64_t table_id, + const int64_t new_schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &new_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) = 0; + + /// Delete a table + /// @note must be called by a single thread in order by Schema version, no concurrent messy deletions + //// + /// @param table_id Table ID + /// @param schema_version_before_drop Deletes the Schema version before the table is dropped + /// @param schema_version_after_drop Deletes the Schema version after the table + /// @param [out] is_table_should_ignore_in_committer whether to filter the DDL of this drop table in the committer + /// @param [out] old_schema_guard old schema guard + /// @param [out] tenant_name The old tenant name 
returned + /// @param [out] db_name Old DB name returned + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int drop_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + bool &is_table_should_ignore_in_committer, + ObLogSchemaGuard &old_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) = 0; + + /// Delete global index table, Delete index table scenario + /// @note must be called by a single thread in order by Schema version, not concurrently in order + //// + /// Support for handling OB_DDL_DROP_GLOBAL_INDEX global normal indexes and global unique index tables + /// Support for handling OB_DDL_DROP_INDEX The process of refreshing the schema will filter out the normal indexes and keep the unique indexes + /// 1. For globally unique indexes, delete the corresponding partition + /// 2. For global common indexes, delete the global common index cache + /// 3. Delete TableIDCache for unique indexes (not global) + //// + /// @param table_id TableID + /// @param schema_version_before_drop Delete the Schema version before the table + /// @param schema_version_after_drop Delete the Schema version after the table + /// @param [out] old_schema_guard old schema guard + /// @param [out] tenant_name old tenant name returned + /// @param [out] db_name Old DB name returned + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int drop_index_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + ObLogSchemaGuard &old_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout) = 0; + + /// Supports tablegroup additions + /// @note must be called by a single thread in order according to the Schema version, not concurrently and in random order + /// + /// @param tablegroup_id Tablegroup ID + /// @param schema_version Schema version + /// @param start_serve_timestamp Start service timestamp + /// @param [out] schema_guard schema guard + /// @param [out] tenant_name Returned tenant name + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int add_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout) = 0; + + /// Supports tablegroup deletion + /// @note must be called by a single thread in order by Schema version, not concurrently added in random order + /// @param tablegroup_id tablegroup ID + /// @param schema_version_before_drop The version of the Schema before the drop + /// @param schema_version_after_drop Schema version after drop + /// @param [out] schema_guard schema guard + /// @param [out] tenant_name The tenant name returned + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval 
OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int drop_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout) = 0; + + /// PG split + /// This call corresponds to the "start split" DDL, the main purpose of which is to add the split partition + /// + /// @param tenant_id Tenant ID + /// @param tablegroup_id tablegroup ID + /// @param new_schema_version The version of the schema after the split + /// @param start_serve_timestamp Start service time of the new partition + /// @param [out] schema_guard schema guard + /// @param [out] tenant_name Returned tenant name + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int split_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t new_schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout) = 0; + + /// Modify tablegroup + /// 1. Support for dynamic partitioning of tablegroups + /// 2. Support tablegroup splitting + /// @note must be called by a single thread in order by Schema version, not concurrently added in random order + /// + /// @param tablegroup_id tablegroup ID + /// @param schema_version_before_alter old Schema version + /// @param schema_version_after_alter New Schema version + /// @param start_serve_timestamp Start service timestamp + /// @param [out] old_schema_guard old schema guard + /// @param [out] new_schema_guard new schema guard + /// @param [out] tenant_name Returned tenant name + /// @param timeout timeout time + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int alter_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &old_schema_guard, + ObLogSchemaGuard &new_schema_guard, + const char *&tenant_name, + const int64_t timeout) = 0; + + /// Add all tables under the current tenant + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int add_all_tables( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) = 0; + + /// Adding a general tenant internal table + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int add_inner_tables(const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) = 0; + + /// Delete all tables under the tenant + virtual int drop_all_tables() = 0; + + // Add the __all_ddl_operation table for this tenant + // must be added successfully, otherwise an error is reported + virtual int add_ddl_table( + const 
int64_t start_serve_tstamp, + const int64_t start_schema_version, + const bool is_create_tenant) = 0; + + /// update schema version + virtual int update_schema_version(const int64_t schema_version) = 0; + + /// Print partition service information + virtual void print_part_info(int64_t &serving_part_count, + int64_t &offline_part_count, + int64_t ¬_served_part_count) = 0; + + /// Check if a partition transaction is being served and if so, increase the number of running transactions on the partition + /// Supports DDL and DML partitions + /// + /// @param [out] is_serving return value, identifies whether the partition transaction is being served + /// @param [in] key Partition key + /// @param [in] prepare_log_id Prepare log ID + /// @param [in] prepare_log_timestamp Prepare log timestamp + /// @param [in] inc_trans_count Whether to increase the number of ongoing transactions if a partitioned transaction is being served + /// @param [in] timeout timeout time + //// + /// @retval OB_SUCCESS Success + /// @retval OB_TIMEOUT timeout + /// @retval Other return values Fail + virtual int inc_part_trans_count_on_serving(bool &is_serving, + const common::ObPartitionKey &key, + const uint64_t prepare_log_id, + const int64_t prepare_log_timestamp, + const bool print_participant_not_serve_info, + const int64_t timeout) = 0; + + /// Decrement the number of running transactions in a partition + /// + /// @param key paritition key + /// + /// @retval OB_SUCCESS Success + /// @retval Other return values Fail + virtual int dec_part_trans_count(const common::ObPartitionKey &key) = 0; + + /// Offline partitions and perform recycling operations + /// + /// Calling scenario: The Committer receives the task to downline partitioning, ensures that all previous data has been output, and calls this interface to downline partitioning + //// + //// Cautions. + /// 1. The partition's downline log is parallel to the DDL, and the downline log may be called in parallel with the DDL delete partition + /// 2. The partition may be deleted before the "delete partition DDL" arrives, but it is guaranteed that no data will be dependent on the partition. + /// 3. For partition splitting scenarios, the interface can only be relied upon to delete the old partition, the DDL is not responsible for the deletion and the aim is to ensure that all data is output + /// 4. requires that there are no dependent transactions on the partition, i.e. 
a transaction count of 0, otherwise the partition cannot be reclaimed + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int offline_and_recycle_partition(const common::ObPartitionKey &pkey) = 0; + + /// Only offline partition, no forced recycle required + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + virtual int offline_partition(const common::ObPartitionKey &pkey) = 0; + + /// Filtering table data within PG, based on table_id to determine if it is in TableIDCache + /// + /// @retval OB_SUCCESS success + /// @retval other error code fail + virtual int is_exist_table_id_cache(const uint64_t table_id, + bool &is_exist) = 0; + + /// Filtering table data within PG, handling future table logic based on table_version + virtual int handle_future_table(const uint64_t table_id, + const int64_t table_version, + const int64_t timeout, + bool &is_exist) = 0; +}; + +///////////////////////////////////////////////////////////////////////////// + +class ObLogTenant; +class ObLogPartMgr : public IObLogPartMgr +{ +private: + static const int64_t PRINT_LOG_INTERVAL = 10 * _SEC_; + +public: + explicit ObLogPartMgr(ObLogTenant &tenant); + virtual ~ObLogPartMgr(); + +public: + int init(const uint64_t tenant_id, + const int64_t start_schema_version, + const bool enable_oracle_mode_match_case_sensitive, + PartInfoMap &map, + GIndexCache &gi_cache, + TableIDCache &table_id_cache, + PartCBArray &part_add_cb_array, + PartCBArray &part_rc_cb_array); + void reset(); + int64_t get_schema_version() const { return ATOMIC_LOAD(&cur_schema_version_); } + +public: + virtual int add_table(const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_serve_tstamp, + const bool is_create_partition, + bool &is_table_should_ignore_in_committer, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout); + virtual int alter_table(const uint64_t table_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &old_schema_guard, + ObLogSchemaGuard &new_schema_guard, + const char *&old_tenant_name, + const char *&old_db_name, + const char *event, + const int64_t timeout); + virtual int split_table(const uint64_t table_id, + const int64_t new_schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &new_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout); + virtual int drop_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + bool &is_table_should_ignore_in_committer, + ObLogSchemaGuard &old_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout); + virtual int add_index_table(const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_serve_tstamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout); + virtual int drop_index_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + ObLogSchemaGuard &old_schema_guard, + const char *&tenant_name, + const char *&db_name, + const int64_t timeout); + 
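+ // Illustrative note, summarizing the IObLogPartMgr contract documented above (no new behavior): these DDL entry points are expected to be driven by a single thread in strict schema-version order, e.g. add_table() at version v1, then alter_table() for v1 -> v2, then drop_table() for v2 -> v3; concurrent or out-of-order invocation is not supported.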
virtual int add_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout); + virtual int drop_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout); + virtual int split_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t new_schema_version, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout); + virtual int alter_tablegroup_partition( + const uint64_t tablegroup_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t start_serve_timestamp, + ObLogSchemaGuard &old_schema_guard, + ObLogSchemaGuard &new_schema_guard, + const char *&tenant_name, + const int64_t timeout); + int add_all_tables( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + int add_inner_tables(const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + int drop_all_tables(); + int add_ddl_table( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const bool is_create_tenant); + virtual int update_schema_version(const int64_t schema_version); + virtual int inc_part_trans_count_on_serving(bool &is_serving, + const common::ObPartitionKey &key, + const uint64_t prepare_log_id, + const int64_t prepare_log_timestamp, + const bool print_participant_not_serve_info, + const int64_t timeout); + virtual int dec_part_trans_count(const common::ObPartitionKey &key); + virtual void print_part_info(int64_t &serving_part_count, + int64_t &offline_part_count, + int64_t ¬_served_part_count); + virtual int offline_and_recycle_partition(const common::ObPartitionKey &pkey); + virtual int offline_partition(const common::ObPartitionKey &pkey); + virtual int is_exist_table_id_cache(const uint64_t table_id, + bool &is_exist); + virtual int handle_future_table(const uint64_t table_id, + const int64_t table_version, + const int64_t timeout, + bool &is_exist); + +private: + int add_tenant_all_tablegroup_(const uint64_t tenant_id, + const int64_t schema_version, + const int64_t start_serve_tstamp, + const int64_t timeout); + // operation: + // 1. global normal index cache + // 2. 
TableIDCache + int add_table_id_into_cache_(const ObSimpleTableSchemaV2 &tb_schema, + const char *db_name, + const uint64_t primary_table_id); + int clean_table_id_cache_(); + int remove_table_id_from_cache_(const ObSimpleTableSchemaV2 &tb_schema); + int is_exist_table_id_cache_(const uint64_t table_id, + const bool is_global_normal_index, + bool &is_exist); + + int add_tablegroup_partition_( + const uint64_t tablegroup_id, + const ObTablegroupSchema &tg_schema, + const int64_t start_serve_timestamp, + const bool is_create_partition, + const char *tenant_name, + const int64_t timeout); + int drop_tablegroup_partition_( + const uint64_t tablegroup_id, + const ObTablegroupSchema &tg_schema); + int split_tablegroup_partition_( + const uint64_t tablegroup_id, + const ObTablegroupSchema &tg_schema, + const int64_t start_serve_timestamp); + int get_table_ids_in_tablegroup_(const uint64_t tenant_id, + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t timeout, + common::ObArray &table_id_array); + int alter_tablegroup_partition_when_is_not_binding_( + const uint64_t tablegroup_id, + const int64_t schema_version_before_alter, + ObLogSchemaGuard &old_schema_guard, + const int64_t schema_version_after_alter, + ObLogSchemaGuard &new_schema_guard, + const int64_t start_serve_timestamp, + const share::ObWorker::CompatMode &compat_mode, + const int64_t timeout); + + /// add/drop partition dynamicly(means by ddl) + int alter_table_add_or_drop_partition_( + const bool is_tablegroup, + const bool has_physical_part, + const int64_t start_serve_timestamp, + const ObPartitionSchema *old_tb_schema, + const ObPartitionSchema *new_tb_schema, + const int64_t database_id, + const char *event); + int alter_table_drop_partition_( + const bool is_tablegroup, + const uint64_t table_id, + const common::ObArray &drop_part_ids, + const int64_t partition_cnt); + int alter_table_add_partition_( + const bool is_tablegroup, + const bool has_physical_part, + const uint64_t table_id, + const common::ObArray &add_part_ids, + const int64_t partition_cnt, + const int64_t start_serve_timestamp, + const uint64_t tablegroup_id, + const uint64_t database_id); + int split_table_(const ObSimpleTableSchemaV2 *tb_schema, + const char *tenant_name, + const char *db_name, + const int64_t start_serve_timestamp, + const share::ObWorker::CompatMode &compat_mode); + /// Add a table + /// When manipulating a global index table, primary_table_schema represents its primary table schema + /// + // The add_table_function is as follows. + // (1) Partition changes: add primary table partitions, global unique index table partitions + // (2) Global common index cache: if the main table matches, add a global common index cache + // (3) TableIDCache: add main table table_id, unique index table_id, global unique index table_id, and the above two functions have intersection, need special attention under + // + // In particular. + // 1. the start-up moment: + // (1) Partition addition: Adding main table partition, global unique index table partition + // (2) Global common index cache: if the primary table matches, add the global common index cache + // (3) TableIDCache. + // a. Main table table_id + // b. global index table_id + // c. get_schemas_based_on_table_schema_no_more_filtering_for_unique_index_tables, where you need to add unique index tables + // + // 2. handle OB_DDL_CREATE_TABLE + // (1) Add a master table partition that matches the whitelist, and add a TableIDCache + // 3. 
Process OB_DDL_CREATE_GLOBAL_INDEX + // add_table_ supports handling of global normal indexes and global unique index tables + // (1) For globally unique indexes, add partitions if they match, and add TableIDCache + // (2) For global common indexes, add cache + int add_table_(const int64_t start_serve_tstamp, + const bool is_create_partition, + const ObSimpleTableSchemaV2 *tb_schema, + const char *tenant_name, + const char *db_name, + const int64_t timeout, + const ObSimpleTableSchemaV2 *primary_table_schema = NULL); + + // drop_table_ functions as follows. + // (1) Partition change: delete the main table partition, the global unique index table partition + // (2) Global common index cache: delete the corresponding global common index + // (3) TableIDCache: + // a. Delete unique indexes + // b. Delete main table, global unique index + int drop_table_(const ObSimpleTableSchemaV2 *table_schema); + // Filtering tables + // 1. DDL tables will not be filtered + // 2. Non-user tables are filtered (global unique indexes/unique indexes require special handling) + // 3. proxy tables will be filtered + // 4. user tables are matched based on a whitelist + // where the global unique index table needs to be refreshed with the master schema, so that it is whitelisted based on the master table + int filter_table_(const ObSimpleTableSchemaV2 *table_schema, + const char *tenant_name, + const char *db_name, + const share::ObWorker::CompatMode &compat_mode, + bool &chosen, + bool &is_primary_table_chosen, /* Indicates whether the global index table corresponds to the main table */ + const ObSimpleTableSchemaV2 *primary_table_schema = NULL); + // is a unique index table (not a global unique index) + // The unique index table contains. + // INDEX_TYPE_UNIQUE_LOCAL + // INDEX_TYPE_UNIQUE_GLOBAL + // INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE + // where INDEX_TYPE_UNIQUE_GLOBAL is a globally unique index + bool is_unique_index_table_but_expect_global_unqiue_index_(const ObSimpleTableSchemaV2 &table_schema) const; + + /// Filtering PG + /// tablegroup matches based on whitelist + int filter_tablegroup_(const ObTablegroupSchema *tg_schema, + const char *tenant_name, + const share::ObWorker::CompatMode &compat_mode, + bool &chosen); + /// add a partition + int add_served_partition_(const common::ObPartitionKey &pkey, + const common::ObPartitionKey &check_serve_info_pkey, + const int64_t start_serve_tstamp, + const bool is_create_partition, + const bool has_physical_part, + const uint64_t tablegroup_id, + const uint64_t database_id, + bool &add_succ); + int add_served_part_pre_check_(const common::ObPartitionKey &pkey); + int add_partition_(const common::ObPartitionKey& pkey, + const int64_t start_tstamp, + const bool is_create_partition, + const bool is_served); + int offline_partition_(const common::ObPartitionKey &pkey, + const bool ensure_recycled_when_offlined = false); + /// Recycle a partition: delete all relevant data structures of the partition + int recycle_partition_(const common::ObPartitionKey &pkey, ObLogPartInfo *info); + /// call callbacks of add-partition + int call_add_partition_callbacks_(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const uint64_t start_log_id); + /// Notify the modules that the partition is ready for recycling + int call_recycle_partition_callbacks_(const common::ObPartitionKey &pkey); + + /// Tenant monitoring items: adding and removing tenants + /// Adding "Tenants" to the service + int add_served_tenant_for_stat_(const char *tenant_name, + const uint64_t 
tenant_id); + /// delete served tenant + int del_served_tenant_for_stat_(const uint64_t tenant_id); + + /// filter tenant + int filter_tenant_(const char *tenant_name, + bool &chosen); + + // Check if a partition is served + // 1. partition by default according to the old way of calculating partitions, i.e.: partition tasks by table-group-id + partition-id + // If table-group-id is invalid, divide tasks by database-id + partition-id + // 2. When enable_new_partition_hash_algorithm = 1, partition according to the new calculation. + // partition tasks by table_id + partition_id, avoiding tablegroup_id, database_id dependencies + // TODO optimization + // In a multi-instance scenario, this partitioning rule does not guarantee that partitions belonging to the same Partition Group are partitioned into one instance + bool is_partition_served_(const common::ObPartitionKey &pkey, + const uint64_t tablegroup_id, + const uint64_t database_id) const; + // get Schema + int get_schema_(const int64_t timestamp, const int64_t timeout, ObLogSchemaGuard &schema_guard); + + /// Check if the partition is serviced, if so, increase the number of partition statements + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED fail to fetch schema table, database, tenant, tenant may dropped + /// @retval other error code fail + int inc_trans_count_on_serving_(bool &is_serving, + const common::ObPartitionKey &key, + const bool print_participant_not_serve_info); + + /// Check partition status + int check_part_status_(const common::ObPartitionKey &pkey, + const int64_t schema_version, + const int64_t timeout, + share::schema::PartitionStatus &part_status); + + /// Handling future table scenarios + int handle_future_part_when_inc_trans_count_on_serving_(bool &is_serving, + const common::ObPartitionKey &key, + const bool print_participant_not_serve_info, + const int64_t base_schema_version, + const int64_t timeout); + int check_cur_schema_version_when_handle_future_part_(const int64_t schema_version, + const int64_t end_time); + int check_cur_schema_version_when_handle_future_table_(const int64_t schema_version, + const int64_t end_time); + + bool is_proxy_table(const char *tenant_name, const char *db_name, const char *tb_name); + + // Multi-instance scenarios, splitting not supported + bool need_to_support_split_when_in_multi_instance_() const; + + int get_ddl_pkey_(const uint64_t tenant_id, const int64_t schema_version, ObPartitionKey &pkey); + + int add_ddl_table_(const uint64_t tenant_id, + const int64_t start_serve_tstamp, + ObLogSchemaGuard &schema_guard, + const int64_t timeout); + + template + int add_table_or_tablegroup_( + const bool is_tablegroup, + const uint64_t table_id, + const uint64_t tablegroup_id, + const uint64_t db_id, + const bool has_physical_part, + const bool is_create_partition, + const int64_t start_serve_timestamp, + PartitionKeyIter &pkey_iter, + PartitionSchema &table_schema, + int64_t &served_part_count); + template + int drop_table_or_tablegroup_( + const bool is_tablegroup, + const uint64_t table_id, + const char *table_name, + PartitionSchema &table_schema, + int64_t &served_part_count); + template + int split_table_or_tablegroup_( + const bool is_tablegroup, + const uint64_t table_id, + const uint64_t tablegroup_id, + const uint64_t db_id, + const bool has_physical_part, + const int64_t start_serve_timestamp, + PartitionKeyIter &pkey_iter, + PartitionSchema &table_schema); + int do_add_all_tablegroups_( + ObLogSchemaGuard &schema_guard, + const int64_t 
start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + int do_add_all_tables_( + ObLogSchemaGuard &schema_guard, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + int do_add_inner_tables_( + ObLogSchemaGuard &schema_guard, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + +// function about schema +private: + bool has_physical_part_(const ObSimpleTableSchemaV2 &table_schema); + bool has_physical_part_(const ObTablegroupSchema &tg_schema); + // get Simple Table Schema + int get_simple_table_schema_(const uint64_t table_id, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&table_schema); + // get Full Table Schema + int get_full_table_schema_(const uint64_t table_id, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObTableSchema *&tb_schema); + int get_schema_guard_and_table_schema_(const uint64_t table_id, + const int64_t schema_version, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&tb_schema); + int get_lazy_schema_guard_and_tablegroup_schema_( + const uint64_t tablegroup_id, + const int64_t schema_version, + const int64_t timeout, + ObLogSchemaGuard &schema_guard, + const ObTablegroupSchema *&tg_schema); + // get schema info based on Table Schema + int get_schema_info_based_on_table_schema_(const ObSimpleTableSchemaV2 *tb_schema, + ObLogSchemaGuard &schema_guard, + const int64_t timeout, + bool &table_is_ignored, + const char *&tenant_name, + const char *&db_schema); + // init guard to get Simple Table Schema、db and tenant info + int get_schema_guard_and_schemas_(const uint64_t table_id, + const int64_t schema_version, + const int64_t timeout, + bool &table_is_ignored, + ObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&tb_schema, + const char *&tenant_name, + const char *&db_name); + +private: + ObLogTenant &host_; + + bool inited_; + uint64_t tenant_id_; + PartInfoMap *map_; + GIndexCache *global_normal_index_table_cache_; // global normal index cache + TableIDCache *table_id_cache_; + PartCBArray *part_add_cb_array_; + PartCBArray *part_rc_cb_array_; + + int64_t cur_schema_version_ CACHE_ALIGNED; + + // Default whitelist match insensitive + bool enable_oracle_mode_match_case_sensitive_; + + // Conditional + common::ObThreadCond schema_cond_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogPartMgr); +}; +} +} +#endif diff --git a/src/liboblog/src/ob_log_part_progress_controller.cpp b/src/liboblog/src/ob_log_part_progress_controller.cpp new file mode 100644 index 0000000000000000000000000000000000000000..574f6ecae878da35b04c9bbec11d196a931db19b --- /dev/null +++ b/src/liboblog/src/ob_log_part_progress_controller.cpp @@ -0,0 +1,305 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_progress_controller.h" + +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/utility/ob_macro_utils.h" // OB_UNLIKELY +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +#include "ob_log_utils.h" // TS_TO_STR + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ +//////////////////////////// PartProgressController //////////////////////////// + +PartProgressController::PartProgressController() : + inited_(false), + max_progress_cnt_(0), + progress_list_(NULL), + recycled_indices_(), + recycled_indices_lock_(), + progress_cnt_(0), + valid_progress_cnt_(0), + thread_counter_(0), + last_global_count_and_timeval_(), + global_count_and_timeval_() +{ + last_global_count_and_timeval_.lo = 0; + last_global_count_and_timeval_.hi = 0; + global_count_and_timeval_.lo = 0; + global_count_and_timeval_.hi = 0; +} + +PartProgressController::~PartProgressController() +{ + destroy(); +} + +int PartProgressController::init(const int64_t max_progress_cnt) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(max_progress_cnt <= 0)) { + LOG_ERROR("invalid progress cnt", K(max_progress_cnt)); + ret = OB_INVALID_ARGUMENT; + } else { + const int64_t size = max_progress_cnt * static_cast(sizeof(Item)); + progress_list_ = static_cast(ob_malloc(size, ObModIds::OB_LOG_PART_PROGRESS_CONTROLLER)); + + if (OB_ISNULL(progress_list_)) { + LOG_ERROR("alloc progress list fail", K(size), K(max_progress_cnt)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + max_progress_cnt_ = max_progress_cnt; + + for (int64_t idx = 0, cnt = max_progress_cnt; OB_SUCCESS == ret && (idx < cnt); ++idx) { + Item &item = *(progress_list_ + idx); + item.reset(); + } + } + + if (OB_SUCCESS == ret) { + thread_counter_ = 0; + last_global_count_and_timeval_.lo = 0; + last_global_count_and_timeval_.hi = 0; + global_count_and_timeval_.lo = 0; + global_count_and_timeval_.hi = 0; + inited_ = true; + } + } + return ret; +} + +void PartProgressController::destroy() +{ + inited_ = false; + + if (NULL != progress_list_) { + ob_free(progress_list_); + progress_list_ = NULL; + } + + max_progress_cnt_ = 0; + progress_cnt_ = 0; + valid_progress_cnt_ = 0; + recycled_indices_.destroy(); + thread_counter_ = 0; + last_global_count_and_timeval_.lo = 0; + last_global_count_and_timeval_.hi = 0; + global_count_and_timeval_.lo = 0; + global_count_and_timeval_.hi = 0; +} + +int PartProgressController::acquire_progress(int64_t &progress_id, const int64_t start_progress) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(recycled_indices_lock_); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(INVALID_PROGRESS == start_progress)) { + LOG_ERROR("invalid start progress", K(start_progress)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(progress_list_)) { + LOG_ERROR("invalid progress list", K(progress_list_)); + ret = OB_ERR_UNEXPECTED; + } else if ((0 == recycled_indices_.count()) && (max_progress_cnt_ <= progress_cnt_)) { + LOG_WARN("progress id used up", K(progress_cnt_), K(max_progress_cnt_)); + ret = OB_NEED_RETRY; + } else { + if (0 < recycled_indices_.count()) { + progress_id = recycled_indices_.at(recycled_indices_.count() - 1); + recycled_indices_.pop_back(); + } else { + progress_id = progress_cnt_; + ATOMIC_INC(&progress_cnt_); + } + + // Set the starting progress value + progress_list_[progress_id].reset(start_progress); + + ATOMIC_INC(&valid_progress_cnt_); + + _LOG_INFO("[STAT] [PROGRESS_CONTROLLER] [ACQUIRE] progress_id=%ld start_progress=%ld(%s) " + "progress_cnt=(total=%ld,valid=%ld,recycled=%ld,max=%ld) ", + progress_id, start_progress, TS_TO_STR(start_progress), + progress_cnt_, valid_progress_cnt_, recycled_indices_.count(), max_progress_cnt_); + } + + return ret; +} + +int PartProgressController::release_progress(const int64_t progress_id) +{ + int ret = OB_SUCCESS; + + ObSpinLockGuard guard(recycled_indices_lock_); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(progress_list_)) { + LOG_ERROR("invalid progress list", K(progress_list_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(progress_id < 0) || OB_UNLIKELY(progress_cnt_ <= progress_id)) { + LOG_ERROR("invalid progress id", K(progress_id), K(progress_cnt_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(recycled_indices_.push_back(progress_id))) { + LOG_ERROR("push back recycled index fail", KR(ret), K(progress_id)); + } else { + Item &item = *(progress_list_ + progress_id); + item.reset(); + + ATOMIC_DEC(&valid_progress_cnt_); + + _LOG_INFO("[STAT] [PROGRESS_CONTROLLER] [RELEASE] progress_id=%ld progress_cnt=(total=%ld," + "valid=%ld,recycled=%ld,max=%ld)", + progress_id, progress_cnt_, valid_progress_cnt_, recycled_indices_.count(), + max_progress_cnt_); + } + return ret; + +} + +int PartProgressController::update_progress(const int64_t progress_id, const int64_t progress) +{ + int ret = OB_SUCCESS; + const int64_t progress_cnt = ATOMIC_LOAD(&progress_cnt_); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(progress_list_)) { + LOG_ERROR("invalid progress list", K(progress_list_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(progress_id < 0) || OB_UNLIKELY(progress_cnt <= progress_id)) { + LOG_ERROR("invalid progress id", K(progress_id), K(progress_cnt)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(INVALID_PROGRESS == progress)) { + LOG_ERROR("invalid progress value", K(progress)); + ret = OB_INVALID_ARGUMENT; + } else { + Item &item = *(progress_list_ + progress_id); + item.update(progress); + + _LOG_DEBUG("[STAT] [PROGRESS_CONTROLLER] [UPDATE] progress_id=%ld progress=%ld(%s) " + "delay=%s progress_cnt=(total=%ld,valid=%ld,recycled=%ld,max=%ld)", + progress_id, progress, TS_TO_STR(progress), TS_TO_DELAY(progress), + progress_cnt_, valid_progress_cnt_, recycled_indices_.count(), max_progress_cnt_); + } + return ret; +} + +// If there is no minimum, an invalid value is returned: INVALID_PROGRESS +int PartProgressController::get_min_progress(int64_t &progress) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(progress_list_)) { + LOG_ERROR("invalid progress list", K(progress_list_)); + ret = OB_ERR_UNEXPECTED; + } else if (ATOMIC_LOAD(&valid_progress_cnt_) <= 0) { + progress = INVALID_PROGRESS; + } else { + progress = INVALID_PROGRESS; + const int64_t cnt = ATOMIC_LOAD(&progress_cnt_); + int64_t execution_time = get_timestamp(); + int64_t min_progress_id = 0; + + for (int64_t idx = 0; OB_SUCCESS == ret && (idx < cnt); ++idx) { + const int64_t this_progress = progress_list_[idx].get(); + if (INVALID_PROGRESS != this_progress) { + if ((INVALID_PROGRESS == progress) || (this_progress < progress)) { + progress = this_progress; + min_progress_id = idx; + } + } + } + + LOG_DEBUG("[FETCHER] [GET_MIN_PROGRESS] ", K(progress), K_(progress_cnt), K(min_progress_id)); + execution_time = get_timestamp() - execution_time ; + // Update execution time, print execution time periodically + update_execution_time_(execution_time); + } + + return ret; +} + +int64_t PartProgressController::get_itid_() +{ + static __thread int64_t index = -1; + return index < 0 ? 
(index = ATOMIC_FAA(&thread_counter_, 1)) : index; +} + +void PartProgressController::update_execution_time_(int64_t execution_time) +{ + // Multi-threaded resource seizure, recording the id number of the seized thread and subsequently having the thread periodically print the average execution time + static int64_t tid_flag = -1; + // get tid + int64_t tid = get_itid_(); + + if (OB_UNLIKELY(-1 == tid_flag)) { + ATOMIC_CAS(&tid_flag, -1, tid); + } + + while (true) { + types::uint128_t old_v; + types::uint128_t new_v; + + LOAD128(old_v, &global_count_and_timeval_); + + // Scan the array + 1, and add up the execution time + new_v.lo = old_v.lo + 1; + new_v.hi = old_v.hi + execution_time; + + if (CAS128(&global_count_and_timeval_, old_v, new_v)) { + // success, break + break; + } else { + PAUSE(); + } + } + + if (tid_flag == tid) { + if (REACH_TIME_INTERVAL(PRINT_GET_MIN_PROGRESS_INTERVAL)) { + types::uint128_t global_count_and_timeval; + LOAD128(global_count_and_timeval, &global_count_and_timeval_); + // Calculate the number of times the array was scanned during this interval and the average time to scan an array + uint64_t scan_cnt = global_count_and_timeval.lo - last_global_count_and_timeval_.lo; + uint64_t time = (global_count_and_timeval.hi - last_global_count_and_timeval_.hi) /scan_cnt; + // part count + int64_t part_cnt = ATOMIC_LOAD(&progress_cnt_); + // Record current statistics + last_global_count_and_timeval_.lo = global_count_and_timeval.lo; + last_global_count_and_timeval_.hi = global_count_and_timeval.hi; + + _LOG_INFO("[STAT] [GET_MIN_PROGRESS] AVG_TIME=%ld PART_COUNT=%ld SCAN_COUNT=%ld", + time, part_cnt, scan_cnt); + } + } +} + +} +} diff --git a/src/liboblog/src/ob_log_part_progress_controller.h b/src/liboblog/src/ob_log_part_progress_controller.h new file mode 100644 index 0000000000000000000000000000000000000000..610cd40d60f428df0aa1e4ec0ddbdfbbe4c1ab2b --- /dev/null +++ b/src/liboblog/src/ob_log_part_progress_controller.h @@ -0,0 +1,128 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_PROGRESS_CONTROLLER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_PROGRESS_CONTROLLER_H__ + +#include "share/ob_define.h" // OB_INVALID_TIMESTAMP +#include "lib/container/ob_array.h" // ObArray +#include "lib/lock/ob_spin_lock.h" // ObSpinLock +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "lib/atomic/atomic128.h" // uint128_t, CAS128, LOAD128 +#include "ob_log_utils.h" + +namespace oceanbase +{ +namespace liboblog +{ +//////////////////////////// PartProgressController //////////////////////////// +// Partition progress controller +// 1. track the progress of each partition, providing an interface to get the minimum progress +// 2. progress is essentially a timestamp, the invalid value is OB_INVALID_TIMESTAMP +// +// TODO: +// 1. support any number of partitions +// 2. 
try to make it a lock-free data structure +class PartProgressController +{ + static const int64_t INVALID_PROGRESS = common::OB_INVALID_TIMESTAMP; + // Print execution interval + static const int64_t PRINT_GET_MIN_PROGRESS_INTERVAL = 1 * _MIN_; +public: + PartProgressController(); + virtual ~PartProgressController(); + int init(const int64_t max_progress_cnt); + void destroy(); + + /// Assigning an ID to uniquely identify the progress and also setting the initial value of the progress + /// + /// Returns OB_NEED_RETRY if not enough identifiers are available + int acquire_progress(int64_t &progress_id, const int64_t start_progress); + + /// Release progress ID into recycling pool + int release_progress(const int64_t progress_id); + + /// update progress values with specify ID + int update_progress(const int64_t progress_id, const int64_t progress); + + /// Get the current minimum progress value + int get_min_progress(int64_t &progress); + + TO_STRING_KV(K_(progress_cnt), + K_(valid_progress_cnt), + "recycled_cnt", recycled_indices_.count(), + K_(max_progress_cnt)); +private: + // Assign IDs to each thread + int64_t get_itid_(); + + // Update get_min_progress execution time + void update_execution_time_(int64_t execution_time); +private: + struct Item + { + int64_t progress_; + + void reset() + { + ATOMIC_STORE(&progress_, INVALID_PROGRESS); + } + + void reset(const int64_t start_progress) + { + ATOMIC_STORE(&progress_, start_progress); + } + + void update(const int64_t new_progress) + { + int64_t oldv = ATOMIC_LOAD(&(progress_)); + + while ((oldv < new_progress) || (INVALID_PROGRESS == oldv)) { + oldv = ATOMIC_VCAS(&(progress_), oldv, new_progress); + } + } + + int64_t get() const { return ATOMIC_LOAD(&progress_); } + }; + + typedef common::ObArray IndexArray; + +private: + bool inited_; + int64_t max_progress_cnt_; + Item *progress_list_; + + // recycle array of progress id + IndexArray recycled_indices_; + common::ObSpinLock recycled_indices_lock_; + + // The maximum number of progress ids that have been allocated, including the reclaimed + int64_t progress_cnt_ CACHE_ALIGNED; + + // Number of valid progress ids, not including recycled + int64_t valid_progress_cnt_ CACHE_ALIGNED; + + // Used to assign serial numbers to threads + int64_t thread_counter_; + + // Low 64 bits: records the number of times the array was scanned; High 64 bits: records the total time the array was scanned + types::uint128_t last_global_count_and_timeval_; + types::uint128_t global_count_and_timeval_; + +private: + DISALLOW_COPY_AND_ASSIGN(PartProgressController); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_serve_info.h b/src/liboblog/src/ob_log_part_serve_info.h new file mode 100644 index 0000000000000000000000000000000000000000..af3c2cf06bf8f3a2208708324d904e1b7c6297a0 --- /dev/null +++ b/src/liboblog/src/ob_log_part_serve_info.h @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_DATA_FILTER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_DATA_FILTER_H__ + +#include "ob_log_utils.h" // TS_TO_STR + +namespace oceanbase +{ +namespace liboblog +{ + +struct PartServeInfo +{ + PartServeInfo() : start_serve_from_create_(false), start_serve_timestamp_(0) + { + } + ~PartServeInfo() {} + + void reset() + { + start_serve_from_create_ = false; + start_serve_timestamp_ = 0; + } + + void reset(const bool start_serve_from_create, const int64_t start_serve_timestamp) + { + start_serve_from_create_ = start_serve_from_create; + start_serve_timestamp_ = start_serve_timestamp; + } + + // Determine whether a partitioned transaction is served + // 1. If the input is the prepare log timestamp: a "not served" answer is definitive, and a "served" answer is definitive + // 2. If the input is the commit log timestamp: a "not served" answer is definitive, but a "served" answer is not guaranteed + bool is_served(const int64_t tstamp) const + { + bool bool_ret = false; + + // If a partition is served from the moment it is created, all its partition transactions are served + if (start_serve_from_create_) { + bool_ret = true; + } else { + // Otherwise, the partitioned transaction is served only if its prepare log timestamp is greater than or equal to the start service timestamp + bool_ret = (tstamp >= start_serve_timestamp_); + } + + return bool_ret; + } + + TO_STRING_KV(K_(start_serve_from_create), + "start_serve_timestamp", TS_TO_STR(start_serve_timestamp_)); + + bool start_serve_from_create_; // Whether the partition is served starting from its creation + uint64_t start_serve_timestamp_; // Timestamp from which the partition is served +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_svr_list.cpp b/src/liboblog/src/ob_log_part_svr_list.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef44921002c88026111000facacbf8aa298d8e32 --- /dev/null +++ b/src/liboblog/src/ob_log_part_svr_list.cpp @@ -0,0 +1,796 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_svr_list.h" +#include <algorithm> // std::sort + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +PartSvrList::PartSvrList() : svr_items_(ObModIds::OB_LOG_PART_SVR_LIST, OB_MALLOC_NORMAL_BLOCK_SIZE) +{ + reset(); +} + +PartSvrList::~PartSvrList() +{ + reset(); +} + +void PartSvrList::reset() +{ + next_svr_index_ = 0; + svr_items_.reset(); +} + +int PartSvrList::add_server_or_update(const common::ObAddr &svr, + const uint64_t start_log_id, + const uint64_t end_log_id, + const bool is_located_in_meta_table, + const RegionPriority region_prio, + const ReplicaPriority replica_prio, + const bool is_leader, + const bool is_log_range_valid /* = true */) +{ + int ret = OB_SUCCESS; + bool found_svr = false; + + for (int64_t index = 0; ! 
found_svr && OB_SUCCESS == ret && index < svr_items_.count(); index++) { + SvrItem &svr_item = svr_items_.at(index); + if (svr_item.svr_ == svr) { + found_svr = true; + // Updateis_located_in_meta_table, replica_prio, leader info if server exist + svr_item.reset(is_located_in_meta_table, replica_prio, is_leader); + + // Update if log range is valid + if (is_log_range_valid) { + if (OB_FAIL(svr_item.add_range(start_log_id, end_log_id))) { + LOG_ERROR("server item add range fail", KR(ret), K(start_log_id), + K(end_log_id), K(svr_item)); + } + } + } + } + + if (OB_SUCCESS == ret && ! found_svr) { + SvrItem svr_item; + svr_item.reset( + svr, + start_log_id, + end_log_id, + is_located_in_meta_table, + region_prio, + replica_prio, + is_leader); + + if (OB_FAIL(svr_items_.push_back(svr_item))) { + LOG_ERROR("push_back svr item fail", KR(ret), K(svr_item), K(svr_items_)); + } else { + // succ + } + } + + return ret; +} + +bool PartSvrList::exist(const common::ObAddr &svr, int64_t &svr_index) const +{ + bool svr_existed = false; + + int64_t index = 0; + for (; !svr_existed && index < svr_items_.count(); index++) { + svr_existed = (svr_items_.at(index).svr_ == svr); + } + + if (svr_existed) { + svr_index = index - 1; + } else { + svr_index = -1; + } + + return svr_existed; +} + +int PartSvrList::next_server(const uint64_t next_log_id, + const IBlackList &blacklist, + common::ObAddr &svr) +{ + int ret = OB_SUCCESS; + + if (svr_items_.count() <= 0) { + ret = OB_ITER_END; + } else if (OB_FAIL(get_next_server_based_on_blacklist_(next_log_id, blacklist, svr))) { + if (OB_ITER_END != ret) { + LOG_ERROR("get_next_server_based_on_blacklist_ fail", KR(ret), K(next_log_id)); + } + } + + return ret; +} + +int PartSvrList::get_server_array_for_locate_start_log_id(StartLogIdLocateReq::SvrList &svr_list) const +{ + int ret = OB_SUCCESS; + + // Get the temporary SvrItemArray before getting the server list, then sort by policy + SvrItemArray sort_svr_items; + for (int64_t idx = 0; OB_SUCC(ret) && idx < svr_items_.count(); ++idx) { + const SvrItem &svr_item = svr_items_.at(idx); + if (OB_FAIL(sort_svr_items.push_back(svr_item))) { + LOG_ERROR("sort_svr_items push_back fail", KR(ret), K(idx), K(svr_item)); + } else { + // succ + } + } + + // The returned svr list is returned according to the following policy: server service log time in descending order (i.e. 
new server first, old server second) + if (OB_SUCC(ret)) { + sort_by_priority_for_locate_start_log_id_(sort_svr_items); + } + + + // Finally, the list of ordered servers is returned + for (int64_t idx = 0; OB_SUCC(ret) && idx < sort_svr_items.count(); ++idx) { + // Generate a SvrItem + StartLogIdLocateReq::SvrItem start_log_id_svr_item; + start_log_id_svr_item.reset(sort_svr_items.at(idx).svr_); + + if (OB_FAIL(svr_list.push_back(start_log_id_svr_item))) { + LOG_ERROR("StartLogIdLocateReq::SvrList push_back fail", KR(ret), K(idx), + K(start_log_id_svr_item), K(svr_list)); + } else { + // succ + } + } + + return ret; +} + +void PartSvrList::sort_by_priority_for_locate_start_log_id_(SvrItemArray &svr_items) const +{ + std::sort(svr_items.begin(), svr_items.end(), LocateStartLogIdCompare()); +} + +void PartSvrList::sort_by_priority() +{ + std::sort(svr_items_.begin(), svr_items_.end(), SvrItemCompare()); +} + +int PartSvrList::filter_by_svr_blacklist(const ObLogSvrBlacklist &svr_blacklist, + common::ObArray &remove_svrs) +{ + int ret = OB_SUCCESS; + bool has_done = false; + const int64_t svr_blacklist_cnt = svr_blacklist.count(); + int64_t svr_remove_cnt = 0; + + // 1. Iterate through the blacklist of servers in reverse order, eliminating svr's that are in the blacklist, since they are already sorted by priority, so here the low priority servers are removed in reverse order + // 2. keep at least one server + for (int64_t svr_idx = svr_items_.count() - 1; OB_SUCC(ret) && ! has_done && svr_idx >= 0; --svr_idx) { + const ObAddr &svr = svr_items_.at(svr_idx).svr_; + const int64_t svr_count = svr_items_.count(); + + if (1 == svr_count) { + // Retain, do not dispose + has_done = true; + } else if (svr_remove_cnt >= svr_blacklist_cnt) { + // Based on the svr blacklist, the server list has been cleaned up + has_done = true; + } else { + if (svr_blacklist.is_exist(svr)) { + if (OB_FAIL(remove_svrs.push_back(svr))) { + LOG_ERROR("remove_svrs push_back fail", KR(ret), K(svr)); + } else if (OB_FAIL(svr_items_.remove(svr_idx))) { + LOG_ERROR("remove svr item fail", KR(ret), K(svr_idx), K(svr), K(svr_items_)); + } else { + // succ + ++svr_remove_cnt; + } + } else { + // do nothing + } + } + } // for + + + return ret; +} + +bool PartSvrList::need_switch_server(const uint64_t next_log_id, + IBlackList &blacklist, + const common::ObPartitionKey &pkey, + const common::ObAddr &cur_svr) +{ + int ret = OB_SUCCESS; + bool svr_found = false; + int64_t avail_svr_count = svr_items_.count(); + + IBlackList::BLSvrArray wash_svr_array; + wash_svr_array.reset(); + + // Note: The blacklist must be white-washed before the lookup, to ensure that higher priority servers can be detected + // (in the blacklist, but the whitewash condition is already met) + if (OB_FAIL(blacklist.do_white_washing(wash_svr_array))) { + LOG_ERROR("blacklist do while washing fail", KR(ret), K(pkey)); + } else { + if (wash_svr_array.count() > 0) { + LOG_INFO("[STAT] [BLACK_LIST] [WASH]", KR(ret), K(pkey), + "wash_svr_cnt", wash_svr_array.count(), K(wash_svr_array)); + + } + + for (int64_t svr_idx = 0; OB_SUCC(ret) && ! 
svr_found && svr_idx < avail_svr_count; ++svr_idx) { + bool is_log_served = false; + bool is_svr_invalid = false; + SvrItem &svr_item = svr_items_.at(svr_idx); + + svr_item.check_and_update_serve_info(next_log_id, is_log_served, is_svr_invalid); + + if (is_log_served && !is_svr_invalid && !blacklist.exist(svr_item.svr_)) { + if (cur_svr == svr_item.svr_) { + // End of lookup, no higher priority svr, no active flow cut required + break; + } else { + svr_found = true; + + // Check the priority of the found svr and the current svr, if they are the same do not switch, otherwise do + if (OB_FAIL(check_found_svr_priority_(pkey, svr_idx, avail_svr_count, cur_svr, svr_found))) { + LOG_ERROR("check_found_svr_priority_ fail", KR(ret), K(pkey), K(svr_idx), K(avail_svr_count), + K(cur_svr), K(svr_found), K(svr_items_)); + } else if (! svr_found) { + // End of search with equal priority + break; + } else { + // There is a higher priority svr, and set next_svr_index_to 0 to ensure that the dispatch cut starts at the beginning of the server list + next_svr_index_ = 0; + } + } + } + } + } + + return svr_found; +} + +int PartSvrList::check_found_svr_priority_(const common::ObPartitionKey &pkey, + const int64_t found_svr_idx, + const int64_t avail_svr_count, + const common::ObAddr &cur_svr, + bool &need_switch) +{ + int ret = OB_SUCCESS; + int64_t cur_svr_idx = -1; + const SvrItem &found_svr_item = svr_items_.at(found_svr_idx); + + // Get the SvrItem where the current fetch log stream svr is located + for (int64_t svr_idx = found_svr_idx + 1; OB_SUCC(ret) && (-1 == cur_svr_idx) && svr_idx < avail_svr_count; + ++svr_idx) { + const ObAddr &svr = svr_items_.at(svr_idx).svr_; + + if (cur_svr == svr) { + // Find update idx + cur_svr_idx = svr_idx; + } + } // for + + if (OB_SUCC(ret)) { + if (cur_svr_idx != -1) { + SvrItem &cur_svr_item = svr_items_.at(cur_svr_idx); + + // The svr found has the same priority as the current svr, there is no need to switch servers at this point + if (cur_svr_item.is_priority_equal(found_svr_item)) { + need_switch = false; + } else { + need_switch = true; + } + } else { + // Current server not found, switch + need_switch = true; + } + + LOG_INFO("[CHECK_NEED_SWITCH_SERVER] find different server", K(pkey), K(need_switch), + K(cur_svr_idx), K(found_svr_idx), + "cur_svr_item", (-1 == cur_svr_idx) ? to_cstring(cur_svr) : to_cstring(svr_items_.at(cur_svr_idx)), + K(found_svr_item)); + } + + return ret; +} + +int PartSvrList::get_next_server_based_on_blacklist_(const uint64_t next_log_id, + const IBlackList &blacklist, + common::ObAddr &svr) +{ + int ret = OB_SUCCESS; + bool svr_found = false; + int64_t avail_svr_count = svr_items_.count(); + + // Iterate through all servers and find the server that serves the target log + for (int64_t index = 0; OB_SUCCESS == ret && ! svr_found && index < avail_svr_count; index++) { + if (svr_items_.count() <= 0) { + break; + } else { + bool is_log_served = false; + bool is_svr_invalid = false; + // Automatically modify next_svr_index_ to move to the next server + int64_t svr_idx = next_svr_index_++ % svr_items_.count(); + SvrItem &svr_item = svr_items_.at(svr_idx); + + svr_item.check_and_update_serve_info(next_log_id, is_log_served, is_svr_invalid); + + // server does not serve the target log + if (! 
is_log_served) { + if (is_svr_invalid) { + // server is invalid, remove server from the array + if (OB_FAIL(svr_items_.remove(svr_idx))) { + LOG_ERROR("remove svr item fail", KR(ret), K(svr_idx), K(svr_items_)); + } else { + // next_svr_index_ does not change as elements in the array are deleted + next_svr_index_--; + } + } else { + // server is valid, move to the next server + } + } else { + if (blacklist.exist(svr_item.svr_)) { + // Filtering on blacklisted servers + } else { + if (OB_SUCCESS == ret) { + svr_found = true; + svr = svr_item.svr_; + } + } + } + } + } + + if (OB_SUCCESS == ret && ! svr_found) { + ret = OB_ITER_END; + } + + return ret; +} + +/////////////////////////////////// SvrItem /////////////////////////////// + +void PartSvrList::SvrItem::reset() +{ + svr_.reset(); + range_num_ = 0; + (void)memset(log_ranges_, 0, sizeof(log_ranges_)); + is_located_in_meta_table_ = false; + region_prio_ = REGION_PRIORITY_UNKNOWN; + replica_prio_ = REPLICA_PRIORITY_UNKNOWN; + is_leader_ = false; +} + +void PartSvrList::SvrItem::reset(const bool is_located_in_meta_table, + const ReplicaPriority replica_prio, + const bool is_leader) +{ + is_located_in_meta_table_ = is_located_in_meta_table; + replica_prio_ = replica_prio; + is_leader_ = is_leader; +} + +void PartSvrList::SvrItem::reset(const common::ObAddr &svr, + const uint64_t start_log_id, + const uint64_t end_log_id, + const bool is_located_in_meta_table, + const RegionPriority region_prio, + const ReplicaPriority replica_prio, + const bool is_leader) +{ + svr_ = svr; + // Initialise a log range by default + range_num_ = 1; + log_ranges_[0].reset(start_log_id, end_log_id); + // Initialization priority + is_located_in_meta_table_ = is_located_in_meta_table; + region_prio_ = region_prio; + replica_prio_ = replica_prio; + is_leader_ = is_leader; +} + +int PartSvrList::SvrItem::find_pos_and_merge_(const uint64_t start_log_id, + const uint64_t end_log_id, + bool &merged, + int64_t &target_index) +{ + int ret = OB_SUCCESS; + int64_t merge_start = -1; + int64_t merge_end = -1; + uint64_t merge_start_log_id = start_log_id; + uint64_t merge_end_log_id = end_log_id; + + merged = false; + target_index = 0; + + for (target_index = 0; target_index < range_num_; target_index++) { + LogIdRange &range = log_ranges_[target_index]; + + // Find the insertion position + if (merge_end_log_id < range.start_log_id_) { + break; + } + // Find segments that can be merged + else if (merge_start_log_id <= range.end_log_id_) { + merge_start_log_id = std::min(merge_start_log_id, range.start_log_id_); + merge_end_log_id = std::max(merge_end_log_id, range.end_log_id_); + + merge_end = target_index; + if (-1 == merge_start) { + merge_start = target_index; + } + + merged = true; + + range.reset(merge_start_log_id, merge_end_log_id); + } + // Skip the current block + else { + } + } + + // If a merge has occurred, collate the merged array + if (merged) { + if (OB_UNLIKELY(merge_start < 0) || OB_UNLIKELY(merge_end < 0)) { + LOG_ERROR("merge_start or merge_end is invalid", K(merge_start), K(merge_end)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t merge_delta = merge_end - merge_start; + + if (merge_delta > 0) { + // Reorganise the array to cover the excess elements after the merge + for (int64_t old_idx = merge_end, new_idx = merge_start; + old_idx < range_num_; + old_idx++, new_idx++) { + log_ranges_[new_idx] = log_ranges_[old_idx]; + } + } + range_num_ -= merge_delta; + } + } + + return ret; +} + +int PartSvrList::SvrItem::insert_range_(const uint64_t start_log_id, + 
const uint64_t end_log_id, + const int64_t target_insert_index) +{ + int ret = OB_SUCCESS; + int64_t target_index = target_insert_index; + + if (OB_UNLIKELY(target_insert_index < 0)) { + LOG_ERROR("invalid index", K(target_insert_index)); + ret = OB_INVALID_ARGUMENT; + } + // The array is full, perform a manual merge: merge with the target_insert_index range + else if (range_num_ >= MAX_RANGE_NUM) { + if (target_index >= range_num_) { + // If larger than all segments, merge with the last element + target_index = range_num_ - 1; + } + + log_ranges_[target_index].start_log_id_ = + std::min(log_ranges_[target_index].start_log_id_, start_log_id); + + log_ranges_[target_index].end_log_id_ = + std::max(log_ranges_[target_index].end_log_id_, end_log_id); + } + // Performing ordered insertion operations + else { + // Move subsequent elements backwards + for (int64_t index = range_num_ - 1; index >= target_index; index--) { + log_ranges_[index + 1] = log_ranges_[index]; + } + + log_ranges_[target_index].start_log_id_ = start_log_id; + log_ranges_[target_index].end_log_id_ = end_log_id; + range_num_++; + } + return ret; +} + +int PartSvrList::SvrItem::add_range(const uint64_t start_log_id, const uint64_t end_log_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(range_num_ < 0)) { + LOG_ERROR("range num is invalid", K(range_num_)); + ret = OB_INVALID_ERROR; + } else if (OB_UNLIKELY(OB_INVALID_ID == start_log_id)) { + LOG_ERROR("invalid argument", K(start_log_id)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t target_index = 0; + bool merged = false; + + // Find the insert and try to perform a merge operation + if (OB_FAIL(find_pos_and_merge_(start_log_id, end_log_id, merged, target_index))) { + LOG_ERROR("find_pos_and_merge_ fail", KR(ret), K(start_log_id), K(end_log_id)); + } else if (merged) { + // Merging is equivalent to inserting a range, so there is no need to perform the operation again + } + // If there is no merge, the elements need to be inserted; if the array is full, then perform a manual merge + else if (OB_FAIL(insert_range_(start_log_id, end_log_id, target_index))) { + LOG_ERROR("insert_range_ fail", KR(ret), K(start_log_id), K(end_log_id)); + } else { + // Insert range successfully + } + } + return ret; +} + +void PartSvrList::SvrItem::check_and_update_serve_info(const uint64_t log_id, + bool &is_log_served, + bool &is_server_invalid) +{ + is_log_served = false; + is_server_invalid = false; + + int64_t target_index = 0; + for (target_index = 0; target_index < range_num_; target_index++) { + is_log_served = log_ranges_[target_index].is_log_served(log_id); + + if (is_log_served) { + break; + } + // log id at lower range limit, exit loop directly + else if (log_ranges_[target_index].is_lower_bound(log_id)) { + break; + } + } + + // No valid range found, then the server is no longer valid + if (target_index >= range_num_) { + range_num_ = 0; + is_server_invalid = true; + } + // Delete all ranges smaller than log_id + else if (target_index > 0) { + int64_t delta = target_index; + for (int64_t index = target_index; index < range_num_; index++) { + log_ranges_[index - delta] = log_ranges_[index]; + } + + range_num_ -= target_index; + } +} + +bool PartSvrList::SvrItem::is_priority_equal(const SvrItem &svr_item) const +{ + bool bool_ret = false; + + bool_ret = (is_located_in_meta_table_ == svr_item.is_located_in_meta_table_) + && (region_prio_ == svr_item.region_prio_) + && (replica_prio_ == svr_item.replica_prio_) + && (is_leader_ == svr_item.is_leader_); + + return bool_ret; +} + 
+int64_t PartSvrList::SvrItem::to_string(char *buffer, int64_t length) const +{ + int64_t pos = 0; + (void)databuff_printf(buffer, length, pos, "{server:%s, range_num:%ld, ranges:[", + to_cstring(svr_), range_num_); + + for (int64_t index = 0; index < range_num_; index++) { + (void)databuff_printf(buffer, length, pos, "%s", to_cstring(log_ranges_[index])); + if (index < range_num_ - 1) { + (void)databuff_printf(buffer, length, pos, ", "); + } + } + + (void)databuff_printf(buffer, length, pos, "], "); + (void)databuff_printf(buffer, length, pos, + "priority:[is_meta_table_record:%d region:%s, replica:%s, is_leader:%d]}", + is_located_in_meta_table_, + print_region_priority(region_prio_), + print_replica_priority(replica_prio_), + is_leader_); + + return pos; +} + +//////////////////////////////////////////////////////////////////////////////// + +int64_t BlackList::g_blacklist_survival_time_upper_limit = + ObLogConfig::default_blacklist_survival_time_upper_limit_min * _MIN_; +int64_t BlackList::g_blacklist_survival_time_penalty_period = + ObLogConfig::default_blacklist_survival_time_penalty_period_min * _MIN_; +int64_t BlackList::g_blacklist_history_overdue_time = + ObLogConfig::default_blacklist_history_overdue_time_min * _MIN_; + +BlackList::BlackList() : bl_svr_items_(ObModIds::OB_LOG_PART_SVR_LIST_BLACK_LIST, OB_MALLOC_NORMAL_BLOCK_SIZE), + history_svr_items_(ObModIds::OB_LOG_PART_SVR_LIST_HISTORY_LIST, OB_MALLOC_NORMAL_BLOCK_SIZE) +{ + reset(); +} + +BlackList::~BlackList() +{ + reset(); +} + +void BlackList::reset() +{ + bl_svr_items_.reset(); + history_svr_items_.reset(); +} + +int64_t BlackList::count() const +{ + return bl_svr_items_.count(); +} + +int BlackList::add(const common::ObAddr &svr, + const int64_t svr_service_time, + int64_t &survival_time) +{ + int ret = OB_SUCCESS; + BLSvrItem bl_svr_item; + bl_svr_item.reset(svr, survival_time, get_timestamp()); + + if (OB_FAIL(handle_based_on_history_(svr_service_time, bl_svr_item))) { + LOG_ERROR("handle based svr history fail", KR(ret), K(svr_service_time), K(bl_svr_item), K(bl_svr_items_)); + } else if (OB_FAIL(bl_svr_items_.push_back(bl_svr_item))) { + LOG_ERROR("push_back blacklist item fail", KR(ret), K(bl_svr_item), K(bl_svr_items_)); + } else { + // succ + survival_time = bl_svr_item.survival_time_; + } + + return ret; +} + +int BlackList::handle_based_on_history_( + const int64_t svr_service_time, + BLSvrItem &item) +{ + int ret = OB_SUCCESS; + int64_t found_svr_index = -1; + + if (!exist_in_history_(item.svr_, found_svr_index)) { + // History not found, add to history + if (OB_FAIL(history_svr_items_.push_back(item))) { + LOG_ERROR("push_back blacklist history fail", KR(ret), K(item), K(history_svr_items_)); + } + } else { + // Found in history, decide survival time based on the history record + int64_t blacklist_survival_time_upper_limit = ATOMIC_LOAD(&g_blacklist_survival_time_upper_limit); + int64_t blacklist_survival_time_penalty_period = ATOMIC_LOAD(&g_blacklist_survival_time_penalty_period); + + BLSvrItem &history_item = history_svr_items_.at(found_svr_index); + int64_t history_survival_time = history_item.survival_time_; + + // The server served this partition for too short a time; when it is blacklisted again, its survival time in the blacklist is doubled + if (svr_service_time < blacklist_survival_time_penalty_period) { + if (history_survival_time >= blacklist_survival_time_upper_limit) { + // Start again after one cycle, without updating the survival time + } else { + item.survival_time_ = 
std::max(item.survival_time_, + std::min(UPDATE_SURVIVAL_TIME_MUTIPLE * history_survival_time, blacklist_survival_time_upper_limit)); + } + } else { + // do nothing + } + // update history records + history_item.reset(item.svr_, item.survival_time_, item.access_timestamp_); + } + + return ret; +} + +bool BlackList::exist_in_history_(const common::ObAddr &svr, int64_t &svr_index) const +{ + int ret = OB_SUCCESS; + bool found_svr = false; + svr_index = -1; + + // lookup history records + for (int64_t idx = 0; OB_SUCCESS == ret && !found_svr && idx < history_svr_items_.count(); ++idx) { + const BLSvrItem &history_item = history_svr_items_.at(idx); + if (svr == history_item.svr_) { + found_svr = true; + svr_index = idx; + } + } + + return found_svr; +} + +bool BlackList::exist(const common::ObAddr &svr) const +{ + int ret = OB_SUCCESS; + bool svr_existed = false; + + for (int64_t idx = 0; OB_SUCCESS == ret && !svr_existed && idx < bl_svr_items_.count(); ++idx) { + svr_existed = (svr == bl_svr_items_.at(idx).svr_); + } + + return svr_existed; +} + +int BlackList::do_white_washing(BLSvrArray &wash_svr_array) +{ + int ret = OB_SUCCESS; + wash_svr_array.reset(); + int64_t current_time = get_timestamp(); + + // Iterate through the server blacklist in reverse order, removing the servers that should be whitewashed + for (int64_t svr_idx = bl_svr_items_.count() - 1; OB_SUCCESS == ret && svr_idx >= 0; --svr_idx) { + BLSvrItem &item = bl_svr_items_.at(svr_idx); + if ((current_time - item.access_timestamp_) >= item.survival_time_) { + // Current svr can be whitewashed + if (OB_FAIL(wash_svr_array.push_back(item))) { + LOG_ERROR("wash svr array push back fail", KR(ret), K(svr_idx), K(item)); + } else if (OB_FAIL(bl_svr_items_.remove(svr_idx))) { + LOG_ERROR("remove svr from blacklist fail", KR(ret), K(svr_idx), K(item)); + } else { + // do nothing + } + } + } + + return ret; +} + +int BlackList::clear_overdue_history(SvrHistoryArray &clear_svr_array) +{ + int ret = OB_SUCCESS; + clear_svr_array.reset(); + int64_t current_time = get_timestamp(); + int64_t blacklist_history_overdue_time = ATOMIC_LOAD(&g_blacklist_history_overdue_time); + + // Iterate through the history in reverse order and delete expired records + for (int64_t svr_idx = history_svr_items_.count() - 1; OB_SUCCESS == ret && svr_idx >= 0; --svr_idx) { + BLSvrItem &item = history_svr_items_.at(svr_idx); + if ((current_time - item.access_timestamp_) >= blacklist_history_overdue_time) { + if (OB_FAIL(clear_svr_array.push_back(item))) { + LOG_ERROR("clear svr array push back fail", KR(ret), K(svr_idx), K(item)); + } else if (OB_FAIL(history_svr_items_.remove(svr_idx))) { + LOG_ERROR("remove svr from blacklist history fail", KR(ret), K(svr_idx), K(item)); + } else { + // do nothing + } + } else { + // do nothing + } + } + + return ret; +} + +void BlackList::configure(const ObLogConfig & config) +{ + int64_t blacklist_survival_time_upper_limit_min = config.blacklist_survival_time_upper_limit_min; + ATOMIC_STORE(&g_blacklist_survival_time_upper_limit, blacklist_survival_time_upper_limit_min * _MIN_); + int64_t blacklist_survival_time_penalty_period_min = config.blacklist_survival_time_penalty_period_min; + ATOMIC_STORE(&g_blacklist_survival_time_penalty_period, blacklist_survival_time_penalty_period_min * _MIN_); + int64_t blacklist_history_overdue_time_min = config.blacklist_history_overdue_time_min; + ATOMIC_STORE(&g_blacklist_history_overdue_time, blacklist_history_overdue_time_min * _MIN_); + + LOG_INFO("[CONFIG]", 
K(blacklist_survival_time_upper_limit_min)); + LOG_INFO("[CONFIG]", K(blacklist_survival_time_penalty_period_min)); + LOG_INFO("[CONFIG]", K(blacklist_history_overdue_time_min)); +} + +} +} diff --git a/src/liboblog/src/ob_log_part_svr_list.h b/src/liboblog/src/ob_log_part_svr_list.h new file mode 100644 index 0000000000000000000000000000000000000000..12823407423f6f3bcdbf021a5cd3b0ad267b5885 --- /dev/null +++ b/src/liboblog/src/ob_log_part_svr_list.h @@ -0,0 +1,445 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_SVR_LIST_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_SVR_LIST_H__ + +#include "lib/net/ob_addr.h" // ObAddr +#include "share/ob_define.h" // OB_INVALID_ID, ObReplicaType +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/container/ob_se_array.h" // ObSEArray +#include "lib/hash/ob_ext_iter_hashset.h" // ObExtIterHashSet +#include "lib/allocator/page_arena.h" // ObArenaAllocator + +#include "ob_log_utils.h" // get_timestamp +#include "ob_log_server_priority.h" // RegionPriority, ReplicaPriority +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_svr_blacklist.h" // ObLogSvrBlacklist +#include "ob_log_start_log_id_locator.h" // StartLogIdLocateReq + +namespace oceanbase +{ +namespace liboblog +{ +// server blacklist +class IBlackList +{ +public: + struct BLSvrItem + { + void reset(const common::ObAddr &svr, + const int64_t survival_time, + const int64_t timestamp) + { + svr_ = svr; + survival_time_ = survival_time; + access_timestamp_ = timestamp; + } + + common::ObAddr svr_; + int64_t survival_time_; // lifetime of server in blacklist + int64_t access_timestamp_; // time when server was added into blacklist + + TO_STRING_KV(K_(svr), + K_(survival_time), + "access_timestamp", TS_TO_STR(access_timestamp_)); + }; + static const int64_t DEFAULT_SERVER_NUM = 16; + static const int64_t DEFAULT_SERVER_HISTORY_NUM = 16; + typedef common::ObSEArray BLSvrArray; + typedef common::ObSEArray SvrHistoryArray; + +public: + virtual ~IBlackList() {} + +public: + /// Add server to blacklist. 
+ /// Notes: + /// When adding a server, there is no need to check for duplicate servers, because each time a server is served + /// for the next log for a partition iteration, it will filter out any servers that are on the blacklist + /// at that time, so it is unlikely that there is a server that is duplicated on the blacklist + //// + /// + /// @param [in] svr blacklisted server + /// @param [in] svr_service_time the total time of the current partition of the server service + /// @param [in] survival_time survival_time + /// + /// @retval OB_SUCCESS Success: add svr to blacklist + /// @retval Other error codes Fail + virtual int add(const common::ObAddr &svr, + const int64_t svr_service_time, + int64_t &survival_time) = 0; + + /// Find out if the server is on the blacklist + /// + /// @retval true exists + /// @retval false does not exist + virtual bool exist(const common::ObAddr &svr) const = 0; + + /// Whitewash: Iterate through the blacklist to whitewash + /// + /// There are two calls to the whitewash function. + /// 1. before the server that serves the next log for each iteration of the partition + /// 2. periodically before checking for the presence of a higher level server for the partition + /// + /// @param [out] wash_svr_array The server to be cleared + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + virtual int do_white_washing(BLSvrArray &wash_svr_array) = 0; + + /// Clean up your history periodically + /// 1. Delete history records that have not been updated for a period of time + /// 2. Delete history records whose survial_time has reached the upper threshold + //// + /// @param [out] clear_svr_array The server to be cleared + /// + /// @retval OB_SUCCESS Clear success + //// @retval Other error codes Fail + virtual int clear_overdue_history(SvrHistoryArray &clear_svr_array) = 0; + +}; + + +// IPartSvrList +// +// List of servers describing the range of logs serving the partition +class IPartSvrList +{ +public: + static const int64_t DEFAULT_SERVER_NUM = 16; + +public: + virtual ~IPartSvrList() {} + +public: + virtual void reset() = 0; + + // Add a server + // When the server does not exist, update the corresponding information + // + // @note is_log_range_valid indicates whether the log range passed in is valid + virtual int add_server_or_update(const common::ObAddr &svr, + const uint64_t start_log_id, + const uint64_t end_log_id, + const bool is_located_in_meta_table, + const RegionPriority region_prio, + const ReplicaPriority replica_prio, + const bool is_leader, + const bool is_log_range_valid = true) = 0; + + // Iterate over the server that serves the next log + // 1. return OB_ITER_END when the list of servers has been traversed or when no servers are available + // 2. 
After returning OB_ITER_END, the history is cleared and the server can be iterated over normally next time + // + // @param [in] next_log_id Next log ID + // @param [in] blacklist Partition blacklist + // @param [out] svr the server to return to + // + // @retval OB_SUCCESS Success, found the server that served the next log + // @retval OB_ITER_END server that did not serve the next log + // @retval Other return values Failed + virtual int next_server(const uint64_t next_log_id, + const IBlackList &blacklist, + common::ObAddr &svr) = 0; + + + // detect whether the current partition task needs to switch the stream + // + // @param [in] next_log_id next log ID + // @param [in] blacklist The partition blacklist + // @param [in] pkey partition + // @param [in] cur_svr partition task located at current fetch log stream - target server + // + // @retval true requires switch stream, i.e. a higher priority server exists + // @retval false no stream switch required + virtual bool need_switch_server(const uint64_t next_log_id, + IBlackList &blacklist, + const common::ObPartitionKey &pkey, + const common::ObAddr &cur_svr) = 0; + + /// whether server exists, if so svr_index returns the svr index position, otherwise -1 + virtual bool exist(const common::ObAddr &svr, int64_t &svr_index) const = 0; + + virtual int get_server_array_for_locate_start_log_id(StartLogIdLocateReq::SvrList &svr_list) const = 0; + + // Return the number of available servers + virtual int64_t count() const = 0; + + // Sorting the server list by priority + virtual void sort_by_priority() = 0; + + // Server-based blacklist filtering + virtual int filter_by_svr_blacklist(const ObLogSvrBlacklist &svr_blacklist, + common::ObArray &remove_svrs) = 0; + + virtual int64_t to_string(char* buf, const int64_t buf_len) const = 0; +}; + +///////////////////////////////////////////// PartSvrList ////////////////////////////////////////////// + +class PartSvrList : public IPartSvrList +{ +public: + PartSvrList(); + virtual ~PartSvrList(); + +public: + void reset(); + int add_server_or_update(const common::ObAddr &svr, + const uint64_t start_log_id, + const uint64_t end_log_id, + const bool is_located_in_meta_table, + const RegionPriority region_prio, + const ReplicaPriority replica_prio, + const bool is_leader, + const bool is_log_range_valid = true); + int next_server(const uint64_t next_log_id, + const IBlackList &blacklist, + common::ObAddr &svr); + bool need_switch_server(const uint64_t next_log_id, + IBlackList &blacklist, + const common::ObPartitionKey &pkey, + const common::ObAddr &cur_svr); + bool exist(const common::ObAddr &svr, int64_t &svr_index) const; + int get_server_array_for_locate_start_log_id(StartLogIdLocateReq::SvrList &svr_list) const; + int64_t count() const { return svr_items_.count(); } + void sort_by_priority(); + int filter_by_svr_blacklist(const ObLogSvrBlacklist &svr_blacklist, + common::ObArray &remove_svrs); + +private: + struct SvrItem; + typedef common::ObSEArray SvrItemArray; + +private: + void sort_by_priority_for_locate_start_log_id_(SvrItemArray &svr_items) const; + int get_next_server_based_on_blacklist_(const uint64_t next_log_id, + const IBlackList &blacklist, + common::ObAddr &svr); + int check_found_svr_priority_(const common::ObPartitionKey &pkey, + const int64_t found_svr_idx, + const int64_t avail_svr_count, + const common::ObAddr &cur_svr, + bool &need_switch); + +private: + // Log serve range + // Note: the interval is "left open, right closed" + struct LogIdRange + { + uint64_t start_log_id_; + 
uint64_t end_log_id_; + + void reset() + { + start_log_id_ = common::OB_INVALID_ID; + end_log_id_ = common::OB_INVALID_ID; + } + + void reset(const uint64_t start_log_id, const uint64_t end_log_id) + { + start_log_id_ = start_log_id; + end_log_id_ = end_log_id; + } + + bool is_log_served(const int64_t log_id) + { + return (log_id > start_log_id_) && (log_id <= end_log_id_); + } + + bool is_lower_bound(const int64_t log_id) + { + return log_id <= start_log_id_; + } + + TO_STRING_KV(K_(start_log_id), K_(end_log_id)); + }; + + // Description of server information for a single service log + // Each server may serve multiple segments of logs, only a fixed number of segments are logged here, the extra segments are randomly merged with other segments + // + // For ease of implementation, log segments are sorted by end_log_id and start_log_id incrementally, with no overlap between adjacent segments + struct SvrItem + { + static const int64_t MAX_RANGE_NUM = 4; + + common::ObAddr svr_; + int64_t range_num_; + LogIdRange log_ranges_[MAX_RANGE_NUM]; + + // Definition of priority + bool is_located_in_meta_table_; // Is it in the meta table + RegionPriority region_prio_; // region priority definition + ReplicaPriority replica_prio_; // replica type priority definition + bool is_leader_; // server is leader or not + + void reset(); + // The current partitioned query server list is __clog_histore_info_v2 before querying the meta table + // After querying the meta table, this interface is called to update the relevant information: whether it is in the meta table, the copy type and whether it is the leader + void reset(const bool is_located_in_meta_table, + const ReplicaPriority replica_prio, + const bool is_leader); + + void reset(const common::ObAddr &svr, + const uint64_t start_log_id, + const uint64_t end_log_id, + const bool is_located_in_meta_table, + const RegionPriority region_prio, + const ReplicaPriority replica_prio, + const bool is_leader); + + bool is_valid() const { return svr_.is_valid(); } + + // add a log range + int add_range(const uint64_t start_log_id, const uint64_t end_log_id); + + // 1. check if a log is serviced + // 2. Update service information: as log IDs are incremental, expired logs can be removed from the service range + // + // @param [in] log_id target log ID + // @param [out] is_log_served whether serve target log id + // @param [out] is_server_invalid whether the server is no longer valid (no more valid ranges) + void check_and_update_serve_info(const uint64_t log_id, + bool &is_log_served, + bool &is_server_invalid); + + bool is_priority_equal(const SvrItem &svr_item) const; + + int64_t to_string(char *buffer, int64_t length) const; + + private: + int insert_range_(const uint64_t start_log_id, + const uint64_t end_log_id, + const int64_t target_insert_index); + + int find_pos_and_merge_(const uint64_t start_log_id, + const uint64_t end_log_id, + bool &merged, + int64_t &target_index); + }; + + class SvrItemCompare + { + // 1. prioritize the server in the meta table to synchronize logs + // 2. sort by region priority from largest to smallest + // 3. prioritize by replica type from largest to smallest + // 4. 
prioritize synchronizing from followers, followed by leaders + // + // Note: The lower the region and replica_type values, the higher the priority + public: + bool operator() (const SvrItem &a, const SvrItem &b) + { + bool bool_ret = false; + + if (a.is_located_in_meta_table_ != b.is_located_in_meta_table_) { + bool_ret = static_cast(a.is_located_in_meta_table_) > static_cast(b.is_located_in_meta_table_); + } else if (a.region_prio_ != b.region_prio_) { + bool_ret = a.region_prio_ < b.region_prio_; + } else if (a.replica_prio_ != b.replica_prio_) { + bool_ret = a.replica_prio_ < b.replica_prio_; + } else { + bool_ret = static_cast(a.is_leader_) < static_cast(b.is_leader_); + } + + return bool_ret; + } + }; + + class LocateStartLogIdCompare + { + // 1. Prioritize the leader + // 2. prioritize the records in the meta table + // 3. Next to the remaining records + // + // No more sorting by record creation time from oldest to youngest (i.e. new server records are first and old server records are second. + // When partitioning the start_log_id, you need to prioritize the new server to prevent the start_log_id from going back too much), because the meta table records + // are the most recent, and locating them in the leader will block out some of the OB issues, such as locating the start log id incorrectly and being too large + public: + bool operator() (const SvrItem &a, const SvrItem &b) + { + bool bool_ret = false; + + if (a.is_leader_ != b.is_leader_) { + bool_ret = static_cast(a.is_leader_) > static_cast(b.is_leader_); + } else { + bool_ret = static_cast(a.is_located_in_meta_table_) > static_cast(b.is_located_in_meta_table_); + } + + return bool_ret; + } + }; + +public: + TO_STRING_KV(K_(next_svr_index), + "svr_num", svr_items_.count(), + K_(svr_items)); + + // Internal member variables +private: + int64_t next_svr_index_; // Index of the next server item + SvrItemArray svr_items_; // server list + +private: + DISALLOW_COPY_AND_ASSIGN(PartSvrList); +}; + +class BlackList : public IBlackList +{ + // Class global variables +public: + static int64_t g_blacklist_survival_time_upper_limit; + static int64_t g_blacklist_survival_time_penalty_period; + static int64_t g_blacklist_history_overdue_time; + +public: + static const int64_t UPDATE_SURVIVAL_TIME_MUTIPLE = 2; + +public: + BlackList(); + virtual ~BlackList(); + +public: + void reset(); + int64_t count() const; + int add(const common::ObAddr &svr, + const int64_t svr_service_time, + int64_t &survival_time); + bool exist(const common::ObAddr &svr) const; + int do_white_washing(BLSvrArray &wash_svr_array); + int clear_overdue_history(SvrHistoryArray &clear_svr_array); + +public: + static void configure(const ObLogConfig & config); + +public: + TO_STRING_KV(K_(bl_svr_items), + "bl_svr_num", bl_svr_items_.count(), + K_(history_svr_items), + "history_svr_num", history_svr_items_.count()); + +private: + // Determine the surival time for the server to be blacklisted based on the history + int handle_based_on_history_(const int64_t svr_service_time, BLSvrItem &item); + // Find the history, determine if the server exists, if so svr_index return the svr index position, otherwise return -1 + bool exist_in_history_(const common::ObAddr &svr, int64_t &svr_index) const; + +private: + BLSvrArray bl_svr_items_; + SvrHistoryArray history_svr_items_; +}; + + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_trans_dispatcher.cpp b/src/liboblog/src/ob_log_part_trans_dispatcher.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..119ad6dd95554dd061832d7420a37ba9c16be496 --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_dispatcher.cpp @@ -0,0 +1,705 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_trans_dispatcher.h" // PartTransDispatcher + +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/utility/ob_macro_utils.h" // OB_UNLIKELY +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +#include "ob_log_fetcher_dispatcher.h" // IObLogFetcherDispatcher +#include "ob_log_utils.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +int64_t PartTransDispatcher::g_part_trans_task_count = 0; + +PartTransID::PartTransID(const transaction::ObTransID &trans_id, const common::ObPartitionKey &pkey) : + trans_id_(trans_id), pkey_(pkey) +{} + +bool PartTransID::operator == (const PartTransID &part_trans_id) const +{ + return trans_id_ == part_trans_id.trans_id_ && pkey_ == part_trans_id.pkey_; +} + +uint64_t PartTransID::hash() const +{ + uint64_t hv = trans_id_.hash(); + return murmurhash(&hv, sizeof(hv), pkey_.hash()); +} + +//////////////////////////////////// PartTransDispatchInfo /////////////////////////////////// + +PartTransDispatchInfo::PartTransDispatchInfo() : + last_dispatch_log_id_(OB_INVALID_ID), + current_checkpoint_(OB_INVALID_VERSION), + pending_task_count_(0), + task_count_in_queue_(0), + next_task_type_("INVALID"), + next_trans_log_id_(OB_INVALID_ID), + next_trans_committed_(false), + next_trans_ready_to_commit_(false), + next_trans_global_version_(OB_INVALID_VERSION) +{} + +//////////////////////////////////// TransCommitInfo /////////////////////////////////// + +void TransCommitInfo::reset() +{ + log_id_ = OB_INVALID_ID; + log_ts_ = OB_INVALID_TIMESTAMP; +} + +TransCommitInfo::TransCommitInfo(const uint64_t log_id, const int64_t log_ts) : + log_id_(log_id), + log_ts_(log_ts) +{ +} + +//////////////////////////////////// PartTransDispatcher /////////////////////////////////// + +PartTransDispatcher::PartTransDispatcher(const char *pkey_str, + TaskPool &task_pool, + PartTransTaskMap &task_map, + TransCommitMap &trans_commit_map, + IObLogFetcherDispatcher &dispatcher): + pkey_(), + pkey_str_(pkey_str), + task_pool_(task_pool), + task_map_(task_map), + trans_commit_map_(trans_commit_map), + dispatcher_(dispatcher), + task_queue_(), + init_dispatch_progress_(OB_INVALID_TIMESTAMP), + last_dispatch_progress_(OB_INVALID_TIMESTAMP), + last_dispatch_log_id_(OB_INVALID_ID), + task_count_only_in_map_(0), + checkpoint_(OB_INVALID_VERSION), + created_trans_count_(0), + last_created_trans_count_(0), + last_stat_time_(get_timestamp()), + dispatch_lock_() +{} + +PartTransDispatcher::~PartTransDispatcher() +{ + // TODO: Consider cleaning up the data, but make sure to call clean_task to empty the task when the partition is offline + // That is, task_queue and task_map should not have tasks from this partition in them + // If want to clean tasks here, we need to save the pkey, but for the time 
being, don't need to save the pkey in order to optimise memory usage + if (task_queue_.size() > 0) { + LOG_ERROR("task_queue_ is not empty", K(task_queue_.size()), KPC(task_queue_.top())); + } + + pkey_.reset(); + pkey_str_ = NULL; + init_dispatch_progress_ = OB_INVALID_TIMESTAMP; + last_dispatch_progress_ = OB_INVALID_TIMESTAMP; + last_dispatch_log_id_ = OB_INVALID_ID; + task_count_only_in_map_ = 0; + checkpoint_ = OB_INVALID_VERSION; + created_trans_count_ = 0; + last_created_trans_count_ = 0; + last_stat_time_ = 0; +} + +int PartTransDispatcher::init(const common::ObPartitionKey &pkey, + const int64_t start_tstamp) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == start_tstamp)) { + LOG_ERROR("invalid argument", K(start_tstamp)); + ret = OB_INVALID_ARGUMENT; + } else { + pkey_ = pkey; + // Only the necessary fields are initialized here, all other fields are initialized during construction + // 1. When there is a transaction in the partition with the same timestamp as the start timestamp and it has not been sent, + // it is possible that the heartbeat will fall back because it takes the "pending task timestamp-1". + // 2. so initialize progress to start timestamp-1 + init_dispatch_progress_ = start_tstamp - 1; + last_dispatch_progress_ = start_tstamp - 1; + } + return ret; +} + +// The heartbeat interface only produces heartbeat tasks, it does not consume them +// requires only one producer, no need to add production lock here +int PartTransDispatcher::heartbeat(const common::ObPartitionKey &pkey, const int64_t hb_tstamp) +{ + int ret = OB_SUCCESS; + PartTransTask *task = NULL; + + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == hb_tstamp)) { + LOG_ERROR("invalid heartbeat timestamp", K(hb_tstamp), K(pkey)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(task = task_pool_.get(pkey_str_, pkey))) { + LOG_ERROR("alloc part trans task fail", K(task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(task->init_part_heartbeat_info(pkey, hb_tstamp))) { + LOG_ERROR("init_part_heartbeat_info fail", KR(ret), K(pkey), K(hb_tstamp), KPC(task)); + } else { + push_task_queue_(*task); + } + + if (OB_SUCCESS != ret && NULL != task) { + task->revert(); + task = NULL; + } + + return ret; +} + +void PartTransDispatcher::update_status_after_consume_task_(const int64_t task_tstamp, + const uint64_t prepare_log_id) +{ + if (OB_INVALID_TIMESTAMP != task_tstamp) { + last_dispatch_progress_ = std::max(last_dispatch_progress_, task_tstamp); + } + + if (OB_INVALID_ID != prepare_log_id) { + last_dispatch_log_id_ = prepare_log_id; + } +} + +int PartTransDispatcher::dispatch_part_trans_task_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + // Save the basic information of the task in advance, the output will not be accessible afterwards + int64_t task_tstamp = task.get_timestamp(); + // Note: prepare_log_id is only valid for DDL and DML types + uint64_t prepare_log_id = task.get_prepare_log_id(); + + if (OB_INVALID_TIMESTAMP != task_tstamp) { + // Check if progress is backed up + // The progress here may indeed be backwards because the start time of the target partition is the maximum value + // calculated based on the DDL partition transaction's prepare log timestamp and schema_version, + // in order to prevent the heartbeat timestamp from being backwards; whereas the target partition's own transaction + // timestamp is generated locally, and although the master guarantees that the prepare log timestamp is greater + // than or equal to the GTS time, but only if 
the target partition and the DDL partition are the same tenant can + // their own prepare timestamp be guaranteed by GTS to be greater than or equal to the DDL partition transaction's + // prepare timestamp. However, if the DDL partition and the target partition are not the same tenant, GTS does + // not have a corresponding guarantee. Therefore, it is normal for data progress to be rolled back here. But it is + // dangerous. This problem can only be completely solved when the schema is split within the tenant. + if (OB_UNLIKELY(task_tstamp < last_dispatch_progress_)) { + LOG_WARN("partition dispatch progress is rollback, we should check it", + "task_tstamp", TS_TO_STR(task_tstamp), + "last_dispatch_progress", TS_TO_STR(last_dispatch_progress_), + K(last_dispatch_log_id_), + K(task), + K(stop_flag)); + } + } + + if (OB_FAIL(handle_before_fetcher_dispatch_(task))) { + LOG_ERROR("handle_before_fetcher_dispatch_ fail", KR(ret), K(task)); + } else if (OB_FAIL(dispatcher_.dispatch(task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("fetcher dispatch task fail", KR(ret), K(task)); + } + } else { + // Task output successful, update progress values + update_status_after_consume_task_(task_tstamp, prepare_log_id); + } + + // Regardless of whether the dispatch task completes successfully or not, reset the task information recorded by the queue + task_queue_.reset_dispatched_task_info(); + return ret; +} + +int PartTransDispatcher::handle_before_fetcher_dispatch_(PartTransTask &task) +{ + int ret = OB_SUCCESS; + + if (task.is_dml_trans()) { + if (OB_FAIL(handle_dml_trans_before_fetcher_dispatch_(task))) { + LOG_ERROR("handle_dml_trans_before_fetcher_dispatch_ fail", KR(ret), K(task)); + } + } else { + // do nothing + } + + return ret; +} + +int PartTransDispatcher::handle_dml_trans_before_fetcher_dispatch_(PartTransTask &task) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(task.try_to_set_data_ready_status())) { + LOG_ERROR("try_to_set_data_ready_status fail", KR(ret), K(task)); + } + + return ret; +} + +void PartTransDispatcher::push_task_queue_(PartTransTask &task) +{ + task_queue_.push(&task); + + if (task.is_dml_trans() || task.is_ddl_trans()) { + ATOMIC_INC(&created_trans_count_); + } + + // Update the global task count when the task queue task count changes + ATOMIC_INC(&g_part_trans_task_count); +} + +PartTransTask* PartTransDispatcher::pop_task_queue_() +{ + PartTransTask *ret = task_queue_.pop(); + if (NULL != ret) { + // Update the global task count when the task queue task count changes + ATOMIC_DEC(&g_part_trans_task_count); + } + return ret; +} + +int PartTransDispatcher::prepare_task(PartTransTask &task) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! task.is_dml_trans() && ! 
task.is_ddl_trans())) { + LOG_ERROR("invalid task which is not DML or DDL task", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + push_task_queue_(task); + + // into the queue, minus the number in the map only + // Update the global number of tasks at the same time + ATOMIC_DEC(&task_count_only_in_map_); + ATOMIC_DEC(&g_part_trans_task_count); + } + return ret; +} + +int PartTransDispatcher::check_task_ready_(PartTransTask &task, bool &task_is_ready) +{ + int ret = OB_SUCCESS; + if (task.is_dml_trans() || task.is_ddl_trans()) { + // checkpoint valid, attempt to update partition commit status (single multipartition transaction) + if (OB_INVALID_VERSION != checkpoint_) { + if (OB_FAIL(task.update_trans_committed_status(checkpoint_, *this))) { + LOG_ERROR("update_trans_committed_status fail", KR(ret), K(checkpoint_), K(task)); + } + } + + if (OB_SUCC(ret)) { + task_is_ready = task.is_trans_committed(); + // If the task is ready for output, remove it from the map + // Note: The logic for removing from map is placed here because the task in map must be a DDL + // or DML type transaction, other types of task will not be placed in map. + // + // In order to be compatible with more types of tasks in the future and other tasks do not need to be + // removed from the map, DML and DDL type tasks are removed from the map as soon as they are detected as ready. + if (task_is_ready) { + PartTransID part_trans_id(task.get_trans_id(), task.get_partition()); + // Requires that it must exist + if (OB_FAIL(task_map_.erase(part_trans_id))) { + LOG_ERROR("erase from task map fail", KR(ret), K(part_trans_id), K(task)); + } + } + } + } else { + // Other types of tasks are ready by default + task_is_ready = true; + } + return ret; +} + + +int64_t PartTransDispatcher::get_total_task_count_() +{ + // in MAP only + in QUEUE + return ATOMIC_LOAD(&task_count_only_in_map_) + task_queue_.size(); +} + +int PartTransDispatcher::dispatch_part_trans(volatile bool &stop_flag, int64_t &pending_task_count) +{ + int ret = OB_SUCCESS; + ObByteLockGuard guard(dispatch_lock_); + + while (OB_SUCCESS == ret && ! stop_flag) { + PartTransTask *task = task_queue_.top(); + bool task_is_ready = false; + + // If the task is not ready, exit the loop + if (NULL == task) { + ret = OB_EAGAIN; + } else if (OB_FAIL(check_task_ready_(*task, task_is_ready))) { + LOG_ERROR("check_task_ready_ fail", KR(ret), KPC(task)); + } else if (! 
task_is_ready) { + ret = OB_EAGAIN; + } else { + // pop out data + (void)pop_task_queue_(); + + // dispatch the current task + if (OB_FAIL(dispatch_part_trans_task_(*task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch_part_trans_task_ fail", KR(ret), KPC(task)); + } + } else { + task = NULL; + } + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCCESS == ret) { + pending_task_count = get_total_task_count_(); + } + + return ret; +} + +int PartTransDispatcher::dispatch_offline_partition_task(const common::ObPartitionKey &pkey, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ObByteLockGuard guard(dispatch_lock_); + PartTransTask *task = NULL; + + // Because offline partitioned tasks are generated in real time and then sent down in real time, + // it is important to ensure that the task queue is empty and there are no queued tasks ahead + // Another purpose is to reclaim all memory and avoid memory leaks + if (OB_UNLIKELY(task_queue_.size() > 0)) { + LOG_ERROR("there are tasks not dispatched, cat not dispatch offline task", + "task_queue_size", task_queue_.size()); + ret = OB_STATE_NOT_MATCH; + } else if (OB_ISNULL(task = task_pool_.get(pkey_str_, pkey))) { + LOG_ERROR("alloc part trans task fail", K(task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(task->init_offline_partition_task(pkey))) { + LOG_ERROR("init_offline_partition_task fail", KR(ret), K(pkey), KPC(task)); + } else if (OB_FAIL(dispatch_part_trans_task_(*task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch task fail", KR(ret), KPC(task), K(task)); + } + } else { + task = NULL; + } + + if (OB_FAIL(ret)) { + if (NULL != task) { + task->revert(); + task = NULL; + } + } + + return ret; +} + +struct TaskMapCleaner +{ + bool operator()(const PartTransID &key, PartTransTask *&val) + { + bool bool_ret = false; + const bool is_ddl_part = is_ddl_partition(key.pkey_); + + if (key.pkey_ == pkey_) { + if (NULL != val && is_ddl_part) { + val->revert(); + } + + val = NULL; + bool_ret = true; + count_++; + } + + return bool_ret; + } + + TaskMapCleaner(const ObPartitionKey &pkey) : count_(0), pkey_(pkey) {} + + int64_t count_; + const ObPartitionKey &pkey_; +}; + +// Iterate through all tasks, ready or not, and revert them directly +// The aim is to clear all tasks +int PartTransDispatcher::clean_task(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + TaskMapCleaner map_cleaner(pkey); + const bool is_ddl_part = is_ddl_partition(pkey); + + // Clearance tasks are also a form of consumption and require the addition of a consumption lock + ObByteLockGuard guard(dispatch_lock_); + + // First recycle all tasks in the queue, which may all be in the map, so delete the corresponding data items in the map at the same time + // pop out the tasks in the queue, whether they are ready or not + PartTransTask *task = NULL; + int64_t task_queue_size = task_queue_.size(); + + while (NULL != (task = pop_task_queue_()) && OB_SUCCESS == ret) { + // Delete DML and DDL type transactions from map + if (task->is_dml_trans() || task->is_ddl_trans()) { + PartTransID part_trans_id(task->get_trans_id(), task->get_partition()); + if (OB_FAIL(task_map_.erase(part_trans_id))) { + // It is normal for Map not to exist– + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("erase from task map fail", KR(ret), K(part_trans_id), K(task)); + } + } + } + + if (OB_SUCCESS == ret) { + // Update state after 
consumption is complete + update_status_after_consume_task_(task->get_timestamp(), task->get_prepare_log_id()); + // Revert the task + if (is_ddl_part) { + task->revert(); + } + } + } + + if (OB_SUCCESS == ret) { + // Clear all tasks in the Map that are not in the queue + if (OB_FAIL(task_map_.remove_if(map_cleaner))) { + LOG_ERROR("remove from map fail", KR(ret), K(pkey)); + } else { + // Subtract the number of tasks that are only in the map + (void)ATOMIC_AAF(&g_part_trans_task_count, -map_cleaner.count_); + (void)ATOMIC_AAF(&task_count_only_in_map_, -map_cleaner.count_); + } + } + + if (OB_SUCCESS == ret) { + if (task_count_only_in_map_ != 0) { + LOG_ERROR("task_count_only_in_map_ != 0 after cleaning all tasks, unexpected error", + K(task_count_only_in_map_), K(pkey), K(task_queue_size), + "task_map_size", map_cleaner.count_); + ret = OB_ERR_UNEXPECTED; + } + } + + LOG_INFO("part trans resolver clean task", KR(ret), K(pkey), K(task_queue_size), + "task_map_size", map_cleaner.count_, + K(task_count_only_in_map_), K(last_dispatch_progress_), K(last_dispatch_log_id_)); + + return ret; +} + +int PartTransDispatcher::insert_commit_trans(const transaction::ObTransID &trans_id, + const TransCommitInfo &trans_commit_info) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(trans_commit_map_.insert(trans_id, trans_commit_info))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("trans_commit_map insert fail", KR(ret), K(trans_id), K(trans_commit_info)); + } else { + // return success if the entry already exists + ret = OB_SUCCESS; + } + } + + return ret; +} + +int PartTransDispatcher::remove_task(const bool is_ddl_part, const PartTransID &trans_id) +{ + int ret = OB_SUCCESS; + // Tasks are removed from the queue here, so the consumption lock must be held + ObByteLockGuard guard(dispatch_lock_); + PartTransTask *task = NULL; + + // Remove the corresponding task from the Map and return the object value + if (OB_FAIL(task_map_.erase(trans_id, task))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("get task from map fail", KR(ret), K(trans_id)); + } + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task), K(trans_id)); + ret = OB_ERR_UNEXPECTED; + } else { + bool exist_in_queue = false; + // Remove the task from the queue's linked list + task_queue_.remove(task, exist_in_queue); + + // For DDL partitions, revert the task directly + if (is_ddl_part) { + task->revert(); + task = NULL; + } else { + bool is_unserved_part_trans_task_can_be_recycled = false; + if (OB_FAIL(task->handle_unserved_part_trans(is_unserved_part_trans_task_can_be_recycled))) { + LOG_ERROR("handle_unserved_part_trans fail", KR(ret), K(task)); + } else if (is_unserved_part_trans_task_can_be_recycled) { + task->revert(); + task = NULL; + } + } + + if (! 
exist_in_queue) { + ATOMIC_DEC(&task_count_only_in_map_); + } + + ATOMIC_DEC(&g_part_trans_task_count); + } + + return ret; +} + +int PartTransDispatcher::find_commit_trans_info(const transaction::ObTransID &trans_id, + bool &is_commit_trans) +{ + int ret = OB_SUCCESS; + is_commit_trans = false; + TransCommitInfo trans_commit_info; + + if (OB_FAIL(trans_commit_map_.get(trans_id, trans_commit_info))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get part trans task fail", KR(ret), K(trans_id)); + } else { + ret = OB_SUCCESS; + is_commit_trans = false; + } + } else { + is_commit_trans = true; + } + + return ret; +} + +// No concurrent update of task queues involved, just application data structures, no locking required +int PartTransDispatcher::alloc_task(const PartTransID &trans_id, PartTransTask *&task) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(task = task_pool_.get(pkey_str_, pkey_))) { + LOG_ERROR("alloc part trans task fail", K(task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } + // Inserted into Map for subsequent queries based on TransID + else if (OB_FAIL(task_map_.insert(trans_id, task))) { + task->revert(); + task = NULL; + LOG_ERROR("insert part trans task fail", KR(ret), K(trans_id)); + } else { + // This task is only in MAP, not in QUEUE + ATOMIC_INC(&task_count_only_in_map_); + ATOMIC_INC(&g_part_trans_task_count); + } + + return ret; +} + +int PartTransDispatcher::get_task(const PartTransID &trans_id, PartTransTask *&task) +{ + int ret = OB_SUCCESS; + + task = NULL; + if (OB_FAIL(task_map_.get(trans_id, task))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get part trans task fail", KR(ret), K(trans_id)); + } + } else if (OB_ISNULL(task)) { + LOG_ERROR("part_trans_task is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + if (! task->is_served()) { + ret = OB_INVALID_ERROR; + } + } + + return ret; +} + +// dispatch_progress information does not use dispatch locks, uses task_queue_ locks, risky when getting dispatch_info +// +// 1. get progress based on last dispatch progress +// 2. 
Update dispatch progress if there are tasks that are dispatching or ready to be dispatched +int PartTransDispatcher::get_dispatch_progress(int64_t &dispatch_progress, + PartTransDispatchInfo &dispatch_info) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == last_dispatch_progress_)) { + LOG_ERROR("invalid last_dispatch_progress_", K(last_dispatch_progress_)); + ret = OB_NOT_INIT; + } + // Get PartTransDispatcher member values without locking + else { + // The default is to take the progress of the last issue + dispatch_progress = ATOMIC_LOAD(&last_dispatch_progress_); + dispatch_info.last_dispatch_log_id_ = ATOMIC_LOAD(&last_dispatch_log_id_); + dispatch_info.current_checkpoint_ = ATOMIC_LOAD(&checkpoint_); + + // Access is not atomic and may be biased + dispatch_info.pending_task_count_ = get_total_task_count_(); + + // Update dispatch progress based on the task being dispatched + task_queue_.update_dispatch_progress_by_task_queue(dispatch_progress, dispatch_info); + } + + return ret; +} + +double PartTransDispatcher::get_tps() +{ + int64_t current_timestamp = get_timestamp(); + int64_t local_created_trans_count = ATOMIC_LOAD(&created_trans_count_); + int64_t local_last_created_trans_count = ATOMIC_LOAD(&last_created_trans_count_); + int64_t local_last_stat_time = last_stat_time_; + int64_t delta_create_count = local_created_trans_count - local_last_created_trans_count; + int64_t delta_time = current_timestamp - local_last_stat_time; + double create_tps = 0.0; + + // Update the last statistics + last_created_trans_count_ = local_created_trans_count; + last_stat_time_ = current_timestamp; + + if (delta_time > 0) { + create_tps = (double)(delta_create_count) * 1000000.0 / (double)delta_time; + } + + return create_tps; +} + +int PartTransDispatcher::update_checkpoint(int64_t new_checkpoint) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_VERSION == new_checkpoint)) { + LOG_ERROR("invalid argument", K(new_checkpoint)); + ret = OB_INVALID_ARGUMENT; + } else { + // prepare log records the checkpoint, which is not guaranteed to be incremented due to concurrent transaction scenarios + // so checkpoint is only updated when it is bigger + if (new_checkpoint > checkpoint_) { + ATOMIC_STORE(&checkpoint_, new_checkpoint); + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_part_trans_dispatcher.h b/src/liboblog/src/ob_log_part_trans_dispatcher.h new file mode 100644 index 0000000000000000000000000000000000000000..ef86ed5c92ce6cb3b460c6a5c9f65f2cc45f2a6a --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_dispatcher.h @@ -0,0 +1,235 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_DISPATCHER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_DISPATCHER_H__ + +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/hash_func/murmur_hash.h" // murmurhash +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock +#include "common/ob_partition_key.h" // ObPartitionKey +#include "storage/transaction/ob_trans_define.h" // ObTransID, ObRedoLogIdArray + +#include "ob_log_task_pool.h" // ObLogTransTaskPool +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_part_trans_task_queue.h" // SafePartTransTaskQueue + +namespace oceanbase +{ +namespace liboblog +{ + +// ObTransID + PKey: uniquely identifies a partition transaction task +struct PartTransID +{ + transaction::ObTransID trans_id_; + common::ObPartitionKey pkey_; + + PartTransID(const transaction::ObTransID &trans_id, const common::ObPartitionKey &pkey); + bool operator == (const PartTransID &part_trans_id) const; + uint64_t hash() const; + + TO_STRING_KV(K_(trans_id), K_(pkey)); +}; + +struct PartTransDispatchInfo +{ + uint64_t last_dispatch_log_id_; + int64_t current_checkpoint_; + int64_t pending_task_count_; // The total number of tasks waiting, both in the queue and in the Map + int64_t task_count_in_queue_; // Number of queued tasks + + const char *next_task_type_; + uint64_t next_trans_log_id_; + bool next_trans_committed_; + bool next_trans_ready_to_commit_; + int64_t next_trans_global_version_; + + PartTransDispatchInfo(); + + TO_STRING_KV( + K_(last_dispatch_log_id), + K_(current_checkpoint), + K_(pending_task_count), + K_(task_count_in_queue), + K_(next_task_type), + K_(next_trans_log_id), + K_(next_trans_committed), + K_(next_trans_ready_to_commit), + K_(next_trans_global_version)); +}; + +class PartTransDispatcher; +struct TransCommitInfo +{ + uint64_t log_id_; + int64_t log_ts_; + + TransCommitInfo(const uint64_t log_id, const int64_t log_ts); + TransCommitInfo() { reset(); } + ~TransCommitInfo() { reset(); } + void reset(); + + TO_STRING_KV( + K_(log_id), + K_(log_ts)); +}; +typedef common::ObLinearHashMap<PartTransID, PartTransTask *> PartTransTaskMap; +typedef common::ObLinearHashMap<transaction::ObTransID, TransCommitInfo> TransCommitMap; +typedef ObLogTransTaskPool<PartTransTask> TaskPool; +class IObLogFetcherDispatcher; + +class PartTransDispatcher +{ +public: + PartTransDispatcher(const char *pkey_str, + TaskPool &task_pool, + PartTransTaskMap &task_map, + TransCommitMap &trans_commit_map, + IObLogFetcherDispatcher &dispatcher); + virtual ~PartTransDispatcher(); + +public: + // Class global variable + // Counts the number of partition transaction tasks in the Fetcher + static int64_t g_part_trans_task_count; + +public: + int init(const common::ObPartitionKey &pkey, const int64_t start_tstamp); + + // Generate heartbeat task + int heartbeat(const common::ObPartitionKey &pkey, + const int64_t tstamp); + + // Put a prepared task on the output queue, ready for output + // Note: the task must be a DML transaction or a DDL transaction + int prepare_task(PartTransTask &task); + + // Dispatch tasks in the ready state, that is, tasks that have been committed + int dispatch_part_trans(volatile bool &stop_flag, int64_t &pending_task_count); + + // Dispatch the offline-partition task + int dispatch_offline_partition_task(const common::ObPartitionKey &pkey, volatile bool &stop_flag); + + // DDL: Clear all tasks and revert all tasks to free memory + // DML: Clear all tasks located in task_map_; the task itself waits for callbacks before being freed + int clean_task(const common::ObPartitionKey &pkey); + + // Delete the partition transaction task identified by PartTransID and revert its memory + // 1. 
Call this function when handle abort log + // 2. Call this function when handle unserved partition transaction-prepare log + // + // For DML partition transaction task: + // 1. Remove PartTransTask from task_queue_, but can not revert memory + // 2. Mark the PartTransTask status is UNSERVED, it cat revert memory until all LogEntryTask callback + int remove_task(const bool is_ddl_part, const PartTransID &trans_id); + + int insert_commit_trans(const transaction::ObTransID &trans_id, + const TransCommitInfo &trans_commit_info); + + // Look up transaction table based on trans_id, determine if it is a commit transaction + int find_commit_trans_info(const transaction::ObTransID &trans_id, + bool &is_commit_trans); + + // Assign a task based on trans_id + int alloc_task(const PartTransID &trans_id, PartTransTask *&task); + + // Get the corresponding task based on trans_id + int get_task(const PartTransID &trans_id, PartTransTask *&task); + + // Get the progress of the assignment and ensure atomicity + int get_dispatch_progress(int64_t &progress, PartTransDispatchInfo &dispatch_info); + + // Get TPS information + double get_tps(); + + const common::ObPartitionKey &get_partition() const { return pkey_; } + const char *get_pkey_str() const { return pkey_str_; } + + int64_t get_checkpoint() const { return ATOMIC_LOAD(&checkpoint_); } + + int update_checkpoint(int64_t checkpoint); + +public: + TO_STRING_KV(K_(pkey_str), + K_(init_dispatch_progress), + K_(last_dispatch_progress), + K_(last_dispatch_log_id), + K_(task_count_only_in_map), + "task_queue_size", task_queue_.size(), + K_(checkpoint)); + +protected: + void update_status_after_consume_task_(const int64_t task_tstamp, const uint64_t prepare_log_id); + int dispatch_part_trans_task_(PartTransTask &task, volatile bool &stop_flag); + int handle_before_fetcher_dispatch_(PartTransTask &task); + int handle_dml_trans_before_fetcher_dispatch_(PartTransTask &task); + void push_task_queue_(PartTransTask &task); + PartTransTask* pop_task_queue_(); + int64_t get_total_task_count_(); + int check_task_ready_(PartTransTask &task, bool &task_is_ready); + +protected: + common::ObPartitionKey pkey_; + const char *pkey_str_; + + // Constructors initialise variables + TaskPool &task_pool_; + // Map is shared globally by all partitions and holds all transactions that have not yet been committed + PartTransTaskMap &task_map_; // Map for assembling partitioned transactions + // Shared globally by all partitions, transaction information with commit status is stored + TransCommitMap &trans_commit_map_; + IObLogFetcherDispatcher &dispatcher_; + + // Partitioned transaction sequencing queue + // + // holds the tasks in this partition that have been prepped but not yet exported, which have an intersection with the tasks in task_map_ + // special treatment of tasks in the intersection when recycling tasks + // + // The queue itself is thread-safe and internally lock-controlled + SafeTaskWithRecordQueue task_queue_; + + int64_t init_dispatch_progress_; // init progress + int64_t last_dispatch_progress_; // dispatch progress + uint64_t last_dispatch_log_id_; // The last log ID of the task that has been output, the prepare log ID + int64_t task_count_only_in_map_; // Number of tasks in map only + + // 1. maintain a partition level checkpoint value for single machine multipartition transaction posting + // 2. advance this value in both the partition resolution prepare log and the checkpoint log + // 3. 
ensure monotonic incrementing + int64_t checkpoint_; + + // Statistical values + int64_t created_trans_count_; // Number of created transactions counted + int64_t last_created_trans_count_; // Number of last created transactions counted + int64_t last_stat_time_; // time of last statistical + + // consumption lock, if you want to consume tasks in task_queue_, you have to add this lock + // + // The usage model for this class is: single-threaded production, multi-threaded consumption + // + // The mutual exclusion between production and consumption is guaranteed by the lock on task_queue_ itself, + // while the mutual exclusion between consumption and consumption is guaranteed by dispatch_lock_. + // + // In order to ensure that get_dispatch_progress() accesses the next element to be dispatched is atomic, + // i.e. the whole process from popping out of the queue to dispatch completion has to be atomic for get_dispatch_progress(), + // otherwise the dispatched progress obtained is not correct. therefore, get_dispatch_progress() should also add dispatch_lock_ + common::ObByteLock dispatch_lock_; + +private: + DISALLOW_COPY_AND_ASSIGN(PartTransDispatcher); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_trans_parser.cpp b/src/liboblog/src/ob_log_part_trans_parser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..059875da3ac8164a23d48143d29e5cb0e239c35f --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_parser.cpp @@ -0,0 +1,658 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_PARSER + +#include "ob_log_part_trans_parser.h" + +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_binlog_record_pool.h" // IObLogBRPool +#include "ob_log_meta_manager.h" // IObLogMetaManager +#include "ob_log_instance.h" // TCTX +#include "ob_log_config.h" // TCONF +#include "ob_log_row_data_index.h" // ObLogRowDataIndex + +using namespace oceanbase::common; +using namespace oceanbase::transaction; +using namespace oceanbase::memtable; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogPartTransParser::ObLogPartTransParser() : + inited_(false), + br_pool_(NULL), + meta_manager_(NULL), + cluster_id_(OB_INVALID_CLUSTER_ID) +{} + +ObLogPartTransParser::~ObLogPartTransParser() +{ + destroy(); +} + +void ObLogPartTransParser::destroy() +{ + inited_ = false; + cluster_id_ = OB_INVALID_CLUSTER_ID; + br_pool_ = NULL; + meta_manager_ = NULL; +} + +int ObLogPartTransParser::init(IObLogBRPool *br_pool, + IObLogMetaManager *meta_manager, + const int64_t cluster_id) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("parser has been initialized", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(br_pool_ = br_pool) + || OB_ISNULL(meta_manager_ = meta_manager) + || OB_UNLIKELY(OB_INVALID_CLUSTER_ID == cluster_id)) { + LOG_ERROR("invalid argument", K(br_pool), K(meta_manager), K(cluster_id)); + ret = OB_INVALID_ARGUMENT; + } else { + cluster_id_ = cluster_id; + inited_ = true; + + LOG_INFO("init PartTransParser succ", K(cluster_id)); + } + return ret; +} + +int ObLogPartTransParser::parse(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! task.is_ddl_trans())) { + LOG_ERROR("task type is not supported", K(task)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(! task.is_task_info_valid())) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + SortedRedoLogList &sorted_redo_list = task.get_sorted_redo_list(); + + // Parse Redo logs if they exist + if (sorted_redo_list.log_num_ > 0 && OB_FAIL(parse_ddl_redo_log_(task, stop_flag))) { + LOG_ERROR("parse_ddl_redo_log_ fail", KR(ret), K(task)); + } + } + + return ret; +} + +int ObLogPartTransParser::parse(ObLogEntryTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + PartTransTask *part_trans_task = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! task.is_valid())) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(part_trans_task = static_cast(task.get_host()))) { + LOG_ERROR("part_trans_task is NULL", K(part_trans_task)); + ret = OB_ERR_UNEXPECTED; + } else { + ObLogTenant *tenant = NULL; + ObLogTenantGuard guard; + // Incremental within PartTransTask + uint64_t &row_no = part_trans_task->get_row_no(); + const uint64_t tenant_id = part_trans_task->get_tenant_id(); + + // DDL data/non-PG partitioned data need to be deserialized in whole rows, not filtered + // otherwise need to get tenant structure and perform filtering + if (OB_SUCC(ret)) { + if (! 
should_not_filter_row_(*part_trans_task)) { + if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + // Tenants must exist here + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id), KPC(part_trans_task)); + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("tenant is null", K(tenant_id), K(tenant), K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + } + } + + if (OB_SUCC(ret)) { + const DmlRedoLogNode &redo_node = task.get_redo_log_node(); + + if (OB_UNLIKELY(! redo_node.is_valid())) { + LOG_ERROR("redo_node is invalid", "redo_node", redo_node); + ret = OB_INVALID_DATA; + // Calibrate data for completeness + } else if (OB_UNLIKELY(! redo_node.check_data_integrity())) { + LOG_ERROR("redo data is not valid", K(redo_node)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(parse_stmts_(tenant, redo_node.data_, redo_node.size_, task, *part_trans_task, row_no, stop_flag))) { + LOG_ERROR("parse_stmts_ fail", KR(ret), K(tenant), "redo_node", redo_node, K(task), K(row_no)); + } else { + LOG_DEBUG("[PARSE] log_entry_task parse succ", K(task)); + } + } + } + + return ret; +} + +int ObLogPartTransParser::parse_ddl_redo_log_(PartTransTask &task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + int64_t redo_num = 0; + SortedRedoLogList &sorted_redo_list = task.get_sorted_redo_list(); + DdlRedoLogNode *redo_node = static_cast(sorted_redo_list.head_); + const uint64_t tenant_id = task.get_tenant_id(); + + if (OB_UNLIKELY(! sorted_redo_list.is_valid())) { + LOG_ERROR("redo log list is invalid", K(sorted_redo_list), K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + // Used to assign row_index to DML and DDL statements, partitioned transaction statements are ordered, starting from 0 + uint64_t row_index = 0; + ObLogTenant *tenant = NULL; + ObLogTenantGuard guard; + // just declear here + ObLogEntryTask redo_log_entry_task; + + // DDL data/non-PG partitioned data need to be deserialized in whole rows, not filtered + // otherwise need to get tenant structure and perform filtering + if (! should_not_filter_row_(task)) { + if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + // tenant must exist here + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("tenant is null", K(tenant_id), K(tenant), K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + } + + if (OB_SUCC(ret)) { + while (OB_SUCCESS == ret && NULL != redo_node) { + LOG_DEBUG("parse redo log", "redo_node", *redo_node); + + if (OB_UNLIKELY(! redo_node->is_valid())) { + LOG_ERROR("redo_node is invalid", "redo_node", *redo_node, "redo_index", redo_num); + ret = OB_INVALID_DATA; + // Verify that the Redo log serial number is accurate + } else if (OB_UNLIKELY(redo_node->start_log_no_ != redo_num)) { + LOG_ERROR("redo log_no is incorrect", "start_redo_no", redo_node->start_log_no_, + "expected_redo_no", redo_num, KPC(redo_node)); + ret = OB_INVALID_DATA; + } + // Calibrate data for completeness + else if (OB_UNLIKELY(! 
redo_node->check_data_integrity())) { + LOG_ERROR("redo data is not valid", KPC(redo_node)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(parse_stmts_(tenant, redo_node->data_, redo_node->size_, redo_log_entry_task, task, row_index, stop_flag))) { + LOG_ERROR("parse_stmts_ fail", KR(ret), K(tenant), "redo_node", *redo_node, K(task), K(row_index)); + } else { + redo_num += redo_node->get_log_num(); + redo_node = static_cast(redo_node->next_); + } + } // while + } + } + + return ret; +} + +int ObLogPartTransParser::parse_stmts_(ObLogTenant *tenant, + const char *redo_data, + const int64_t redo_data_len, + ObLogEntryTask &redo_log_entry_task, + PartTransTask &task, + uint64_t &row_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(redo_data) || OB_UNLIKELY(redo_data_len <= 0)) { + LOG_ERROR("invalid argument", K(redo_data), K(redo_data_len), K(task), K(redo_log_entry_task)); + ret = OB_INVALID_ARGUMENT; + } else { + bool is_ddl_trans = task.is_ddl_trans(); + int64_t pos = 0; + + // parse statement + while (OB_SUCCESS == ret && pos < redo_data_len) { + bool need_filter_row = false; + int32_t row_size = 0; + + if (OB_FAIL(filter_row_data_(tenant, redo_data, redo_data_len, pos, task, need_filter_row, row_size, stop_flag))) { + LOG_ERROR("filter_row_data_ fail", KR(ret), K(tenant), K(pos), K(task), K(need_filter_row), K(row_size)); + } else if (need_filter_row) { + // filter this row, move pos to next row + pos += row_size; + } else { + // parse row + void *mutator_row_buf = NULL; + MutatorRow *row = NULL; + if (is_ddl_trans) { + mutator_row_buf = task.alloc(sizeof(MutatorRow)); + } else { + mutator_row_buf = redo_log_entry_task.alloc(sizeof(MutatorRow)); + } + + if (OB_ISNULL(row = static_cast(mutator_row_buf))) { + LOG_ERROR("alloc memory for MutatorRow fail", K(sizeof(MutatorRow))); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // FIXME: Destroy MutatorRow from regular channels and free memory + // Currently destroyed in DmlStmtTask and DdlStmtTask, but no memory is freed + // Since this memory is allocated by the Allocator of the PartTransTask, it is guaranteed not to leak + if (is_ddl_trans) { + new (row) MutatorRow(task.get_allocator()); + } else { + new (row) MutatorRow(redo_log_entry_task.get_allocator()); + } + + // Deserialising row data + if (OB_FAIL(row->deserialize(redo_data, redo_data_len, pos))) { + LOG_ERROR("deserialize mutator row fail", KR(ret), KPC(row), K(redo_data_len), K(pos)); + } + // First determine if it is a rollback savepoint by the Row + else if (is_rollback_savepoint_stmt_(*row)) { + if (is_ddl_trans) { + if (OB_FAIL(handle_ddl_part_rollback_savepoint_(row_index, task, *row))) { + LOG_ERROR("handle_ddl_part_rollback_savepoint_ failed", KR(ret), K(row_index), K(task), KPC(row)); + } + } else { + // Non-incrementing row_index + if (OB_FAIL(handle_dml_part_rollback_savepoint_(row_index, task, redo_log_entry_task, *row))) { + LOG_ERROR("handle_dml_part_rollback_savepoint_ failed", KR(ret), K(row_index), K(task), + K(redo_log_entry_task), KPC(row)); + } + } + + if (is_ddl_trans) { + row->~MutatorRow(); + task.free(row); + row = NULL; + } + } + // For DDL partitions, only parse data from the same table and filter data from other unrelated + // tables such as index tables; prevent the generation of non-DDL type statements. + // + // For DML partitioning, you cannot parse only the table data, because Sequencer needs the data + // of the unique index table for dependency analysis and to ensure data consistency. 
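+ // + // Illustrative note (hypothetical table_ids, not part of the original patch): if the DDL partition's own table_id is 1100 + // and the deserialized row carries table_id 1101 (e.g. an index table of the DDL table), the branch below frees the row + // immediately and row_index is not advanced; only rows whose table_id matches reach parse_ddl_stmts_, while DML rows + // always fall through to parse_dml_stmts_.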
+ else if (is_ddl_trans && task.get_partition().get_table_id() != row->table_id_) { + row->~MutatorRow(); + task.free(row); + row = NULL; + } else { + // parse row data + if (is_ddl_trans) { + if (OB_FAIL(parse_ddl_stmts_(row_index, *row, task))) { + LOG_ERROR("parse_ddl_stmts_ fail", KR(ret), K(row_index), K(*row), K(task)); + } + } else if (OB_FAIL(parse_dml_stmts_(row_index, *row, redo_log_entry_task, task))) { + LOG_ERROR("parse_dml_stmts_ fail", KR(ret), K(row_index), K(*row), K(redo_log_entry_task), K(task)); + } + + if (OB_SUCC(ret)) { + ++row_index; + } + } + } + } // need_filter_row=false + } + } + + return ret; +} + +// 1. savepoint is an internal transaction concept, a bitpoint set by the unrolled part of the transaction, for statement-level rollback within the transaction +// 2. +// 2. The OB rolls back a transaction statement to the set savepoint point by rolling back the savepoint, for example: +// (begin)(1)(2)(3)(sp1)(4)(5)(rollback sp1)(6)(commit) +// execute result of this transaction:(1)(2)(3)(6) +// +// 3. savepoint usage example: +// (begin)(sp1)(1)(2)(sp2)(3)(rollback sp2)(commit) => (1)(2) +// (begin)(sp1)(1)(2)(sp2)(3)(rollback sp1)(commit) => null +// (begin)(1)(sp1)(2)(sp2)(3)(rollback sp1)(commit) => (1) +// (begin)(1)(sp1)(2)(sp2)(3)(rollback sp2)(commit) => (1)(2) +// (begin)(1)(sp1)(2)(sp2)(3)(rollback sp2)(4)(rollback sp1)(5) => (1)(5) +// +// 4. liboblog outputs bin record by parsing the clog log, which only records rollback statements in the clog log, not savepoint statements +// savepoint information is stored in the memory of the coordinator of the transaction, and the transaction will fail if the coordinator is down. +// rollback statements are recorded in the clog by flag and sql_no to indicate that the statement is a rollback statement and rolls back to a stmt +// +// 5. so there is no need to process the savepoint statement (which is not recorded in the clog), only the rollback savepoint statement task order traversal, dropping the sql_no is greater than the sql_no recorded in the rollback savepoint statement +// +// Only DDL partition calls are supported +int ObLogPartTransParser::handle_ddl_part_rollback_savepoint_(const uint64_t row_index, + PartTransTask &task, + MutatorRow &row) +{ + int ret = OB_SUCCESS; + const int32_t flag = row.flag_; + const int32_t sql_no = row.sql_no_; + + if (OB_UNLIKELY(! 
task.is_ddl_trans())) { + LOG_ERROR("task is not ddl trans, unexcepted", K(row_index), K(task), K(row)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(1 != flag) || OB_UNLIKELY(sql_no < 0) || OB_UNLIKELY(OB_INVALID_ID == row_index)) { + LOG_ERROR("invalid argument", K(flag), K(sql_no), K(row_index), K(row)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(task.revert_by_rollback_savepoint(row_index, sql_no))) { + LOG_ERROR("task revert_stmt_by_rollback_savepoint failed", KR(ret), K(sql_no), K(task)); + } else { + // succ + LOG_DEBUG("handle rollback savepoint succ", K(row), K(task)); + } + + return ret; +} + +int ObLogPartTransParser::handle_dml_part_rollback_savepoint_(const uint64_t row_index, + PartTransTask &part_trans_task, + ObLogEntryTask &log_entry_task, + MutatorRow &row) +{ + int ret = OB_SUCCESS; + const bool is_rollback = true; + + if (OB_FAIL(parse_dml_stmts_(row_index, row, log_entry_task, part_trans_task, is_rollback))) { + LOG_ERROR("parse_dml_stmts_ fail", KR(ret), K(row_index), K(row), K(log_entry_task), K(part_trans_task), + K(is_rollback)); + } else { + LOG_DEBUG("handle dml rollback savepoint succ", K(row), K(row_index), K(log_entry_task), K(part_trans_task), + K(is_rollback)); + } + + return ret; +} + +// To support filtering of table data within PG, the filtering algorithm is as follows: +// 1. PG-DML transaction parses out the row_size and table_id first, avoiding deserializing the entire row and causing performance overhead +// 2. Query the TableIDCache based on table_id, if it exists, then the data is required +// 3. When it does not exist, the table_id may be blacklisted data or a future table that cannot be filtered +// 4. parse out row_size, table_id, rowkey, table_version +// 5. cur_schema_version based on table_version and PartMgr processing: +// (1) When table_version > cur_schema_version, it means it is a future table, then you need to wait for +// PartMgr processing to push up the schema version, until it is greater than or equal to tabel_version, +// then query TableIDCache again, if it exists, it is needed, otherwise it is filtered +// (2) When table_version <= cur_schema_version, it means it is no longer a future table, then filter it out +int ObLogPartTransParser::filter_row_data_(ObLogTenant *tenant, + const char *redo_data, + const int64_t redo_data_len, + const int64_t cur_pos, + PartTransTask &task, + bool &need_filter_row, + int32_t &row_size, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + // No filtering by default + need_filter_row = false; + row_size = 0; + const bool is_pg = task.is_pg(); + const uint64_t tenant_id = task.get_tenant_id(); + // Temporary row data structure to avoid allocation of row data memory + // TODO allocator + MutatorRow row(task.get_allocator()); + + if (OB_ISNULL(redo_data) || OB_UNLIKELY(redo_data_len <= 0) || OB_UNLIKELY(cur_pos < 0)) { + LOG_ERROR("invalid argument", K(redo_data), K(task), K(redo_data_len), K(cur_pos)); + ret = OB_INVALID_ARGUMENT; + } else if (should_not_filter_row_(task)) { + // DDL data/non-PG partitioned data all need to be deserialized in whole rows, no filtering + need_filter_row = false; + } else { + int64_t pos = cur_pos; + uint64_t table_id = OB_INVALID_ID; + int64_t table_version = 0; + bool is_exist = false; + + // Filtering requires that the tenant must be valid + if (OB_ISNULL(tenant)) { + LOG_ERROR("tenant is null", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else { + IObLogPartMgr &part_mgr = tenant->get_part_mgr(); + // Note: In the TableIDCache based 
on table_id, you should get the current schema version in advance, + // because the schema version will keep changing, and getting the schema version first will have the following bad case: + // Assume data: table_id=1001, table_version=100 cur_schema_version=90 + // 1. future table, based on table_id query TableIDCache does not exist + // 2. get schema version, at this point cur_schema_version=100 + // 3. Parse to get table_version, because table_version <= cur_schema_version will result in false filtering out + const int64_t part_mgr_cur_schema_verison = tenant->get_schema_version(); + + if (OB_FAIL(row.deserialize_first(redo_data, redo_data_len, pos, row_size, table_id))) { + LOG_ERROR("deserialize row_size and table_id fail", KR(ret), K(row), K(redo_data_len), K(pos), + K(row_size), K(table_id), K(is_pg)); + } else if (OB_FAIL(part_mgr.is_exist_table_id_cache(table_id, is_exist))) { + LOG_ERROR("part_mgr is_exist_table_id_cache fail", KR(ret), K(table_id), K(is_pg), K(is_exist)); + } else if (is_exist) { + // Located in the whitelist, data does not need to be filtered + need_filter_row = false; + } else { + // Not present, may need to filter or future table + if (TCONF.test_mode_on) { + static int cnt = 0; + int64_t block_time_us = TCONF.test_mode_block_parser_filter_row_data_sec * _SEC_; + // Only the first statement blocks + if (block_time_us > 0 && 0 == cnt) { + LOG_INFO("[FILTER_ROW] [TEST_MODE_ON] block to filter row", + K(block_time_us), K(table_id), K(is_pg), K(row_size), K(cur_pos), K(need_filter_row), K(cnt)); + ++cnt; + usleep((useconds_t)block_time_us); + } + } + + // Continue parsing to get table_version + if (OB_FAIL(row.deserialize_second(redo_data, redo_data_len, pos, table_version))) { + LOG_ERROR("deserialize table_version fail", KR(ret), K(row), K(redo_data_len), K(pos), + K(row_size), K(table_id), K(is_pg), K(table_version)); + } else { + // There will be no data with table_version=0 in the current row, if it occurs only an error will be reported and won't exit + if (OB_UNLIKELY(table_version <= 0)) { + LOG_ERROR("desrialize row data, table version is less than 0, unexcepted", + K(table_id), K(is_pg), K(table_version), K(task), K(part_mgr_cur_schema_verison)); + } + + if (table_version <= part_mgr_cur_schema_verison) { + // Blacklisted data needs to be filtered out + need_filter_row = true; + } else { + RETRY_FUNC(stop_flag, part_mgr, handle_future_table, table_id, table_version, DATA_OP_TIMEOUT, is_exist); + + if (OB_SUCC(ret)) { + if (! is_exist) { + need_filter_row = true; + } else { + need_filter_row = false; + } + } + } + } + } + LOG_DEBUG("[FILTER_ROW]", K(tenant_id), K(table_id), K(need_filter_row), + "table_pure_id", extract_pure_id(table_id), + K(row_size), K(table_version), K(cur_pos), K(pos)); + } + } + + return ret; +} + +bool ObLogPartTransParser::should_not_filter_row_(PartTransTask &task) +{ + bool bool_ret = false; + + bool_ret = task.is_ddl_trans() || (! task.is_pg()); + + return bool_ret; +} + +// Parsing DDL statements +// Construct DDL Binlog Record directly +int ObLogPartTransParser::parse_ddl_stmts_(const uint64_t row_index, MutatorRow &row, PartTransTask &task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_) || OB_UNLIKELY(OB_INVALID_ID == row_index)) {
+    LOG_ERROR("invalid argument", K(inited_), K(row_index));
+    ret = OB_INVALID_ARGUMENT;
+  } else {
+    DdlStmtTask *stmt_task = static_cast<DdlStmtTask *>(task.alloc(sizeof(DdlStmtTask)));
+    ObLogBR *br = NULL;
+    int64_t update_schema_version = 0;
+    ITableMeta *ddl_table_meta = meta_manager_->get_ddl_table_meta();
+
+    if (OB_ISNULL(stmt_task)) {
+      LOG_ERROR("allocate memory for DdlStmtTask fail", "size", sizeof(DdlStmtTask));
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+    } else if (OB_FAIL(br_pool_->alloc(false/*is_serialized*/, br, &task))) {
+      LOG_ERROR("alloc binlog record from pool fail", KR(ret), K(br_pool_));
+    } else if (OB_ISNULL(br)) {
+      LOG_ERROR("alloc binlog record fail", K(br));
+      ret = OB_ERR_UNEXPECTED;
+    } else if (OB_FAIL(br->set_table_meta(ddl_table_meta))) {
+      LOG_ERROR("set table meta fail", KR(ret), K(br), K(ddl_table_meta));
+    } else {
+      new (stmt_task) DdlStmtTask(task, row, cluster_id_);
+      uint64_t exec_tenant_id = OB_INVALID_TENANT_ID;
+
+      // Parse the DDL statement information
+      bool is_valid_ddl = false;
+      if (OB_FAIL(stmt_task->parse_ddl_info(br, row_index, is_valid_ddl, update_schema_version, exec_tenant_id))) {
+        LOG_ERROR("parse_ddl_info fail", KR(ret), K(*stmt_task), K(br), K(row_index), K(is_valid_ddl),
+            K(update_schema_version), K(exec_tenant_id));
+      } else if (! is_valid_ddl) {
+        // Discard invalid DDL statement tasks
+        stmt_task->~DdlStmtTask();
+        task.free(stmt_task);
+        stmt_task = NULL;
+
+        // recycle Binlog Record
+        br_pool_->free(br);
+        br = NULL;
+      } else if (OB_FAIL(task.add_ddl_stmt(row_index, stmt_task))) {
+        LOG_ERROR("add stmt into trans task fail", KR(ret), K(task), K(row_index),
+            "stmt_task", *stmt_task);
+      } else {
+        // succ
+      }
+
+      // Update the schema version, whether or not a DDL statement was added
+      if (OB_SUCC(ret)) {
+        task.update_local_schema_version(update_schema_version);
+
+        if (OB_UNLIKELY(OB_INVALID_TENANT_ID == exec_tenant_id)) {
+          LOG_ERROR("exec_tenant_id is invalid", K(exec_tenant_id), K(task));
+          ret = OB_INVALID_ARGUMENT;
+        } else {
+          task.set_exec_tenant_id(exec_tenant_id);
+        }
+      }
+    }
+
+    if (OB_SUCCESS != ret) {
+      if (NULL != stmt_task) {
+        stmt_task->~DdlStmtTask();
+        task.free(stmt_task);
+        stmt_task = NULL;
+      }
+
+      if (NULL != br) {
+        br_pool_->free(br);
+        br = NULL;
+      }
+    }
+  }
+
+  return ret;
+}
+
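+// Memory-ownership summary for parse_ddl_stmts_() above and parse_dml_stmts_() below:
+//   - DdlStmtTask and ObLogRowDataIndex are allocated from the PartTransTask allocator via
+//     task.alloc(), while DmlStmtTask is allocated from the ObLogEntryTask allocator via
+//     redo_log_entry_task.alloc(); objects are then constructed with placement new.
+//   - On any failure after allocation, the object is destroyed explicitly (~T()) and its memory
+//     is handed back through the matching free(); a binlog record taken from br_pool_ is
+//     likewise returned with br_pool_->free().
+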
+// parse DML statement
+//
+// After a DML statement is constructed it is appended directly to the linked list; the column
+// data is not parsed here and no Binlog Record is constructed, because:
+// 1. it saves memory
+// 2. the corresponding schema must be obtained first in order to parse the column data correctly
+int ObLogPartTransParser::parse_dml_stmts_(const uint64_t row_index,
+    MutatorRow &row,
+    ObLogEntryTask &redo_log_entry_task,
+    PartTransTask &task,
+    const bool is_rollback)
+{
+  int ret = OB_SUCCESS;
+  // DmlStmtTask needs to allocate memory based on the LogEntryTask
+  DmlStmtTask *stmt_task = static_cast<DmlStmtTask *>(redo_log_entry_task.alloc(sizeof(DmlStmtTask)));
+  // Row indexes exist globally and require memory allocation based on the PartTransTask
+  ObLogRowDataIndex *row_data_index = static_cast<ObLogRowDataIndex *>(task.alloc(sizeof(ObLogRowDataIndex)));
+  const uint64_t tenant_id = task.get_tenant_id();
+  const char *participant_key_str = task.get_participant_key_str();
+  const uint64_t log_id = redo_log_entry_task.get_log_id();
+  const int32_t log_offset = redo_log_entry_task.get_log_offset();
+  const int32_t row_sql_no = row.sql_no_;
+
+  if (OB_ISNULL(stmt_task) || OB_ISNULL(row_data_index)) {
+    LOG_ERROR("allocate memory for DmlStmtTask or ObLogRowDataIndex fail", "Dmlsize", sizeof(DmlStmtTask),
+        "RowIndexSize", sizeof(ObLogRowDataIndex));
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+  } else {
+    new (row_data_index) ObLogRowDataIndex();
+
+    if (OB_FAIL(row_data_index->init(tenant_id, participant_key_str, log_id, log_offset, row_index, is_rollback, row_sql_no))) {
+      LOG_ERROR("row_data_index init fail", KR(ret), K(tenant_id), K(row_data_index),
+          K(participant_key_str), K(log_id), K(log_offset), K(row_index), K(is_rollback), K(row_sql_no),
+          K(task), K(redo_log_entry_task));
+    } else {
+      new (stmt_task) DmlStmtTask(task, redo_log_entry_task, *row_data_index, row);
+
+      row_data_index->set_host(&task);
+
+      if (OB_FAIL(redo_log_entry_task.add_stmt(row_index, stmt_task))) {
+        LOG_ERROR("add stmt into trans task fail", KR(ret), K(task), K(row_index), "stmt_task", *stmt_task);
+      } else {
+        // Update the local schema version of the PartTransTask
+        task.update_local_schema_version(stmt_task->get_table_version());
+
+        LOG_DEBUG("add_stmt succ", KPC(stmt_task), K(redo_log_entry_task));
+      }
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+    if (NULL != stmt_task) {
+      stmt_task->~DmlStmtTask();
+      redo_log_entry_task.free(stmt_task);
+      stmt_task = NULL;
+    }
+
+    if (NULL != row_data_index) {
+      row_data_index->~ObLogRowDataIndex();
+      task.free(row_data_index);
+      row_data_index = NULL;
+    }
+  }
+
+  return ret;
+}
+}
+}
diff --git a/src/liboblog/src/ob_log_part_trans_parser.h b/src/liboblog/src/ob_log_part_trans_parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..a5ac00fd2a3ec250137dcc9fda54df1f59e6f9b9
--- /dev/null
+++ b/src/liboblog/src/ob_log_part_trans_parser.h
@@ -0,0 +1,117 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_PARSER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_PARSER_H_ + +#include "lib/utility/ob_macro_utils.h" +#include "ob_log_tenant.h" // ObLogTenant +#include + +namespace oceanbase +{ +namespace liboblog +{ +class PartTransTask; + +class IObLogPartTransParser +{ +public: + virtual ~IObLogPartTransParser() {} + + enum { DATA_OP_TIMEOUT = 200 * 1000 }; + +public: + virtual int parse(PartTransTask &task, volatile bool &stop_flag) = 0; + + virtual int parse(ObLogEntryTask &task, volatile bool &stop_flag) = 0; +}; + + +////////////////////////////////////// ObLogPartTransParser ////////////////////////////////////// +// thread safe + +class MutatorRow; +class IObLogBRPool; +class IObLogMetaManager; +class ObLogPartTransParser : public IObLogPartTransParser +{ +public: + ObLogPartTransParser(); + virtual ~ObLogPartTransParser(); + +public: + virtual int parse(PartTransTask &task, volatile bool &stop_flag); + virtual int parse(ObLogEntryTask &task, volatile bool &stop_flag); + +public: + int init(IObLogBRPool *br_pool, + IObLogMetaManager *meta_manager, + const int64_t cluster_id); + void destroy(); + +private: + bool is_rollback_savepoint_stmt_(MutatorRow &row) const + { + return row.is_rollback_stmt(); + } + int handle_ddl_part_rollback_savepoint_(const uint64_t row_index, + PartTransTask &part_trans_task, + MutatorRow &row); + int handle_dml_part_rollback_savepoint_(const uint64_t row_index, + PartTransTask &part_trans_task, + ObLogEntryTask &log_entry_task, + MutatorRow &row); + int parse_ddl_redo_log_(PartTransTask &task, volatile bool &stop_flag); + int parse_stmts_(ObLogTenant *tenant, + const char *redo_data, + const int64_t redo_data_len, + ObLogEntryTask &redo_log_entry_task, + PartTransTask &task, + uint64_t &row_index, + volatile bool &stop_flag); + // 1. Non-PG partitions do not filter row data for now. TODO: Turn on later + // 2. For PG partitions, filter out non-whitelisted and common index data based on TableIDCache filtering + // 3. Do not filter DDL partition data to avoid transaction data and DDL data dependency + int filter_row_data_(ObLogTenant *tenant, + const char *redo_data, + const int64_t redo_data_len, + const int64_t cur_pos, + PartTransTask &task, + bool &need_filter, + int32_t &row_size, + volatile bool &stop_flag); + // DDL data/non-PG partitioned data all need to be deserialized in whole rows, no filtering + bool should_not_filter_row_(PartTransTask &task); + int parse_ddl_stmts_(const uint64_t row_index, MutatorRow &row, PartTransTask &task); + int parse_dml_stmts_(const uint64_t row_index, + MutatorRow &row, + ObLogEntryTask &redo_log_entry_task, + PartTransTask &part_trans_task, + const bool is_rollback = false); + +private: + bool inited_; + IObLogBRPool *br_pool_; + IObLogMetaManager *meta_manager_; + + // The cluster ID of this cluster + // Set as the unique ID of the DDL + int64_t cluster_id_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogPartTransParser); +}; + +} /* liboblog */ +} /* oceanbase */ +#endif diff --git a/src/liboblog/src/ob_log_part_trans_resolver.cpp b/src/liboblog/src/ob_log_part_trans_resolver.cpp new file mode 100644 index 0000000000000000000000000000000000000000..67ab2ccbd9d2e356ce65ac5bfdae17365c705730 --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_resolver.cpp @@ -0,0 +1,1987 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_trans_resolver.h" + +#include "share/ob_define.h" // OB_SERVER_TENANT_ID +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/utility/ob_macro_utils.h" // OB_UNLIKELY +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +#include "ob_log_fetch_stat_info.h" // TransStatInfo +#include "ob_log_part_serve_info.h" // PartServeInfo +#include "ob_log_cluster_id_filter.h" // IObLogClusterIDFilter +#include "ob_log_config.h" // TCONF +#include "ob_log_instance.h" // TCTX +#include "ob_log_dml_parser.h" // IObLogDmlParser + +using namespace oceanbase::common; +using namespace oceanbase::clog; +using namespace oceanbase::storage; +using namespace oceanbase::transaction; +namespace oceanbase +{ +namespace liboblog +{ +bool IObLogPartTransResolver::test_mode_on = false; +bool IObLogPartTransResolver::test_checkpoint_mode_on = false; +int64_t IObLogPartTransResolver::test_mode_ignore_redo_count = 0; + +IObLogPartTransResolver::ObLogMissingInfo::ObLogMissingInfo() +{ + reset(); +} + +int IObLogPartTransResolver::ObLogMissingInfo::sort_and_unique_missing_log_ids() +{ + int ret = OB_SUCCESS; + + // sort missing log + std::sort(missing_log_ids_.begin(), missing_log_ids_.end()); + LOG_INFO("[UNIQUE] [MISSING_LOG] [BEGIN]", K(missing_log_ids_)); + + // unique + int64_t cur_idx = 0; + while (OB_SUCC(ret) && cur_idx < missing_log_ids_.count()) { + const int64_t cur_log_id = missing_log_ids_.at(cur_idx); + + bool has_done = false; + + int64_t check_log_id_idx = cur_idx + 1; + while(OB_SUCC(ret) && ! 
has_done && check_log_id_idx < missing_log_ids_.count()) { + if (cur_log_id == missing_log_ids_.at(check_log_id_idx)) { + if (OB_FAIL(missing_log_ids_.remove(check_log_id_idx))) { + LOG_ERROR("missing_log_ids_ remove fail", KR(ret), K(check_log_id_idx), K(missing_log_ids_)); + } else { + check_log_id_idx = cur_idx + 1; + } + } else { + has_done = true; + } + } // while + cur_idx += 1; + } + + LOG_INFO("[UNIQUE] [MISSING_LOG] [END]", KR(ret), K(missing_log_ids_)); + + return ret; +} + +int IObLogPartTransResolver::ObLogMissingInfo::push_back_missing_log_id(const uint64_t log_id) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(missing_log_ids_.push_back(log_id))) { + LOG_ERROR("missing_log_ids_ push_back fail", KR(ret), K(log_id)); + } + + return ret; +} + +int IObLogPartTransResolver::ObLogMissingInfo::push_back_trans_id(const transaction::ObTransID &trans_id) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(missing_trans_ids_.push_back(trans_id))) { + LOG_ERROR("missing_trans_ids_ push_back fail", KR(ret), K(trans_id)); + } + + return ret; +} + +int IObLogPartTransResolver::ObLogMissingInfo::push_back_log_index(const int64_t log_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(log_index < 0)) { + LOG_ERROR("invalid argument", K(log_index)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(log_indexs_.push_back(log_index))) { + LOG_ERROR("log_indexs_ push_back fail", KR(ret), K(log_index)); + } + + return ret; +} + +ObLogPartTransResolver::ObLogPartTransResolver(const char* pkey_str, + TaskPool &task_pool, + PartTransTaskMap &task_map, + TransCommitMap &trans_commit_map, + IObLogFetcherDispatcher &dispatcher, + IObLogClusterIDFilter &cluster_id_filter) : + offlined_(false), + pkey_(), + first_log_ts_(OB_INVALID_TIMESTAMP), + part_trans_dispatcher_(pkey_str, task_pool, task_map, trans_commit_map, dispatcher), + cluster_id_filter_(cluster_id_filter), + start_global_trans_version_(OB_INVALID_TIMESTAMP) +{} + +ObLogPartTransResolver::~ObLogPartTransResolver() +{ +} + +int ObLogPartTransResolver::init(const ObPartitionKey& pkey, + const int64_t start_tstamp, + const int64_t start_global_trans_version) +{ + pkey_ = pkey; + start_global_trans_version_ = start_global_trans_version; + first_log_ts_ = OB_INVALID_TIMESTAMP; + + return part_trans_dispatcher_.init(pkey, start_tstamp); +} + +int ObLogPartTransResolver::read(const clog::ObLogEntry& log_entry, + ObLogMissingInfo &missing_info, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + ObStorageLogType &log_type, + const bool need_filter_pg_no_missing_redo_trans, + const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + bool is_log_filtered = false; + bool is_log_aggre = false; + bool is_barrier_log = false; + int64_t begin_time = get_timestamp(); + const int64_t tstamp = log_entry.get_header().get_submit_timestamp(); + // Record before processing + if (OB_INVALID_TIMESTAMP == first_log_ts_) { + first_log_ts_ = tstamp; + } + + missing_info.reset(); + log_type = storage::OB_LOG_UNKNOWN; + + if (OB_UNLIKELY(offlined_)) { + LOG_ERROR("partition has been offlined", K(offlined_), K(pkey_)); + ret = OB_ERR_UNEXPECTED; + } + // Parsing the header of a transaction log + else if (OB_FAIL(decode_trans_log_header_(log_entry, pos, is_log_filtered, is_log_aggre, is_barrier_log, log_type))) { + LOG_ERROR("decode trans log header fail", KR(ret), K(log_entry), K(pos)); + } else if (! 
is_log_filtered) { + // Non-transaction logs or checkpoint logs are always treated as unknown log types + if (OB_FAIL(read_unknown_log_(log_entry))) { + LOG_ERROR("read NON-SUBMIT type log entry fail", KR(ret), K(log_entry)); + } + // Non-PG aggregation log + } else if (! is_log_aggre) { + ObLogAggreTransLog aggre_trans_log; + ObLogEntryWrapper log_entry_wrapper(/*is_log_aggre*/false, log_entry, aggre_trans_log); + const int64_t log_entry_index = 0; + + if (is_barrier_log) { + // TODO + } else if (OB_FAIL(read_log_(log_entry_wrapper, log_entry_index, begin_time, pos, missing_info, tsi, serve_info, log_type))) { + if (OB_ITEM_NOT_SETTED != ret) { + LOG_ERROR("read_log_ fail", KR(ret), K(log_entry_wrapper), K(log_entry_index), K(begin_time), K(pos), K(serve_info), + K(serve_info), K(log_type)); + } + } + // PG aggregation log + } else { + ObTransIDArray missing_log_trans_id_array; + const bool is_read_missing_log = false; + + if (OB_FAIL(parse_and_read_aggre_log_(log_entry, begin_time, missing_info, tsi, serve_info, + is_read_missing_log, missing_log_trans_id_array, need_filter_pg_no_missing_redo_trans, + log_indexs))) { + if (OB_ITEM_NOT_SETTED != ret) { + LOG_ERROR("parse_and_read_aggre_log_ fail", KR(ret), K(log_entry), K(missing_log_trans_id_array), K(begin_time), K(serve_info), + K(is_read_missing_log), K(missing_log_trans_id_array), K(need_filter_pg_no_missing_redo_trans), + K(log_indexs)); + } + } + } + + // Missing log arrays are guaranteed to be ordered and free of duplicates + if (OB_ITEM_NOT_SETTED == ret) { + LOG_INFO("need to read missing log", K(log_entry), K(missing_info)); + + // in case of overwrite error code OB_ITEM_NOT_SETTED + int tmp_ret = OB_SUCCESS; + if (missing_info.get_missing_log_count() <= 0) { + LOG_ERROR("missing log count should be greater than 0", "log_cnt", missing_info.get_missing_log_count()); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(OB_SUCCESS != (tmp_ret = missing_info.sort_and_unique_missing_log_ids()))) { + LOG_ERROR("missing_info sort_and_unique_missing_log_ids fail", KR(ret), KR(tmp_ret), K(missing_info)); + ret = tmp_ret; + } + } + + return ret; +} + +int ObLogPartTransResolver::read_log_(const liboblog::ObLogEntryWrapper &log_entry, + const int64_t log_entry_index, + const int64_t begin_time, + int64_t &pos, + ObLogMissingInfo &missing, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + storage::ObStorageLogType &log_type) +{ + int ret = OB_SUCCESS; + int hit_count = 0; + ObTransIDArray missing_log_trans_id_array; + + // Single multi-partition CHECKPOINT log, one log_entry + if (storage::ObStorageLogTypeChecker::is_checkpoint_log(log_type)) { + ++hit_count; + if (OB_FAIL(read_checkpoint_log_(log_entry, pos))) { + LOG_ERROR("read_checkpoint_log_ fail", KR(ret), K(log_type), K(log_entry), K(pos)); + } + } else { + tsi.decode_header_time_ += get_timestamp() - begin_time; + + LOG_DEBUG("read trans log", K_(pkey), K(log_entry), K(log_type)); + + // First process the OB_LOG_MUTATOR log + // The Mutator log only logs mutator information, and may contain sequences as follows: + // 1. mutator_log, mutator_log ...... mutator abort log + // 2. mutator_log, mutator_log ...... redo, prepare commit/abort + // 3. mutator_log, mutator_log ...... 
sp_read, sp_commit/abort + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_MUTATOR)) { + ++hit_count; + if (OB_FAIL(read_mutator_(log_entry, pos, tsi, missing_log_trans_id_array))) { + if (OB_ENTRY_EXIST == ret) { + ret = OB_SUCCESS; + LOG_DEBUG("mutator log has been read multiple times", K(log_type), K(log_entry)); + } else { + LOG_ERROR("read mutator log fail", KR(ret), K(log_type), K(log_entry)); + } + } + } + + // mutator abort log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_MUTATOR_ABORT)) { + ++hit_count; + ret = read_mutator_abort_(log_entry, pos); + } + + // REDO/PREPARE/COMMIT/CLEAR logs of various types may be in one log body + // REDO logs are processed first + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_TRANS_REDO)) { + ++hit_count; + ret = read_redo_(log_entry, pos, tsi, missing_log_trans_id_array); + + // Handling duplicate redo logs + if (OB_ENTRY_EXIST == ret) { + bool with_prepare = (log_type & storage::OB_LOG_TRANS_PREPARE); + if (with_prepare) { + // When the redo log and the prepare log are in one log body, since the prepare log may be read multiple times + // the redo log may be duplicated, allowing this to exist + ret = OB_SUCCESS; + LOG_INFO("redo log has been read multiple times which should have missing log", + K(log_type), K(log_entry)); + } else { + LOG_ERROR("redo log has been read multiple times", KR(ret), K(log_type), K(log_entry)); + } + } + } + + // PREPARE log + bool with_prepare_and_served = false; + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_TRANS_PREPARE)) { + ++hit_count; + bool with_redo = (log_type & storage::OB_LOG_TRANS_REDO); + ret = read_prepare_(log_entry, log_entry_index, missing, pos, with_redo, tsi, serve_info, + with_prepare_and_served); + } + + // Commit log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_TRANS_COMMIT)) { + ++hit_count; + bool with_prepare = (log_type & storage::OB_LOG_TRANS_PREPARE); + // Filter the commit log if the commit log and the prepare log are together and the prepare log is not served + if (with_prepare && ! 
with_prepare_and_served) { + // filter commit log + } else { + ret = read_commit_(log_entry, serve_info, pos, with_prepare, tsi); + } + } + + // Abort log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_TRANS_ABORT)) { + ++hit_count; + ret = read_abort_(log_entry, pos); + } + + // Clear log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_TRANS_CLEAR)) { + ++hit_count; + // ignore + tsi.clear_cnt_++; + tsi.clear_size_ += log_entry.get_header().get_data_len(); + } + + // single partition REDO log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_SP_TRANS_REDO)) { + ++hit_count; + ret = read_sp_trans_redo_(log_entry, pos, tsi, missing_log_trans_id_array); + } + + // single partition COMMIT log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_SP_TRANS_COMMIT)) { + ++hit_count; + ret = read_sp_trans_commit_(log_entry, log_entry_index, missing, pos, tsi, serve_info); + } + + // single partition ABORT log + if (OB_SUCCESS == ret && (log_type & storage::OB_LOG_SP_TRANS_ABORT)) { + ++hit_count; + ret = read_sp_trans_abort_(log_entry, pos); + } + + if (OB_SUCCESS == ret && (storage::OB_LOG_SP_ELR_TRANS_COMMIT == log_type)) { + ++hit_count; + ret = read_sp_trans_commit_(log_entry, log_entry_index, missing, pos, tsi, serve_info, true/*is_sp_elr_trans*/); + } + } + + if (OB_SUCC(ret)) { + if (hit_count <=0 + && OB_LOG_TRANS_STATE != log_type) { + LOG_ERROR("Log entry is trans log type, but not handled, not supported.", K(hit_count), K(log_entry), K(pos), K(log_type)); + ret = OB_NOT_SUPPORTED; + } + } + + return ret; +} + +int ObLogPartTransResolver::read_missing_redo(const clog::ObLogEntry &log_entry, + const ObTransIDArray &missing_log_trans_id_array) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + bool is_trans_log = false; + bool is_log_aggre = false; + bool is_barrier_log = false; + ObStorageLogType log_type = storage::OB_LOG_UNKNOWN; + TransStatInfo tsi; + + if (OB_UNLIKELY(offlined_)) { + LOG_ERROR("partition has been offlined", K(offlined_), K(pkey_)); + ret = OB_ERR_UNEXPECTED; + } + // Parsing the header of a transaction log + else if (OB_FAIL(decode_trans_log_header_(log_entry, pos, is_trans_log, is_log_aggre, is_barrier_log, log_type))) { + LOG_ERROR("decode trans log header fail", KR(ret), K(log_entry), K(pos)); + } else if (! is_trans_log) { + LOG_ERROR("invalid missing redo log which is not trans log", K(log_type), K(log_entry)); + ret = OB_INVALID_ARGUMENT; + } else if (! 
is_log_aggre) { + // Non-aggregated log handling + ObLogAggreTransLog aggre_trans_log; + ObLogEntryWrapper log_entry_wrapper(/*is_log_aggre*/false, log_entry, aggre_trans_log); + + if (OB_FAIL(read_missing_redo_(log_entry_wrapper, missing_log_trans_id_array, pos, tsi, log_type))) { + LOG_ERROR("read_missing_redo_ fail", KR(ret), K(log_entry_wrapper), K(missing_log_trans_id_array), K(pos), + K(tsi), K(log_type)); + } + } else { + // aggregated log handling + int64_t begin_time = 0; + ObLogMissingInfo missing_info; + PartServeInfo serve_info; + const bool is_read_missing_log = true; + const bool need_filter_pg_no_missing_redo_trans = false; + ObAggreLogIndexArray log_indexs; + + if (OB_FAIL(parse_and_read_aggre_log_(log_entry, begin_time, missing_info, tsi, serve_info, + is_read_missing_log, missing_log_trans_id_array, need_filter_pg_no_missing_redo_trans, + log_indexs))) { + LOG_ERROR("parse_and_read_aggre_log_ fail", KR(ret), K(log_entry), K(begin_time), K(serve_info), + K(is_read_missing_log), K(missing_log_trans_id_array), K(need_filter_pg_no_missing_redo_trans), + K(log_indexs)); + } + } + + return ret; +} + +int ObLogPartTransResolver::read_missing_redo_(const liboblog::ObLogEntryWrapper &log_entry, + const ObTransIDArray &missing_log_trans_id_array, + int64_t &pos, + TransStatInfo &tsi, + storage::ObStorageLogType &log_type) +{ + int ret = OB_SUCCESS; + const bool is_pg_aggre_log = log_entry.is_pg_aggre_log(); + + // Mutor log + if (log_type & storage::OB_LOG_MUTATOR) { + if (OB_FAIL(read_mutator_(log_entry, pos, tsi, missing_log_trans_id_array, true))) { + LOG_ERROR("read mutator missing log fail", KR(ret), K(pos), K(log_entry), K(missing_log_trans_id_array)); + } + } + // normal REDO log + else if (log_type & storage::OB_LOG_TRANS_REDO) { + if (OB_FAIL(read_redo_(log_entry, pos, tsi, missing_log_trans_id_array, true))) { + LOG_ERROR("read redo missing log fail", KR(ret), K(pos), K(log_entry), K(missing_log_trans_id_array)); + } + } + // Single partition transaction REDO log + else if (log_type & storage::OB_LOG_SP_TRANS_REDO) { + if (OB_FAIL(read_sp_trans_redo_(log_entry, pos, tsi, missing_log_trans_id_array, true))) { + LOG_ERROR("read sp trans redo fail", KR(ret), K(pos), K(log_entry), K(missing_log_trans_id_array)); + } + } + // The COMMIT log of the single partition, REDO is in the COMMIT log + // You need to deserialize the log first and read the redo inside + else if (log_type & storage::OB_LOG_SP_TRANS_COMMIT) { + ObSpTransCommitLog commit_log; + int64_t after_decode_time = 0; + bool with_redo = false; + if (OB_FAIL(deserialize_sp_commit_and_parse_redo_(log_entry, pos, commit_log, after_decode_time, + with_redo, missing_log_trans_id_array, true))) { + LOG_ERROR("deserialize_sp_commit_and_parse_redo_ fail", KR(ret), K(log_entry), K(pos), + K(commit_log), K(missing_log_trans_id_array)); + } + } + else { + if (! 
is_pg_aggre_log) { + // Non-PG, other logs: exceptions, requirement to be REDO logs + LOG_ERROR("invalid missing redo log which is not REDO log", K(log_type), K(log_entry)); + ret = OB_INVALID_ARGUMENT; + } else { + // PG scenario read missing redo, all other log types ignored + LOG_INFO("redo PG missing log, ignore log type which is not REDO log", K(log_type), K(log_entry)); + } + } + + return ret; +} + +int ObLogPartTransResolver::filter_pg_log_based_on_trans_id_(const ObTransIDArray &missing_log_trans_id_array, + const ObTransID &log_trans_id, + const bool is_pg_missing_log, + bool &is_filter) +{ + int ret = OB_SUCCESS; + // No filtering by default + is_filter = false; + bool has_find = false; + + // filter by trans_id for pg redo missing log + if (is_pg_missing_log) { + for (int64_t idx = 0; OB_SUCC(ret) && idx < missing_log_trans_id_array.count() && ! has_find; ++idx) { + const ObTransID &trans_id = missing_log_trans_id_array.at(idx); + + if (log_trans_id == trans_id) { + is_filter = false; + has_find = true; + } + } // for + + if (OB_SUCC(ret)) { + if (! has_find) { + is_filter = true; + } + } + } else { + // do nothing + } + + if (is_pg_missing_log) { + LOG_INFO("filter_pg_log_based_on_trans_id_", K(log_trans_id), K(missing_log_trans_id_array), K(is_pg_missing_log), + K(is_filter), K(has_find)); + } + + return ret; +} + +// Support for multi-threaded calls +// Issuance policy: +// DML/DDL downlisting of partitioned transaction tasks +int ObLogPartTransResolver::dispatch(volatile bool &stop_flag, int64_t &pending_task_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(offlined_)) { + // No flush after partition deletion, handling concurrent scenarios + LOG_INFO("partition has been offlined, need not flush", K(offlined_), K(pkey_)); + ret = OB_SUCCESS; + } else if (OB_FAIL(part_trans_dispatcher_.dispatch_part_trans(stop_flag, pending_task_count))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("part trans dispatch fail", KR(ret), K(pkey_), K(part_trans_dispatcher_)); + } + } else { + // success + } + + return ret; +} + +int ObLogPartTransResolver::offline(volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + LOG_INFO("[PART_TRANS_RESOLVER] offline", KPC(this)); + + if (OB_UNLIKELY(offlined_)) { + LOG_ERROR("partition has been offlined", K(offlined_), K(pkey_)); + ret = OB_ERR_UNEXPECTED; + } + // First clear all ready and unready tasks to ensure memory reclamation + // This operation is mutually exclusive with the dispatch operation + else if (OB_FAIL(part_trans_dispatcher_.clean_task(pkey_))) { + LOG_ERROR("clean task fail", KR(ret), K(pkey_)); + } + // dispatch offline partition task + else if (OB_FAIL(part_trans_dispatcher_.dispatch_offline_partition_task(pkey_, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch offline task fail", KR(ret), K(pkey_)); + } + } else { + offlined_ = true; + } + + return ret; +} + +int ObLogPartTransResolver::decode_trans_log_header_(const clog::ObLogEntry& log_entry, + int64_t &pos, + bool &is_log_filtered, + bool &is_log_aggre, + bool &is_barrier_log, + ObStorageLogType &log_type) +{ + int ret = OB_SUCCESS; + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_header().get_data_len(); + ObLogType log_entry_log_type = log_entry.get_header().get_log_type(); + const uint64_t log_id = log_entry.get_header().get_log_id(); + + log_type = storage::OB_LOG_UNKNOWN; + is_log_filtered = false; + is_log_aggre = false; + is_barrier_log = false; + + if (OB_ISNULL(buf) || OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid 
log entry", KR(ret), K(buf), K(len), K(log_entry));
+    ret = OB_ERR_UNEXPECTED;
+  }
+  // check is barrier log or not
+  else if (is_barrier_log_(log_id, log_entry)) {
+    is_log_filtered = true;
+    // Non-aggregated log
+    is_log_aggre = false;
+    // barrier log
+    is_barrier_log = true;
+  }
+  // Only SUBMIT and AGGRE type CLOG logs are parsed
+  else if (clog::OB_LOG_SUBMIT != log_entry_log_type
+      && clog::OB_LOG_AGGRE != log_entry_log_type) {
+    is_log_filtered = false;
+    log_type = storage::OB_LOG_UNKNOWN;
+  } else {
+    if (clog::OB_LOG_AGGRE == log_entry_log_type) {
+      is_log_filtered = true;
+      is_log_aggre = true;
+    } else {
+      if (OB_FAIL(decode_storage_log_type(log_entry, pos, log_type))) {
+        LOG_ERROR("decode_storage_log_type fail", KR(ret), K(log_entry), K(pos), K(log_type));
+      } else {
+        // Only transaction logs or checkpoint logs are handled.
+        bool is_trans_log = storage::ObStorageLogTypeChecker::is_trans_log(log_type);
+        bool is_checkpoint_log = storage::ObStorageLogTypeChecker::is_checkpoint_log(log_type);
+        is_log_filtered = is_trans_log || is_checkpoint_log;
+
+        if (is_trans_log) {
+          // 1. only transaction logs need to skip over the reserve field
+          // 2. checkpoint logs do not carry a reserve field, so nothing needs to be skipped
+          int64_t reserve = 0;
+          if (OB_FAIL(serialization::decode_i64(buf, len, pos, &reserve))) {
+            LOG_ERROR("decode reserve field fail", KR(ret), K(pkey_), K(buf), K(len),
+                K(pos), K(log_type), K(is_trans_log), K(log_entry));
+          }
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+// TODO
+// The first log of a new partition is treated as a barrier log
+bool ObLogPartTransResolver::is_barrier_log_(const uint64_t log_id,
+    const clog::ObLogEntry& log_entry)
+{
+  bool bool_ret = false;
+  UNUSED(log_entry);
+
+  bool_ret = (1 == log_id);
+
+  return bool_ret;
+}
+
+// Get the task; if it does not exist, create a new one
+int ObLogPartTransResolver::obtain_task_(const PartTransID &part_trans_id, PartTransTask*& task)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_FAIL(part_trans_dispatcher_.get_task(part_trans_id, task))) {
+    if (OB_ENTRY_NOT_EXIST == ret) {
+      ret = OB_SUCCESS;
+
+      // Allocate a new task
+      if (OB_FAIL(part_trans_dispatcher_.alloc_task(part_trans_id, task))) {
+        LOG_ERROR("alloc task fail", KR(ret), K(part_trans_id));
+      } else {
+        // success
+      }
+    } else {
+      LOG_ERROR("get task fail", KR(ret), K(part_trans_id));
+    }
+  } else {
+    // success
+  }
+
+  return ret;
+}
+
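+// Layout of a PG aggregated log entry, as decoded by parse_and_read_aggre_log_() below.
+// Sub-logs are packed back to back until the end of the entry buffer; each record consists of:
+//   - next_log_offset (int32): absolute offset of the next record within the entry buffer
+//   - submit_timestamp (int64)
+//   - log_type (int64): ObStorageLogType of the embedded transaction log
+//   - trans_id_inc (int64)
+//   - payload: the embedded transaction log body, running up to next_log_offset
+// Each payload is wrapped in an ObLogEntryWrapper so that it can go through the same
+// read_log_() / read_missing_redo_() paths as a stand-alone log entry.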
+int ObLogPartTransResolver::parse_and_read_aggre_log_(const clog::ObLogEntry &log_entry,
+    const int64_t begin_time,
+    ObLogMissingInfo &missing,
+    TransStatInfo &tsi,
+    const PartServeInfo &serve_info,
+    const bool is_read_missing_log,
+    const ObTransIDArray &missing_log_trans_id_array,
+    const bool need_filter_pg_no_missing_redo_trans,
+    const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs)
+{
+  int ret = OB_SUCCESS;
+
+  const char *log_buf = log_entry.get_buf();
+  const int64_t log_buf_len = log_entry.get_header().get_data_len();
+  int32_t next_log_offset = 0;
+  int64_t log_entry_index = -1;
+
+  while (OB_SUCC(ret) && next_log_offset < log_buf_len) {
+    int64_t pos = next_log_offset;
+    int64_t submit_timestamp = OB_INVALID_TIMESTAMP;
+    int64_t log_type_in_buf = 0;
+    int64_t trans_id_inc = 0;
+
+    if (OB_FAIL(serialization::decode_i32(log_buf, log_buf_len, pos, &next_log_offset))) {
+      LOG_ERROR("serialization decode_i32 failed", KR(ret), K(log_buf_len), K(pos));
+    } else if (OB_FAIL(serialization::decode_i64(log_buf, log_buf_len, pos, &submit_timestamp))) {
+      LOG_ERROR("serialization::decode_i64 failed", KR(ret), K(log_buf_len), K(pos));
+    } else if (OB_FAIL(serialization::decode_i64(log_buf, log_buf_len, pos, &log_type_in_buf))) {
+      LOG_ERROR("serialization::decode_i64 failed", KR(ret), K(log_buf_len), K(pos));
+    } else if (OB_FAIL(serialization::decode_i64(log_buf, log_buf_len, pos, &trans_id_inc))) {
+      LOG_ERROR("serialization::decode_i64 failed", KR(ret), K(log_buf_len), K(pos));
+    } else {
+      const char *buf = log_buf + pos;
+      const int64_t buf_len = next_log_offset - pos;
+
+      ObLogAggreTransLog aggre_trans_log;
+      aggre_trans_log.reset(next_log_offset,
+          submit_timestamp,
+          static_cast<storage::ObStorageLogType>(log_type_in_buf),
+          trans_id_inc,
+          buf,
+          buf_len);
+
+      // Process each aggregated log
+      ObLogEntryWrapper log_entry_wrapper(/*is_log_aggre*/true, log_entry, aggre_trans_log);
+      // Inc log_entry_index
+      ++log_entry_index;
+      int64_t aggre_trans_log_pos = 0;
+      // No filtering by default
+      bool is_filter = false;
+
+      if (! is_read_missing_log) {
+        if (need_filter_pg_no_missing_redo_trans) {
+          if (OB_FAIL(filter_pg_no_missing_redo_trans_(log_indexs, log_entry_index, is_filter))) {
+            LOG_ERROR("filter_pg_no_missing_redo_trans_ fail", KR(ret), K(log_indexs), K(log_entry_index), K(is_filter));
+          }
+        } else {
+          is_filter = false;
+        }
+
+        if (is_filter) {
+          if (OB_UNLIKELY(false == need_filter_pg_no_missing_redo_trans)) {
+            LOG_ERROR("need_filter_pg_no_missing_redo_trans should be true", K(need_filter_pg_no_missing_redo_trans),
+                K(is_filter), K(log_entry_index), K(log_entry_wrapper), K(log_entry), K(log_indexs));
+            ret = OB_ERR_UNEXPECTED;
+          } else {
+            LOG_INFO("filter pg no missing redo trans", K(log_entry_index), K(log_entry_wrapper), K(log_entry),
+                K(log_indexs), K(is_filter));
+          }
+        } else {
+          if (OB_FAIL(read_log_(log_entry_wrapper, log_entry_index, begin_time, aggre_trans_log_pos, missing, tsi, serve_info,
+              aggre_trans_log.log_type_))) {
+            if (OB_ITEM_NOT_SETTED != ret) {
+              LOG_ERROR("read_log_ fail", KR(ret), K(log_entry_wrapper), K(log_entry_index), K(begin_time),
+                  K(aggre_trans_log_pos), K(serve_info), "log_type", aggre_trans_log.log_type_);
+            }
+          }
+        }
+      } else {
+        // Read missing log for PG
+        if (OB_FAIL(read_missing_redo_(log_entry_wrapper, missing_log_trans_id_array, aggre_trans_log_pos, tsi, aggre_trans_log.log_type_))) {
+          LOG_ERROR("read_missing_redo_ fail", KR(ret), K(log_entry_wrapper), K(missing_log_trans_id_array),
+              K(aggre_trans_log_pos), K(tsi), "log_type", aggre_trans_log.log_type_);
+        }
+      }
+    }
+  } // while
+
+  return ret;
+}
+
+int ObLogPartTransResolver::filter_pg_no_missing_redo_trans_(const ObAggreLogIndexArray &log_indexs,
+    const int64_t cur_log_index,
+    bool &is_filter)
+{
+  int ret = OB_SUCCESS;
+  // No filtering by default
+  is_filter = false;
+  bool has_find = false;
+
+  for (int64_t idx = 0; OB_SUCC(ret) && idx < log_indexs.count() && ! has_find; ++idx) {
+    const int64_t log_index = log_indexs.at(idx);
+
+    if (cur_log_index == log_index) {
+      is_filter = false;
+      has_find = true;
+    }
+  } // for
+
+  if (OB_SUCC(ret)) {
+    if (! 
has_find) { + is_filter = true; + } + } + + LOG_INFO("filter_pg_no_missing_redo_trans_", K(log_indexs), K(cur_log_index), K(is_filter), K(has_find)); + + return ret; +} + +int ObLogPartTransResolver::read_mutator_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + TransStatInfo &tsi, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log) +{ + int ret = OB_SUCCESS; + transaction::ObTransMutatorLog log; + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + int64_t start_time = get_timestamp(); + const bool is_pg_aggre_log = log_entry.is_pg_aggre_log(); + const uint64_t real_tenant_id = log_entry.get_header().get_partition_key().get_tenant_id(); + + if (OB_ISNULL(buf) || OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log entry", K(buf), K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + // Initialize the mutator in the mutator log, requiring that no data be copied during deserialization + } else if (OB_FAIL(log.get_mutator().init())) { + LOG_ERROR("mutator log init for deserialize fail", KR(ret), K(pkey_), K(log_entry)); + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize redo log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos), + K(log), K(log_entry)); + } else { + int64_t after_decode_time = get_timestamp(); + + if (OB_FAIL(parse_redo_log_(log_entry.get_log_offset(), log.get_mutator(), log.get_trans_id(), log_id, tstamp, + log.get_log_no(), false, is_missing_log, is_pg_aggre_log, missing_log_trans_id_array))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("parse redo log fail", KR(ret), K(log), K(log_id), K(tstamp), K(log_entry), + K(is_missing_log), K(is_pg_aggre_log), K(missing_log_trans_id_array)); + } + } else { + int64_t end_time = get_timestamp(); + + tsi.redo_size_ += len; + tsi.redo_cnt_++; + tsi.read_redo_decode_time_ += after_decode_time - start_time; + tsi.read_redo_parse_time_ += end_time - after_decode_time; + tsi.read_redo_time_ += end_time - start_time; + } + } + + return ret; +} + +int ObLogPartTransResolver::read_redo_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + TransStatInfo &tsi, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log) +{ + int ret = OB_SUCCESS; + transaction::ObTransRedoLogHelper helper; + transaction::ObTransRedoLog log(helper); + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + int64_t start_time = get_timestamp(); + const bool is_pg_aggre_log = log_entry.is_pg_aggre_log(); + const uint64_t real_tenant_id = log_entry.get_header().get_partition_key().get_tenant_id(); + + if (OB_ISNULL(buf) || OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log entry", K(buf), K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } + // Initialize the mutator in the redo log, requiring that no data be copied during deserialization + else if (OB_FAIL(log.init())) { + LOG_ERROR("redo log init for deserialize fail", KR(ret), K(pkey_), K(log_entry)); + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize redo log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos), + K(log), K(log_entry)); + } else { + int64_t after_decode_time = get_timestamp(); + + if (is_last_redo_with_empty_content_(log)) { + // filter the last redo if it is empty + 
// currently the last relog log of xa trans or trans of replicate table may be empty, should ignore this empty redo + LOG_DEBUG("filter empty redo log for xa trans or trans of replicate table", "trans_id", log.get_trans_id(), + "partition", log.get_partition(), K(log_id), "log_mutator", log.get_mutator()); + } else if (OB_FAIL(parse_redo_log_(log_entry.get_log_offset(), log.get_mutator(), log.get_trans_id(), log_id, tstamp, + log.get_log_no(), false, is_missing_log, is_pg_aggre_log, missing_log_trans_id_array))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("parse redo log fail", KR(ret), K(log), K(log_id), K(tstamp), K(log_entry), + K(is_missing_log), K(is_pg_aggre_log), K(missing_log_trans_id_array)); + } + } else { + int64_t end_time = get_timestamp(); + + tsi.redo_size_ += len; + tsi.redo_cnt_++; + tsi.read_redo_decode_time_ += after_decode_time - start_time; + tsi.read_redo_parse_time_ += end_time - after_decode_time; + tsi.read_redo_time_ += end_time - start_time; + } + } + + return ret; +} + +int ObLogPartTransResolver::parse_redo_log_(const int32_t log_offset, + const transaction::ObTransMutator &log_mutator, + const transaction::ObTransID &trans_id, + const uint64_t log_id, + const int64_t tstamp, + const int64_t log_no, + const bool is_sp_trans, + const bool parse_missing_log, + const bool is_pg_aggre_log, + const ObTransIDArray &missing_log_trans_id_array) +{ + int ret = OB_SUCCESS; + const char *data = log_mutator.get_data(); + const int64_t data_len = log_mutator.get_position(); + PartTransTask *task = NULL; + PartTransID part_trans_id(trans_id, pkey_); + bool is_filter = false; + const bool is_pg_missing_log = parse_missing_log && is_pg_aggre_log; + + if (OB_FAIL(filter_pg_log_based_on_trans_id_(missing_log_trans_id_array, trans_id, is_pg_missing_log, + is_filter))) { + LOG_ERROR("filter_pg_log_based_on_trans_id_ fail", KR(ret), K(missing_log_trans_id_array), + K(trans_id), K(is_pg_missing_log), K(parse_missing_log), K(is_filter), K(log_id)); + } else if (is_filter) { + LOG_INFO("filter pg trans log, which is not missing", K(trans_id), K(log_id), + K(is_pg_missing_log), K(parse_missing_log)); + // Ignore the first few redo logs in test mode and in the case of non-missing log parsing + } else if (OB_UNLIKELY(! 
parse_missing_log && test_mode_on && test_mode_ignore_redo_count > log_no)) { + LOG_WARN("[TEST_MODE] ignore redo log", K(log_no), K(log_id), K_(pkey), K(tstamp), K(trans_id), + K(is_sp_trans), K(parse_missing_log), K(test_mode_on), K(test_mode_ignore_redo_count)); + } else { + // Request data valid + if (OB_UNLIKELY(data_len <= 0)) { + LOG_ERROR("invalid mutator data length", K(data_len), K(log_mutator)); + ret = OB_INVALID_DATA; + } + // Get a Partition Trans Task + else if (OB_FAIL(part_trans_dispatcher_.get_task(part_trans_id, task))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get task fail", KR(ret), K(part_trans_id)); + } else { + // For reading missing log scenarios, the task must exist + if (OB_UNLIKELY(parse_missing_log)) { + LOG_ERROR("task does not exist while reading missing log", + KR(ret), K(trans_id), K(pkey_), K(log_id), K(tstamp)); + } else { + // For normal read log scenarios, when the task does not exist, dynamically create one + ret = OB_SUCCESS; + if (OB_FAIL(part_trans_dispatcher_.alloc_task(part_trans_id, task))) { + LOG_ERROR("alloc task fail", KR(ret), K(part_trans_id), K(pkey_), K(parse_missing_log)); + } + } + } + } + + // Push redo logs to partition task + if (OB_SUCCESS == ret) { + bool need_dispatch_row_data = false; + ObLogEntryTask *redo_log_entry_task = NULL; + + if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(task->push_redo_log(pkey_, trans_id, log_no, log_id, log_offset, tstamp, data, data_len, + need_dispatch_row_data, redo_log_entry_task))) { + if (OB_ENTRY_EXIST == ret) { + // redo log duplication + } + // Due to missing logs, the current logs cannot be pushed in and are handled in separate cases + else if (OB_LOG_MISSING == ret) { + LOG_WARN("[MISSING_LOG] redo log is missing, current log can not be pushed", + K(pkey_), K(log_no), K(log_id), K(tstamp), K(trans_id), K(parse_missing_log)); + // If a missing log scenario exists during the reading of the missing log, a bug exists and an unpredictable error is returned here + if (parse_missing_log) { + LOG_ERROR("this should not happen while parsing missing log, unexcepted error", + K(pkey_), K(log_no), K(log_id), K(tstamp), K(trans_id), K(parse_missing_log)); + ret = OB_ERR_UNEXPECTED; + } else { + // In a normal read log scenario, this log is simply ignored, and the missing logs will definitely be found when the prepare log is processed later + LOG_WARN("[MISSING_LOG] ignore current redo log while previous redo is missing", + K(pkey_), K(log_no), K(log_id), K(tstamp), K(trans_id), K(parse_missing_log)); + ret = OB_SUCCESS; + } + } else { + LOG_ERROR("push redo log fail", KR(ret), K(pkey_), K(task), K(log_id), + K(tstamp), K(log_no)); + } + } else if (need_dispatch_row_data) { + if (OB_FAIL(dispatch_log_entry_task_(redo_log_entry_task))) { + LOG_ERROR("dispatch_log_entry_task_ fail", KR(ret), K(redo_log_entry_task)); + } + } else { + // do nothing + } + + LOG_DEBUG("read redo log", K(is_sp_trans), K(parse_missing_log), K_(pkey), K(log_no), + K(log_id), K(tstamp), K(data_len), K(trans_id), "is_aggre_log", is_pg_aggre_log); + } + } + + return ret; +} + +// Read the PREPARE log to check if the redo log is missing +// If some redo logs are missing, return OB_ITEM_NOT_SETTED +int ObLogPartTransResolver::read_prepare_(const liboblog::ObLogEntryWrapper &log_entry, + const int64_t log_entry_index, + ObLogMissingInfo &missing, + int64_t &pos, + const bool with_redo, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + bool 
&is_prepare_log_served) +{ + int ret = OB_SUCCESS; + + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + const common::ObVersion freeze_version = log_entry.get_header().get_freeze_version(); + int64_t start_time = get_timestamp(); + // Whether the transaction is marked as batch commit in CLOG + bool is_batch_committed = log_entry.is_batch_committed(); + // close batch commit for test mode + if (test_checkpoint_mode_on) { + is_batch_committed = false; + } + transaction::ObTransPrepareLogHelper helper; + transaction::ObTransPrepareLog log(helper); + + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize prepare log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos)); + } else { + int64_t after_decode_time = get_timestamp(); + BatchCommitTransInfo batch_commit_trans_info; + batch_commit_trans_info.init_for_prepare_log(log.is_batch_commit_trans(), is_batch_committed); + const transaction::ObElrTransInfoArray &elt_trans_info_array = log.get_prev_trans_arr(); + + if (OB_FAIL(parse_prepare_log_(log.get_trans_id(), + log.get_redo_log_ids(), + with_redo, + log.get_cluster_id(), + freeze_version, + log_id, + tstamp, + missing, + log_entry_index, + false, + log.get_app_trace_id_str(), + log.get_app_trace_info(), + serve_info, + is_prepare_log_served, + batch_commit_trans_info, + elt_trans_info_array, + log.get_checkpoint(), + log.get_commit_version(), + &log.get_partition_log_info_arr()))) { + if (OB_ITEM_NOT_SETTED == ret) { + // missing log + } else { + LOG_ERROR("parse prepare log fail", KR(ret), K(log), K(with_redo), K(log_entry), + K(serve_info), K(is_batch_committed)); + } + } else { + int64_t end_time = get_timestamp(); + int64_t decode_time = after_decode_time - start_time; + int64_t parse_time = end_time - after_decode_time; + + tsi.prepare_cnt_++; + tsi.prepare_size_ += len; + tsi.prepare_with_redo_cnt_ += with_redo ? 1 : 0; + tsi.read_prepare_time_ += end_time - start_time; + tsi.read_prepare_decode_time_ += decode_time; + tsi.read_prepare_parse_time_ += parse_time; + } + } + + return ret; +} + +int ObLogPartTransResolver::check_part_trans_served_(const int64_t trans_log_tstamp, + const uint64_t log_id, + const uint64_t cluster_id, + const PartServeInfo &serve_info, + bool &is_served) +{ + int ret = OB_SUCCESS; + bool is_trans_log_served = serve_info.is_served(trans_log_tstamp); + bool is_cluster_id_served = false; + + if (OB_FAIL(cluster_id_filter_.check_is_served(cluster_id, is_cluster_id_served))) { + LOG_ERROR("check cluster id served fail", KR(ret), K(cluster_id)); + } else { + is_served = is_trans_log_served && is_cluster_id_served; + + // The info log is only output when the prepare log is not served, assuming there are not many of these logs, otherwise the DEBUG log is output + if (! is_trans_log_served) { + LOG_INFO("[STAT] [FETCHER] [PART_TRANS_NOT_SERVE]", K(is_trans_log_served), + K(is_cluster_id_served), K(log_id), + "trans_log_tstamp", TS_TO_STR(trans_log_tstamp), + K(cluster_id), K_(pkey), K(serve_info)); + } else if (! 
is_cluster_id_served) { + LOG_DEBUG("[STAT] [FETCHER] [PART_TRANS_NOT_SERVE]", K(is_trans_log_served), + K(is_cluster_id_served), K(log_id), + "trans_log_tstamp", TS_TO_STR(trans_log_tstamp), + K(cluster_id), K_(pkey), K(serve_info)); + } + } + return ret; +} + +// DDL partitions can be cleaned up directly +// DML needs to mark the PartTransTask as unserved +// and ensure that parse_commit_log_ is processed correctly PartTransTask::is_served_state +int ObLogPartTransResolver::handle_when_part_trans_not_served_(const PartTransID &part_trans_id, + const uint64_t log_id, + const int64_t tstamp) +{ + int ret = OB_SUCCESS; + const bool is_ddl_part = is_ddl_partition(pkey_); + + if (OB_FAIL(part_trans_dispatcher_.remove_task(is_ddl_part, part_trans_id))) { + LOG_ERROR("remove task from part trans dispatcher fail", KR(ret), K(is_ddl_part), K(part_trans_id), + K(log_id), K(tstamp)); + } + + return ret; +} + +// is_batch_commit_trans: whether the transaction is a batch commit optimization, i.e. the prepare log contains commit version information, batch commit +// +// trans_is_batch_committed: whether the transaction was batch-committed successfully +// observer will mark committed in ilog for transactions that have been batch committed, liboblog will record in CLOG via RPC whether +// this transaction has been batch committed, optimising the transaction process and avoiding a backlog of transactions +// +// Note: the observer does not guarantee that each transaction's prepare log has a batch commit marker, which can be lost due to master cutting or log loss +int ObLogPartTransResolver::parse_prepare_log_(const transaction::ObTransID &trans_id, + const transaction::ObRedoLogIdArray &all_redos, + const bool with_redo, + const uint64_t cluster_id, + const common::ObVersion freeze_version, + const uint64_t log_id, + const int64_t tstamp, + ObLogMissingInfo &missing_info, + const int64_t log_entry_index, + const bool is_sp_trans, + const ObString &trace_id, + const ObString &trace_info, + const PartServeInfo &serve_info, + bool &is_prepare_log_served, + const BatchCommitTransInfo &batch_commit_trans_info, + const transaction::ObElrTransInfoArray &elt_trans_info_array, + const int64_t checkpoint, + const int64_t commit_version, + const transaction::PartitionLogInfoArray *prepare_log_info) +{ + int ret = OB_SUCCESS; + PartTransTask *task = NULL; + PartTransID part_trans_id(trans_id, pkey_); + + is_prepare_log_served = false; + + // Check if partitioned transaction is serviced + if (OB_FAIL(check_part_trans_served_(tstamp, log_id, cluster_id, serve_info, is_prepare_log_served))) { + LOG_ERROR("check_part_trans_served_ fail", KR(ret), K(tstamp), K(cluster_id), K(serve_info)); + } else if (! 
is_prepare_log_served) { + // If a partitioned transaction is not in service, the information already available for that partitioned transaction is deleted directly + if (OB_FAIL(handle_when_part_trans_not_served_(part_trans_id, log_id, tstamp))) { + LOG_ERROR("handle_when_part_trans_not_served_ fail", KR(ret), K(part_trans_id), K(log_id), + K(tstamp)); + } + } else if (OB_FAIL(obtain_task_(part_trans_id, task))) { + LOG_ERROR("obtain task fail", KR(ret), K(pkey_), K(part_trans_id)); + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + const SortedRedoLogList &sorted_redos = task->get_sorted_redo_list(); + + // Verifying the integrity of REDO logs + if (OB_FAIL(check_redo_log_list_(all_redos, log_id, sorted_redos, with_redo, missing_info))) { + LOG_ERROR("check_redo_log_list_ fail", KR(ret), K(all_redos), K(sorted_redos), K(with_redo), + K(missing_info)); + } else { + bool is_redo_log_complete = (missing_info.get_missing_log_count() <= 0); + + if (! is_redo_log_complete) { + // Record the trans_id corresponding to the missing redo log + if (OB_FAIL(missing_info.push_back_trans_id(trans_id))) { + LOG_ERROR("missing_info push_back_trans_id fail", KR(ret), K(trans_id)); + } else if (OB_FAIL(missing_info.push_back_log_index(log_entry_index))) { + LOG_ERROR("missing_info push_back_log_index fail", KR(ret), K(log_entry_index)); + } else { + ret = OB_ITEM_NOT_SETTED; + } + // Missing redo log scenario + LOG_INFO("[MISSING_LOG] partition detect missing log", K_(pkey), + "prepare_log_id", log_id, "missing_count", missing_info.get_missing_log_count(), + K(missing_info), K(trans_id), K(tstamp), K(with_redo)); + } else { + // Prepare logs are only set when REDO logs are complete + if (OB_FAIL(prepare_normal_trans_task_(*task, tstamp, trans_id, log_id, cluster_id, + freeze_version, trace_id, trace_info, batch_commit_trans_info, elt_trans_info_array, + commit_version, checkpoint, prepare_log_info))) { + LOG_ERROR("prepare_normal_trans_task_ fail", KR(ret), KPC(task), K(tstamp), K(trans_id), + K(log_id), K(cluster_id), K(freeze_version), K(trace_id), K(trace_info), + K(batch_commit_trans_info), K(elt_trans_info_array), K(commit_version), + K(checkpoint), KPC(prepare_log_info)); + } else { + LOG_DEBUG("read prepare log", K(is_sp_trans), K_(pkey), K(log_id), K(tstamp), + K(cluster_id), K(freeze_version), K(trans_id), K(trace_id), + K(batch_commit_trans_info), K(elt_trans_info_array), K(commit_version), + K(checkpoint), KPC(prepare_log_info)); + } + } + } + } + + return ret; +} + +/* + There are two bugs in transactions, both of which are transaction logging exceptions that when encountered will cause liboblog to check for incorrect status and exit abnormally: + The first problem: a single transaction may log prepare twice on a partition, and the log sequence is not as expected + Second problem: a single transaction may log prepare+abort again on a partition after commit [because of a statement rollback scenario, the transaction state machine is incorrect]. + + The server needs to fix this problem and liboblog needs to have the means to support exception log recovery: + The more difficult part of the problem is that when a second prepare log is encountered, it cannot be simply filtered because it is impossible to distinguish between two prepares or a prepare+abort after a commit, using the following ideas. 
+ (1) support configuration items to filter prepare logs, encounter support for filtering redundant prepare + (2) in order to distinguish between the above two cases, if the transaction has been committed off, again abort, abort will be filtered out to avoid modifying the state and losing data + + For commit + prepare + abort logs, the following is a demonstration of the correctness of the processing + FetchStream will only dispatch a task after each Fetch Result is processed + 1. when commit + prepare + abort are processed in the same Fetch result, after filtering out prepare, the fetch partition transaction is in the committed state, and abort is filtered at this point + 2. When commit is processed first and the partitioned transaction task has been dispatched successfully, the next processing of dirty data [prepare+abort] will end up aborting + 3. When commit is processed first and the partitioned transaction task is not sent successfully, the next processing of dirty data can be filtered out +*/ +int ObLogPartTransResolver::prepare_normal_trans_task_(PartTransTask &task, + const int64_t tstamp, + const transaction::ObTransID &trans_id, + const uint64_t log_id, + const uint64_t cluster_id, + const common::ObVersion freeze_version, + const common::ObString &trace_id, + const common::ObString &trace_info, + const BatchCommitTransInfo &batch_commit_trans_info, + const transaction::ObElrTransInfoArray &elt_trans_info_array, + const int64_t commit_version, + const int64_t checkpoint, + const transaction::PartitionLogInfoArray *prepare_log_info) +{ + int ret = OB_SUCCESS; + const bool skip_abnormal_trans_log = (TCONF.skip_abnormal_trans_log != 0); + // default need handle + bool need_handle_prepare_task = true; + + if (skip_abnormal_trans_log) { + // Already advanced via the prepare log, filter the second prepare log at this point + if (task.is_dml_trans() || task.is_ddl_trans()) { + // No handling required + need_handle_prepare_task = false; + LOG_ERROR("skip abnormal prepare log", K(task), K(trans_id), K(log_id), K(tstamp)); + } + } + + if (! need_handle_prepare_task) { + // do nothing + } else if (OB_FAIL(task.prepare(pkey_, tstamp, trans_id, log_id, cluster_id, freeze_version, trace_id, trace_info, + elt_trans_info_array))) { + LOG_ERROR("prepare normal trans fail", KR(ret), K(pkey_), K(trans_id), K(log_id), K(tstamp), + K(cluster_id), K(freeze_version), K(task), K(trace_id), K(trace_info), K(elt_trans_info_array)); + // Update if checkpoint is valid, push partition level checkpoint + } else if (OB_INVALID_VERSION != checkpoint && OB_FAIL(part_trans_dispatcher_.update_checkpoint(checkpoint))) { + LOG_ERROR("part_trans_dispatcher update checkpoint fail", KR(ret), K(pkey_), K(trans_id), K(log_id), + K(tstamp), K(cluster_id), K(freeze_version), K(task), K(trace_id), + K(batch_commit_trans_info), K(commit_version), K(checkpoint), + KPC(prepare_log_info)); + } else { + // 1. batch commit transactions + // 2. early unlock scenario, no precommit here for single machine single partition transaction [no prepart_log_info], precommit inside parse_commit_log_ + if (batch_commit_trans_info.is_batch_commit_trans_) { + if (OB_ISNULL(prepare_log_info)) { + LOG_ERROR("prepare_log_info is null", KPC(prepare_log_info)); + ret = OB_ERR_UNEXPECTED; + } else { + // Whether it is pre-committed or not, if the transaction is not committed in bulk + bool is_ready_to_commit = (! 
batch_commit_trans_info.trans_is_batch_committed_); + + // This is not a commit log carrying a prepare log scenario, but a prepare log triggering a commit scenario by itself, + // and there is no scenario where the number of participants is uncertain + bool commit_log_with_prepare = false; + + // Execute the commit action + if (OB_FAIL(commit_task_(task, commit_version, *prepare_log_info, + log_id, tstamp, trans_id, commit_log_with_prepare, is_ready_to_commit))) { + LOG_ERROR("commit task fail where prepare task", KR(ret), K(pkey_), K(task), + K(commit_version), KPC(prepare_log_info), K(log_id), K(tstamp), K(trans_id), + K(commit_log_with_prepare), K(is_ready_to_commit)); + } + } + } + + if (OB_SUCC(ret)) { + // Preparing tasks to be placed in the dispatcher + if (OB_FAIL(part_trans_dispatcher_.prepare_task(task))) { + LOG_ERROR("dispatcher prepare task fail", KR(ret), K(task)); + } else { + // succ + } + } + } + + return ret; +} + +int ObLogPartTransResolver::dispatch_log_entry_task_(ObLogEntryTask *log_entry_task) +{ + int ret = OB_SUCCESS; + IObLogDmlParser *dml_parser = TCTX.dml_parser_; +// TODO + bool stop_flag = false; + + if (OB_ISNULL(log_entry_task)) { + LOG_ERROR("log_entry_task is NULL", K(log_entry_task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(dml_parser)) { + LOG_ERROR("dml_parser is NULL", K(dml_parser)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + RETRY_FUNC(stop_flag, *dml_parser, push, *log_entry_task, DATA_OP_TIMEOUT); + } + + return ret; +} + +/// Check that the redo log list is complete +/// Note that. +/// 1. when the prepare log shares a log with the last redo, all_redos does not contain the last redo log +/// 2. sorted_redos is a list of existing redo logs, sorted from smallest to largest, which ensures that +/// in the case of missing redo logs, the missing redo log ID must be smaller than the existing redo log ID because the partition logs are read in order +/// +/// @param all_redos all redo log IDs, not including the last redo log ID if prepare_with_redo is true +/// @param prepare_log_id prepare log ID +/// @param sorted_redos list of currently fetched redo logs, sorted from smallest to largest +/// @param prepare_with_redo whether the prepare log carries the last redo log +/// @param missing returns the missing logs +int ObLogPartTransResolver::check_redo_log_list_( + const transaction::ObRedoLogIdArray& all_redos, + const uint64_t prepare_log_id, + const SortedRedoLogList &sorted_redos, + const bool prepare_with_redo, + ObLogMissingInfo &missing_info) +{ + int ret = OB_SUCCESS; + bool has_missing = false; + + missing_info.reset(); + + if (prepare_with_redo) { + has_missing = (sorted_redos.log_num_ < (all_redos.count() + 1)); + } else { + has_missing = (sorted_redos.log_num_ < all_redos.count()); + } + + // Most scenarios will not have misses, so put the time-consuming operations in the scenarios with misses + if (has_missing) { + // For scenarios where the last REDO and PREPARE are together: + // 1. For non-LOB scenarios, the last REDO must exist + // 2. 
For LOB scenarios, the last REDO may be half of the LOB data and it may be ignored
+    //
+    // So, when looking for a missing log, consider the last REDO
+    ObRedoLogIdArray actual_all_redos(all_redos);
+    if (prepare_with_redo) {
+      if (OB_FAIL(actual_all_redos.push_back(prepare_log_id))) {
+        LOG_ERROR("push prepare_log_id into all redos fail", KR(ret), K(actual_all_redos),
+            K(prepare_log_id));
+      }
+    }
+
+    if (OB_SUCCESS == ret) {
+      // Iterate through actual_all_redos; every redo log whose ID is smaller than the first redo log in sorted_redos is a missing log
+      // Assume here that actual_all_redos is not sorted, so traverse all logs
+      for (int64_t idx = 0; OB_SUCCESS == ret && idx < actual_all_redos.count(); idx++) {
+        uint64_t curr_id = static_cast<uint64_t>(actual_all_redos.at(idx));
+
+        if (OB_ISNULL(sorted_redos.head_) || curr_id < sorted_redos.head_->start_log_id_) {
+          if (OB_FAIL(missing_info.push_back_missing_log_id(curr_id))) {
+            LOG_ERROR("push log id into missing fail", KR(ret), K(curr_id), K(missing_info));
+          }
+        }
+      }
+    }
+    LOG_DEBUG("check_redo_log_list_", K(sorted_redos), K(all_redos), K(missing_info),
+        K(sorted_redos.head_), K(sorted_redos.tail_),
+        KPC(sorted_redos.head_), KPC(sorted_redos.tail_));
+  }
+
+  return ret;
+}
+
+int ObLogPartTransResolver::read_commit_(const liboblog::ObLogEntryWrapper &log_entry,
+    const PartServeInfo &serve_info,
+    int64_t &pos,
+    bool with_prepare,
+    TransStatInfo &tsi)
+{
+  int ret = OB_SUCCESS;
+
+  const uint64_t log_id = log_entry.get_header().get_log_id();
+  const int64_t tstamp = log_entry.get_submit_timestamp();
+  const char *buf = log_entry.get_buf();
+  const int64_t len = log_entry.get_buf_len();
+  int64_t start_time = get_timestamp();
+
+  PartitionLogInfoArray partition_log_info_arr;
+  transaction::ObTransCommitLog log(partition_log_info_arr);
+  if (OB_ISNULL(buf)) {
+    LOG_ERROR("invalid log buf", K(buf), K(log_entry));
+    ret = OB_ERR_UNEXPECTED;
+  } else if (OB_UNLIKELY(len < 0)) {
+    LOG_ERROR("invalid log len", K(len), K(log_entry));
+    ret = OB_ERR_UNEXPECTED;
+  } else if (OB_FAIL(log.deserialize(buf, len, pos))) {
+    LOG_ERROR("deserialize commit log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos));
+  } else {
+    int64_t after_decode_time = get_timestamp();
+
+    if (OB_FAIL(parse_commit_log_(false/*is_ready_to_commit*/,
+        with_prepare,
+        log.get_partition_log_info_array(),
+        log.get_trans_id(),
+        log.get_global_trans_version(),
+        log_id,
+        tstamp,
+        log.get_cluster_id(),
+        false,
+        serve_info))) {
+      LOG_ERROR("parse commit log fail", KR(ret), K(with_prepare), K(log), K(log_id), K(tstamp),
+          K(serve_info));
+    } else {
+      int64_t end_time = get_timestamp();
+      int64_t read_commit_time = end_time - start_time;
+
+      tsi.commit_cnt_++;
+      tsi.commit_size_ += len;
+      tsi.commit_with_prepare_cnt_ += with_prepare ?
1 : 0; + tsi.read_commit_time_ += read_commit_time; + tsi.read_commit_decode_time_ += after_decode_time - start_time; + tsi.read_commit_parse_time_ += end_time - after_decode_time; + } + } + + return ret; +} + +// commit_log_with_prepare: Whether the commit log contains a prepare, which determines whether to build an array of participants +// is_ready_to_commit: whether to commit or not +int ObLogPartTransResolver::commit_task_(PartTransTask &task, + const int64_t global_trans_version, + const transaction::PartitionLogInfoArray &prepare_log_info, + const uint64_t log_id, + const int64_t tstamp, + const transaction::ObTransID &trans_id, + const bool commit_log_with_prepare, + const bool is_ready_to_commit) +{ + int ret = OB_SUCCESS; + const transaction::PartitionLogInfoArray *pid_arr = NULL; + transaction::PartitionLogInfoArray with_prepare_pid_arr; + + // If the commit log contains prepare, then it is a single transaction with only itself as a participant + // Since the participants do not know the prepare log ID when they write the log, the participants array is empty and needs to be constructed manually + if (commit_log_with_prepare) { + transaction::ObPartitionLogInfo pid(pkey_, log_id, tstamp); + + if (OB_UNLIKELY(prepare_log_info.count() > 0)) { + LOG_ERROR("invalid prepare log info which should be empty. " + "because prepare and commit log is combined", + K(pkey_), K(log_id), K(commit_log_with_prepare), K(prepare_log_info), + K(trans_id), K(tstamp)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(with_prepare_pid_arr.push_back(pid))) { + LOG_ERROR("push back partition log info fail", KR(ret), K(pid), K(with_prepare_pid_arr)); + } else { + pid_arr = &with_prepare_pid_arr; + } + } else { + pid_arr = &(prepare_log_info); + } + + if (OB_SUCCESS == ret) { + TransCommitInfo trans_commit_info(log_id, tstamp); + + // Set to a partitioned transaction context + if (OB_ISNULL(pid_arr)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("PartitionLogInfoArray for commit_task should not be null", KR(ret), K_(pkey), K(task)); + } else if (OB_FAIL(task.commit(global_trans_version, *pid_arr, is_ready_to_commit, first_log_ts_, + trans_id, trans_commit_info, part_trans_dispatcher_))) { + LOG_ERROR("commit normal trans fail", KR(ret), K(pkey_), K(task), K(log_id), + K(tstamp), K(global_trans_version), KPC(pid_arr), K(is_ready_to_commit), + K(first_log_ts_), K(trans_id), K(trans_commit_info)); + } else { /* success */ } + } + + return ret; +} + +int ObLogPartTransResolver::parse_commit_log_(const bool is_ready_to_commit, + const bool with_prepare, + const transaction::PartitionLogInfoArray &prepare_log_info, + const transaction::ObTransID &trans_id, + const int64_t global_trans_version, + const uint64_t log_id, + const int64_t tstamp, + const uint64_t cluster_id, + const bool is_sp_trans, + const PartServeInfo &serve_info) +{ + int ret = OB_SUCCESS; + PartTransTask *task = NULL; + PartTransID part_trans_id(trans_id, pkey_); + + if (OB_FAIL(part_trans_dispatcher_.get_task(part_trans_id, task))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // The transaction context does not exist, in two cases. + // 1. it can happen when the commit log is a separate log + // i.e. this log is after the liboblog start point, but the prepare log is before the start point + // 2. 
prepare log processing determines that it is not served, and removes the partition transaction task, see: handle_when_part_trans_not_served_ + if (with_prepare) { + LOG_ERROR("trans task does not exist", KR(ret), K(part_trans_id), K(log_id), K(pkey_), + K(with_prepare)); + } else { + // Normal, skip this log + // 1. In backup and recovery mode: check the log + // 2. start_global_trans_version is not set, no need check, skip this log + if (OB_INVALID_TIMESTAMP == start_global_trans_version_) { + // do nothing + ret = OB_SUCCESS; + } else { + // check the trans + bool is_served = false; + if (OB_FAIL(check_part_trans_served_(tstamp, log_id, cluster_id, serve_info, is_served))) { + LOG_ERROR("check_part_trans_served_ fail", KR(ret), K(tstamp), K(cluster_id), K(serve_info), + K(is_served)); + } else if (! is_served) { + // No service, no processing required + ret = OB_SUCCESS; + } else { + // served -> may not really served, because it is determined by the timestamp of the commit log, here need to check + // If the current commmit log global trans version is greater than the start global trans ersion, an error should be reported and the larger timestamp should be rolled back + if (global_trans_version > start_global_trans_version_) { + LOG_ERROR("commit log global_trans_version is greater then start_global_trans_version, " + "maybe have lost trans", + K(is_sp_trans), K_(pkey), K(log_id), K(tstamp), K(trans_id), + K(with_prepare), K(prepare_log_info), K(global_trans_version), + K(start_global_trans_version_)); + ret = OB_LOG_MISSING; + } else { + // Checked successfully, can ignore commit log + ret = OB_SUCCESS; + } + } + } + + if (OB_SUCC(ret)) { + LOG_INFO("ignore commit log which is single trans log after start log " + "whose prepare log id is little than start log id", + K_(pkey), K(log_id), K(tstamp), K(trans_id), K(with_prepare), K(prepare_log_info), + K(is_sp_trans)); + } + } + } else { + LOG_ERROR("get task from part trans dispatcher fail", KR(ret), K(pkey_), K(trans_id)); + } + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", KR(ret), K(trans_id), K(task)); + ret = OB_ERR_UNEXPECTED; + } + // 1. Execute the commit task action + // 2. 
Early unlocking scenario where single partition transaction is pre-committed + else if (OB_FAIL(commit_task_(*task, global_trans_version, prepare_log_info, log_id, tstamp, + trans_id, with_prepare, is_ready_to_commit))) { + LOG_ERROR("commit_task_ fail", KR(ret), KPC(task), K(global_trans_version), K(prepare_log_info), + K(log_id), K(tstamp), K(trans_id), K(with_prepare), K(is_ready_to_commit)); + } else { + // After commit, the task cannot be accessed again, as it may be concurrently flushed to downstream + LOG_DEBUG("read commit log", K(is_sp_trans), K(is_ready_to_commit), K_(pkey), K(log_id), K(tstamp), K(trans_id), + K(with_prepare), K(prepare_log_info), K(global_trans_version)); + } + return ret; +} + +int ObLogPartTransResolver::read_mutator_abort_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos) +{ + int ret = OB_SUCCESS; + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const uint64_t log_id = log_entry.get_header().get_log_id(); + transaction::ObTransMutatorAbortLog log; + + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize abort log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos)); + } else { + if (OB_FAIL(parse_abort_log_(log.get_trans_id(), log_id, tstamp))) { + LOG_ERROR("parse abort log fail", KR(ret), K(log), K(log_id), K(tstamp)); + } else { + // succ + } + LOG_DEBUG("read mutator abort log", K_(pkey), K(tstamp), K(log_id), K(log)); + } + + return ret; +} + +int ObLogPartTransResolver::read_abort_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos) +{ + int ret = OB_SUCCESS; + + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const uint64_t log_id = log_entry.get_header().get_log_id(); + + transaction::ObTransAbortLog log; + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize abort log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos)); + } else { + if (OB_FAIL(parse_abort_log_(log.get_trans_id(), log_id, tstamp))) { + LOG_ERROR("parse abort log fail", KR(ret), K(log), K(log_id), K(tstamp)); + } else { + // success + } + LOG_DEBUG("read abort log", K_(pkey), K(tstamp), K(log_id)); + } + + return ret; +} + +int ObLogPartTransResolver::parse_abort_log_(const transaction::ObTransID &trans_id, + const uint64_t log_id, + const int64_t tstamp) +{ + int ret = OB_SUCCESS; + PartTransID part_trans_id(trans_id, pkey_); + const bool is_ddl_part = is_ddl_partition(pkey_); + const bool skip_abnormal_trans_log = (TCONF.skip_abnormal_trans_log != 0); + // Default requires handling + bool need_handle_abort_log = true; + + if (skip_abnormal_trans_log) { + PartTransTask *task = NULL; + if (OB_FAIL(part_trans_dispatcher_.get_task(part_trans_id, task))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("get task from part trans dispatcher fail", KR(ret), K(pkey_), K(trans_id)); + } + } else { + if (OB_ISNULL(task)) { + LOG_ERROR("task is NULL", K(task)); 
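+          // Defensive branch: get_task() reported success but handed back a NULL task,
+          // which should not happen; surface it as OB_ERR_UNEXPECTED instead of silently
+          // treating the abort log as already filtered.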
+ ret = OB_ERR_UNEXPECTED; + } else if (task->is_trans_committed()) { + need_handle_abort_log = false; + LOG_ERROR("task is_trans_committed, skip abnormal abort log", KPC(task), K(trans_id), K(log_id), K(tstamp)); + } + } + } + + if (OB_FAIL(ret)) { + } else if (! need_handle_abort_log) { + // do nothing + } + // Delete partitioned transaction tasks directly from dispatcher + else if (OB_FAIL(part_trans_dispatcher_.remove_task(is_ddl_part, part_trans_id))) { + LOG_ERROR("remove task from dispatcher fail", KR(ret), K(is_ddl_part), K(part_trans_id), K(log_id), K(tstamp), + K(part_trans_dispatcher_)); + } + + return ret; +} + +int ObLogPartTransResolver::read_sp_trans_redo_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + TransStatInfo &tsi, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log) +{ + int ret = OB_SUCCESS; + + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + int64_t start_time = get_timestamp(); + const bool is_pg_aggre_log = log_entry.is_pg_aggre_log(); + const uint64_t real_tenant_id = log_entry.get_header().get_partition_key().get_tenant_id(); + + transaction::ObSpTransRedoLog log; + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } + // Initialize the mutator in the redo log, requiring that no data be copied during deserialization + else if (OB_FAIL(log.get_mutator().init())) { + LOG_ERROR("sp redo log init for deserialize fail", KR(ret), K(pkey_), K(log_entry)); + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize sp trans redo log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos), + K(log), K(log_entry)); + } else { + int64_t after_decode_time = get_timestamp(); + + if (OB_FAIL(parse_redo_log_(log_entry.get_log_offset(), + log.get_mutator(), + log.get_trans_id(), + log_id, + tstamp, + log.get_log_no(), + true, + is_missing_log, + is_pg_aggre_log, + missing_log_trans_id_array))) { + // Note: The OB_ENTRY_EXIST case is not handled here and must not be repeated + LOG_ERROR("parse sp redo log fail", KR(ret), K(log), K(log_id), K(tstamp), K(log_entry), + K(is_missing_log), K(is_pg_aggre_log), K(missing_log_trans_id_array)); + } else { + int64_t end_time = get_timestamp(); + + tsi.sp_redo_cnt_++; + tsi.sp_redo_size_ += len; + tsi.read_sp_redo_time_ += end_time - start_time; + tsi.read_sp_redo_decode_time_ += after_decode_time - start_time; + tsi.read_sp_redo_parse_time_ += end_time - after_decode_time; + } + } + + return ret; +} + +int ObLogPartTransResolver::deserialize_sp_commit_and_parse_redo_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + transaction::ObSpTransCommitLog &commit_log, + int64_t &after_decode_time, + bool &with_redo, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log) +{ + int ret = OB_SUCCESS; + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + const bool is_pg_aggre_log = log_entry.is_pg_aggre_log(); + const uint64_t real_tenant_id = log_entry.get_header().get_partition_key().get_tenant_id(); + + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), 
K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } + // Initialize the mutator in the redo log, requiring that no data be copied during deserialization + else if (OB_FAIL(commit_log.get_mutator().init())) { + LOG_ERROR("sp commit log init for deserialize fail", KR(ret), K(pkey_), K(log_entry)); + } else if (OB_FAIL(commit_log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize sp trans commit log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos), + K(commit_log), K(log_entry)); + } else { + after_decode_time = get_timestamp(); + const transaction::ObTransID &trans_id = commit_log.get_trans_id(); + + // If the redo log is not empty, parse the redo log + with_redo = (! commit_log.is_empty_redo_log()); + if (! with_redo) { + // empty redo log + } else { + if (OB_FAIL(parse_redo_log_(log_entry.get_log_offset(), commit_log.get_mutator(), trans_id, log_id, tstamp, + commit_log.get_log_no(), true, is_missing_log, is_pg_aggre_log, missing_log_trans_id_array))) { + if (OB_ENTRY_EXIST == ret) { + // duplicate redo logs, probably dealing with missing log scenarios + // This error is simply ignored to facilitate subsequent processing of prepare and commit + LOG_WARN("redo log has been read mutiple times, must have missing log", + K(commit_log), K(log_entry), K(is_missing_log), K(is_pg_aggre_log), K(missing_log_trans_id_array)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("parse sp redo log in commit log fail", KR(ret), K(commit_log), + K(commit_log), K(log_entry)); + } + } + } + } + return ret; +} + +int ObLogPartTransResolver::read_sp_trans_commit_(const liboblog::ObLogEntryWrapper &log_entry, + const int64_t log_entry_index, + ObLogMissingInfo &missing, + int64_t &pos, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + const bool is_sp_elr_trans) +{ + int ret = OB_SUCCESS; + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const int64_t len = log_entry.get_buf_len(); + const common::ObVersion freeze_version = log_entry.get_header().get_freeze_version(); + int64_t start_time = get_timestamp(); + int64_t after_decode_time = 0; + bool with_redo = false; + ObTransIDArray missing_log_trans_id_array; + + transaction::ObSpTransCommitLog commit_log; + + if (OB_FAIL(deserialize_sp_commit_and_parse_redo_(log_entry, pos, commit_log, after_decode_time, + with_redo, missing_log_trans_id_array))) { + LOG_ERROR("deserialize_sp_commit_and_parse_redo_ fail", KR(ret), K(log_entry), K(pos), + K(commit_log), K(missing_log_trans_id_array)); + } else { + const transaction::ObTransID &trans_id = commit_log.get_trans_id(); + const transaction::ObElrTransInfoArray &elt_trans_info_array = commit_log.get_prev_trans_arr(); + bool is_prepare_log_served = false; + // Early unlocking scenario: single partition transaction on single machine, essentially requires parsing of sp_commit logs, but similar to batch commit transaction, is pre-committed + BatchCommitTransInfo batch_commit_trans_info; + batch_commit_trans_info.init_for_sp_elr_trans(is_sp_elr_trans); + + // prepare part trans + if (OB_SUCCESS == ret) { + if (OB_FAIL(parse_prepare_log_(trans_id, + commit_log.get_redo_log_ids(), + with_redo, + commit_log.get_cluster_id(), + freeze_version, + log_id, + tstamp, + missing, + log_entry_index, + true, + commit_log.get_app_trace_id_str(), + commit_log.get_app_trace_info(), + serve_info, + is_prepare_log_served, + batch_commit_trans_info, + 
elt_trans_info_array, + commit_log.get_checkpoint()))) { + if (OB_ITEM_NOT_SETTED == ret) { + // missing log + } else { + LOG_ERROR("parse sp prepare log fail", KR(ret), K(commit_log), K(log_id), K(tstamp), + K(missing), K(batch_commit_trans_info)); + } + } + } + + // commit partitioned transactions + // Parsing the commit log if only the partitioned transaction is served + if (OB_SUCCESS == ret && is_prepare_log_served) { + transaction::PartitionLogInfoArray prepare_log_info; + bool with_prepare = true; + if (OB_FAIL(parse_commit_log_(is_sp_elr_trans/*is_ready_to_commit*/, + with_prepare, + prepare_log_info, + trans_id, + tstamp, + log_id, + tstamp, + commit_log.get_cluster_id(), + true, + serve_info))) { + LOG_ERROR("parse sp commit log fail", KR(ret), K(commit_log), K(log_id), K(tstamp), K(serve_info)); + } + } + + int64_t end_time = get_timestamp(); + + if (OB_SUCCESS == ret) { + tsi.sp_commit_cnt_++; + tsi.sp_commit_size_ += len; + tsi.sp_commit_with_redo_cnt_ += with_redo ? 1 : 0; + tsi.read_sp_redo_time_ += end_time - start_time; + tsi.read_sp_commit_decode_time_ += after_decode_time - start_time; + tsi.read_sp_commit_parse_time_ += end_time - after_decode_time; + } + } + + return ret; +} + +int ObLogPartTransResolver::read_sp_trans_abort_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos) +{ + int ret = OB_SUCCESS; + const int64_t tstamp = log_entry.get_submit_timestamp(); + const uint64_t log_id = log_entry.get_header().get_log_id(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + // Default trace_id is empty + ObString trace_id; + + transaction::ObSpTransAbortLog log; + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize abort log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos)); + } else { + const transaction::ObTransID &trans_id = log.get_trans_id(); + + if (OB_FAIL(parse_abort_log_(trans_id, log_id, tstamp))) { + LOG_ERROR("parse sp abort log fail", KR(ret), K(log), K(log_id), K(tstamp)); + } else { + // 成功 + } + + LOG_DEBUG("read sp trans abort log", K_(pkey), K(tstamp), K(log)); + } + return ret; +} + +int ObLogPartTransResolver::read_checkpoint_log_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos) +{ + int ret = OB_SUCCESS; + + const uint64_t log_id = log_entry.get_header().get_log_id(); + const int64_t tstamp = log_entry.get_submit_timestamp(); + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_buf_len(); + const bool is_ddl_part = is_ddl_partition(pkey_); + transaction::ObCheckpointLog log; + + if (OB_ISNULL(buf)) { + LOG_ERROR("invalid log buf", K(buf), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(len < 0)) { + LOG_ERROR("invalid log len", K(len), K(log_entry)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log.deserialize(buf, len, pos))) { + LOG_ERROR("deserialize checkpoint log fail", KR(ret), K(pkey_), K(buf), K(len), K(pos)); + } else { + const int64_t checkpoint = log.get_checkpoint(); + + // ignoring checkpoint logs in test mode + if (test_checkpoint_mode_on) { + LOG_INFO("read checkpoint log, but ignore", K_(pkey), K(tstamp), K(log), K(test_checkpoint_mode_on)); + } else { + // Advance partition checkpoint based on checkpoint logs + if (OB_FAIL(part_trans_dispatcher_.update_checkpoint(checkpoint))) { + 
LOG_ERROR("part_trans_dispatcher update checkpoint fail", KR(ret), K(pkey_), K(log), K(log_id), + K(tstamp), K(checkpoint)); + } + + LOG_DEBUG("read checkpoint log", K_(pkey), K(is_ddl_part), K(tstamp), K(log), K(checkpoint)); + } + } + + return ret; +} + +// Converting unknown logs to heartbeats +int ObLogPartTransResolver::read_unknown_log_(const clog::ObLogEntry &log_entry) +{ + int ret = OB_SUCCESS; + UNUSED(log_entry); + return ret; +} + +bool ObLogPartTransResolver::is_last_redo_with_empty_content_(const transaction::ObTransRedoLog &redo_log) const +{ + const int64_t length = redo_log.get_mutator().get_position(); + return 0 == length && redo_log.is_last(); +} + +} +} diff --git a/src/liboblog/src/ob_log_part_trans_resolver.h b/src/liboblog/src/ob_log_part_trans_resolver.h new file mode 100644 index 0000000000000000000000000000000000000000..346949b906d95d9f2d5756d0d411bedfcede88d0 --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_resolver.h @@ -0,0 +1,459 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_RESOLVER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_RESOLVER_H__ + +#include "lib/string/ob_string.h" // ObString +#include "common/ob_partition_key.h" // ObPartitionKey +#include "clog/ob_log_entry.h" // ObLogEntry +#include "ob_log_entry_wrapper.h" // ObLogEntryWrapper +#include "storage/transaction/ob_trans_define.h" // ObTransID, ObRedoLogIdArray +#include "storage/ob_storage_log_type.h" // ObStorageLogType + +#include "ob_log_trans_log.h" // RedoLogList +#include "ob_log_task_pool.h" // ObLogTransTaskPool +#include "ob_log_utils.h" // _SEC_ +#include "ob_log_common.h" // OB_INVALID_PROGRESS +#include "ob_log_part_trans_dispatcher.h" // PartTransDispatcher + +namespace oceanbase +{ +namespace liboblog +{ + +struct TransStatInfo; +class IObLogFetcherDispatcher; +class IObLogClusterIDFilter; +struct PartServeInfo; + +// Partitioned transaction parser +// +// Parses partitioned log streams into partitioned transactions and outputs them in order +// 1. single threaded data production only, multi-threaded dispatch() is supported +// 2. one partitioned transaction parser per partition +// 3. cannot produce data while calling the offline function, but dispatch() is supported +class IObLogPartTransResolver +{ +public: + // Test mode on or off + static bool test_mode_on; + static bool test_checkpoint_mode_on; + static int64_t test_mode_ignore_redo_count; + + typedef common::ObSEArray ObTransIDArray; + typedef common::ObSEArray ObAggreLogIndexArray; + // PG scenarios, as aggregated logs: + // 1. it is possible to parse to an aggregated log and find multiple transactions with missing redo logs + // 2. Missing log arrays are guaranteed to be ordered and de-duplicated: when multiple transactions have the same missing redo log, there may be duplicate log_id, + struct ObLogMissingInfo + { + // array of missing log ids + ObLogIdArray missing_log_ids_; + // array of trans that contain missing log + ObTransIDArray missing_trans_ids_; + // 1. 
when parsing to prepre or sp_commit and finding a missing redo, read prepare or reda sp_commit again to advance the status after adding the missing redo log + // 2. log_indexes_records the index (i.e. the number of entries) of the prepare/sp_commit in the PG aggregation log, + // ensuring that data from non-missing transactions is filtered out when read prepare/sp_commit is used to advance the status + ObAggreLogIndexArray log_indexs_; + + ObLogMissingInfo(); + + void reset() + { + missing_log_ids_.reset(); + missing_trans_ids_.reset(); + log_indexs_.reset(); + } + + int sort_and_unique_missing_log_ids(); + int64_t get_missing_log_count() const { return missing_log_ids_.count(); } + + int push_back_missing_log_id(const uint64_t log_id); + int push_back_trans_id(const transaction::ObTransID &trans_id); + int push_back_log_index(const int64_t log_index); + + TO_STRING_KV(K_(missing_log_ids), + K_(missing_trans_ids), + K_(log_indexs)); + }; + +public: + virtual ~IObLogPartTransResolver() {} + +public: + virtual int init(const common::ObPartitionKey &pkey, + const int64_t start_tstamp, + const int64_t start_global_trans_version) = 0; + +public: + /// Read log entries and parse logs into zone transactions + /// + /// @param [in] log_entry Target log entry + /// @param [out] missing Missing info, where missing redo logs are guaranteed to be ordered and not duplicated + /// @param [out] tsi Transaction log parsing statistics + /// @param [in] serve_info Partition service information, used to determine if a partitioned transaction is served + /// @param [out] log_type The type of log to be parsed + /// @param [in] need_filter_pg_no_missing_redo_trans + /// PG scenario: read missing log and then read prepare/sp_commit again needs to filter non-missing redo log transaction data + /// @param [in] log_indexes PG scenario, prepare/sp_commit log is at index of aggregated logs + /// + /// @retval OB_SUCCESS Success + /// @retval OB_ITEM_NOT_SETTED redo log is incomplete + /// @retval Other error codes Fail + virtual int read(const clog::ObLogEntry &log_entry, + ObLogMissingInfo &missing_info, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + storage::ObStorageLogType &log_type, + const bool need_filter_pg_no_missing_redo_trans, + const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs) = 0; + + /// Read missing mutator log entries/REDO log entries + /// Supports aggregated logs, for PG aggregated logs. + /// 1. only care about log_type, OB_LOG_MUTATOR, OB_LOG_TRANS_REDO, OB_LOG_SP_TRANS_REDO, OB_LOG_SP_TRANS_COMMIT that include redo data, other + /// Log types are ignored + /// 2. 
Filtering based on trans_id information: when synchronizing over aggregated logs, only the transaction data of the missing logs is of interest
+  ///
+  /// @param log_entry Target log entry
+  /// @param missing_log_trans_id_array Array of missing log trans_id
+  ///
+  /// @retval OB_SUCCESS Success
+  /// @retval Other error codes Fail
+  virtual int read_missing_redo(const clog::ObLogEntry &log_entry,
+      const ObTransIDArray &missing_log_trans_id_array) = 0;
+
+  /// Output partitioned transactions that are READY
+  /// Supports multi-threaded calls
+  /// Note: this operation may block until dispatch succeeds
+  ///
+  /// @retval OB_SUCCESS Success
+  /// @retval OB_IN_STOP_STATE Exit
+  /// @retval Other error codes Fail
+  virtual int dispatch(volatile bool &stop_flag, int64_t &pending_task_count) = 0;
+
+  /// Partition offline: generate a "partition offline" transaction task as the last task output for the partition, and clean up all remaining tasks
+  /// Note: this operation may block
+  ///
+  /// @retval OB_SUCCESS Success
+  /// @retval OB_IN_STOP_STATE Exit
+  /// @retval Other error codes Fail
+  virtual int offline(volatile bool &stop_flag) = 0;
+
+  /// Get the TPS of the current partition
+  virtual double get_tps() = 0;
+
+  // Get the dispatch progress and related information
+  //
+  // @retval OB_INVALID_PROGRESS no task is pending output, the progress is invalid
+  // @retval other values current output progress: timestamp of the task to be output - 1
+  virtual int get_dispatch_progress(int64_t &progress, PartTransDispatchInfo &dispatch_info) = 0;
+
+  // Issue a heartbeat task
+  virtual int heartbeat(const common::ObPartitionKey &pkey, const int64_t hb_tstamp) = 0;
+};
+
+////////////////////////////////// ObLogPartTransResolver /////////////////////////////////
+
+// Partitioned transaction resolver
+class ObLogPartTransResolver : public IObLogPartTransResolver
+{
+public:
+  ObLogPartTransResolver(const char* pkey_str,
+      TaskPool &task_pool,
+      PartTransTaskMap &task_map,
+      TransCommitMap &trans_commit_map,
+      IObLogFetcherDispatcher &dispatcher,
+      IObLogClusterIDFilter &cluster_id_filter);
+  virtual ~ObLogPartTransResolver();
+
+  static const int64_t DATA_OP_TIMEOUT = 10 * _SEC_;
+
+public:
+  virtual int init(const common::ObPartitionKey &pkey,
+      const int64_t start_tstamp,
+      const int64_t major_freeze_timestamp);
+  virtual int read(const clog::ObLogEntry &log_entry,
+      ObLogMissingInfo &missing_info,
+      TransStatInfo &tsi,
+      const PartServeInfo &serve_info,
+      storage::ObStorageLogType &log_type,
+      const bool need_filter_pg_no_missing_redo_trans,
+      const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs);
+  virtual int read_missing_redo(const clog::ObLogEntry &log_entry,
+      const ObTransIDArray &missing_log_trans_id_array);
+  virtual int dispatch(volatile bool &stop_flag, int64_t &pending_task_count);
+  virtual int offline(volatile bool &stop_flag);
+  virtual double get_tps() { return part_trans_dispatcher_.get_tps(); }
+  virtual int get_dispatch_progress(int64_t &progress, PartTransDispatchInfo &dispatch_info)
+  {
+    return part_trans_dispatcher_.get_dispatch_progress(progress, dispatch_info);
+  }
+  virtual int heartbeat(const common::ObPartitionKey &pkey, const int64_t hb_tstamp)
+  {
+    return part_trans_dispatcher_.heartbeat(pkey, hb_tstamp);
+  }
+
+  TO_STRING_KV(K_(pkey),
+      K_(offlined),
+      K_(part_trans_dispatcher));
+private:
+  struct BatchCommitTransInfo
+  {
+    BatchCommitTransInfo() { reset(); }
+    ~BatchCommitTransInfo() { reset(); }
+
+    void reset()
+    {
+      is_sp_elr_trans_ = false;
+      is_batch_commit_trans_ = false;
+      trans_is_batch_committed_ = false;
+    }
+
+    void init_for_prepare_log(const bool is_batch_commit_trans, const bool trans_is_batch_committed)
+    {
+      is_sp_elr_trans_ = false;
+      is_batch_commit_trans_ = is_batch_commit_trans;
+      trans_is_batch_committed_ = trans_is_batch_committed;
+    }
+
+    void init_for_sp_elr_trans(const bool is_sp_elr_trans)
+    {
+      is_sp_elr_trans_ =
is_sp_elr_trans; + is_batch_commit_trans_ = false; + trans_is_batch_committed_ = false; + } + + TO_STRING_KV(K_(is_sp_elr_trans), + K_(is_batch_commit_trans), + K_(trans_is_batch_committed)); + + bool is_sp_elr_trans_; + bool is_batch_commit_trans_; + bool trans_is_batch_committed_; + }; + + // 1. PG scenario: log_entry_index records the number of transaction logs in the aggregated log, starting from 0 + // 2. Non-PG scenario: log_entry_index=0 + int read_log_(const liboblog::ObLogEntryWrapper &log_entry, + const int64_t log_entry_index, + const int64_t begin_time, + int64_t &pos, + ObLogMissingInfo &missing, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + storage::ObStorageLogType &log_type); + int read_missing_redo_(const liboblog::ObLogEntryWrapper &log_entry, + const ObTransIDArray &missing_log_trans_id_array, + int64_t &pos, + TransStatInfo &tsi, + storage::ObStorageLogType &log_type); + int parse_and_read_aggre_log_(const clog::ObLogEntry &log_entry, + const int64_t begin_time, + ObLogMissingInfo &missing, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + const bool is_read_missing_log, + const ObTransIDArray &missing_log_trans_id_array, + const bool need_filter_pg_no_missing_redo_trans, + const IObLogPartTransResolver::ObAggreLogIndexArray &log_indexs); + // Any log_entry that is not located in log_indexes needs to be ignored + int filter_pg_no_missing_redo_trans_(const ObAggreLogIndexArray &log_indexs, + const int64_t cur_log_index, + bool &if_filter); + int obtain_task_(const PartTransID &part_trans_id, PartTransTask*& task); + int decode_trans_log_header_(const clog::ObLogEntry& log_entry, + int64_t &pos, + bool &is_log_filtered, + bool &is_log_aggre, + bool &is_barrier_log, + storage::ObStorageLogType &log_type); + bool is_barrier_log_(const uint64_t log_id, + const clog::ObLogEntry& log_entry); + int read_mutator_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + TransStatInfo &tsi, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log = false); + int read_mutator_abort_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos); + int read_redo_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + TransStatInfo &tsi, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log = false); + int parse_redo_log_(const int32_t log_offset, + const transaction::ObTransMutator &log_mutator, + const transaction::ObTransID &trans_id, + const uint64_t log_id, + const int64_t tstamp, + const int64_t log_no, + const bool is_sp_trans, + const bool is_missing_log, + const bool is_pg_aggre_log, + const ObTransIDArray &missing_log_trans_id_array); + // 1. Only PG read missing log scenarios are filtered based on trans_id + // 2. All others are not filtered + int filter_pg_log_based_on_trans_id_(const ObTransIDArray &missing_log_trans_id_array, + const transaction::ObTransID &log_trans_id, + const bool is_pg_missing_log, + bool &is_filter); + int read_prepare_(const liboblog::ObLogEntryWrapper &log_entry, + const int64_t log_entry_index, + ObLogMissingInfo &missing, + int64_t &pos, + const bool with_redo, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + bool &is_prepare_log_served); + // 1. the prepare log timestamp can be passed in + // 2. 
the commit log timestamp can be passed in (backup recovery scenario) + int check_part_trans_served_(const int64_t trans_log_tstamp, + const uint64_t log_id, + const uint64_t cluster_id, + const PartServeInfo &serve_info, + bool &is_served); + int handle_when_part_trans_not_served_(const PartTransID &part_trans_id, + const uint64_t log_id, + const int64_t tstamp); + int parse_prepare_log_(const transaction::ObTransID &trans_id, + const transaction::ObRedoLogIdArray &all_redos, + const bool with_redo, + const uint64_t cluster_id, + const common::ObVersion freeze_version, + const uint64_t log_id, + const int64_t tstamp, + ObLogMissingInfo &missing_info, + const int64_t log_entry_index, + const bool is_sp_trans, + const common::ObString &trace_id, + const common::ObString &trace_info, + const PartServeInfo &serve_info, + bool &is_prepare_log_served, + const BatchCommitTransInfo &batch_commit_trans_info, + const transaction::ObElrTransInfoArray &elt_trans_info_array, + const int64_t checkpoint, + const int64_t commit_version = common::OB_INVALID_VERSION, + const transaction::PartitionLogInfoArray *prepare_log_info = NULL); + // Currently, partitioned transactions are advanced in the following ways. + // 1. normal transactions are advanced by themselves, through commit logs, such as redo, prepare, commit or sp_redo, sp_commit + // 2. rely on the checkpoint log for advancement, when no transaction occurs on the partition, a checkpoint log is written at 50ms (read timestamp on the standby) + // 3. rely on checkpoint information carried by subsequent prepare or sp_commit logs to advance previous transactions + // 4. rely on the clog batch commit flag + // For example, for a single multipartition transaction with only redo, prepare logs, should advance through 2,3,4 + int prepare_normal_trans_task_(PartTransTask &task, + const int64_t tstamp, + const transaction::ObTransID &trans_id, + const uint64_t log_id, + const uint64_t cluster_id, + const common::ObVersion freeze_version, + const common::ObString &trace_id, + const common::ObString &trace_info, + const BatchCommitTransInfo &batch_commit_trans_info, + const transaction::ObElrTransInfoArray &elt_trans_info_array, + const int64_t commit_version, + const int64_t checkpoint, + const transaction::PartitionLogInfoArray *prepare_log_info); + int check_redo_log_list_( + const transaction::ObRedoLogIdArray& all_redos, + const uint64_t prepare_log_id, + const SortedRedoLogList &sorted_redos, + const bool prepare_with_redo, + ObLogMissingInfo &missing); + // Read commit/abort logs. + int read_commit_(const liboblog::ObLogEntryWrapper &log_entry, + const PartServeInfo &serve_info, + int64_t &pos, + bool with_prepare, + TransStatInfo &tsi); + int commit_task_(PartTransTask &task, + const int64_t global_trans_version, + const transaction::PartitionLogInfoArray &prepare_log_info, + const uint64_t log_id, + const int64_t tstamp, + const transaction::ObTransID &trans_id, + const bool commit_log_with_prepare, + const bool is_ready_to_commit); + // 1. 2pc commit日志一定提交成功, 非预提交, is_ready_to_commit=false + // 2. 单分区事务 sp_commit日志一定提交成功, 非预提交, is_ready_to_commit=false + // 3. 提前解行锁场景,单分区sp_commit预提交, is_ready_to_commit=true // 1. 2pc commit log must commit, not pre-commit, is_ready_to_commit=false + // 2. single-partition transaction sp_commit log must commit, non-pre-commit, is_ready_to_commit=false + // 3. 
early unlock (row lock) scenario, single-partition sp_commit pre-commit, is_ready_to_commit=true + int parse_commit_log_(const bool is_ready_to_commit, + const bool with_prepare, + const transaction::PartitionLogInfoArray &prepare_log_info, + const transaction::ObTransID &trans_id, + const int64_t global_trans_version, + const uint64_t log_id, + const int64_t tstamp, + const uint64_t cluster_id, + const bool is_sp_trans, + const PartServeInfo &serve_info); + int read_abort_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos); + int parse_abort_log_(const transaction::ObTransID &trans_id, + const uint64_t log_id, + const int64_t tstamp); + // Read unknown type log. + int read_unknown_log_(const clog::ObLogEntry &log_entry); + int read_sp_trans_redo_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + TransStatInfo &tsi, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log = false); + int read_sp_trans_commit_(const liboblog::ObLogEntryWrapper &log_entry, + const int64_t log_entry_index, + ObLogMissingInfo &missing, + int64_t &pos, + TransStatInfo &tsi, + const PartServeInfo &serve_info, + const bool is_sp_elr_trans = false); + int deserialize_sp_commit_and_parse_redo_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos, + transaction::ObSpTransCommitLog &commit_log, + int64_t &after_decode_time, + bool &with_redo, + const ObTransIDArray &missing_log_trans_id_array, + const bool is_missing_log = false); + int read_sp_trans_abort_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos); + int read_checkpoint_log_(const liboblog::ObLogEntryWrapper &log_entry, + int64_t &pos); + // Handle the compatibility issue of the last redo log being empty + bool is_last_redo_with_empty_content_(const transaction::ObTransRedoLog &redo_log) const; + + //////////////////////////////////////////////////////////////////////// + // DML-ObLogEntryTask related implementations + int dispatch_log_entry_task_(ObLogEntryTask *log_entry_task); + +private: + bool offlined_ CACHE_ALIGNED; // Is the partition deleted + common::ObPartitionKey pkey_; + // Record the log timestamp of the first log synced to the partition, to determine whether the predecessor transaction commits in the early unlock scenario + // Consider that the predecessor transaction of T2 is T1 and that T1 was last aborted, when the liboblog start time loci fall exactly between T1 and T2 and only T2's data can be synchronized. + // Parse to T2 whose predecessor transaction is T1, query T1's partitioned transaction context and abort transaction table will not exist, at this point think T1 commit, then it is wrong to determine the status in this way. + // Option. 
+ // Record the timestamp of the partition sync to the first log, if T1_commit version > first_log_ts_, then you can follow the above process + // otherwise rely on checkpoint or abort logs to advance the status of T2 + int64_t first_log_ts_; + PartTransDispatcher part_trans_dispatcher_; + IObLogClusterIDFilter &cluster_id_filter_; + int64_t start_global_trans_version_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogPartTransResolver); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_trans_resolver_factory.cpp b/src/liboblog/src/ob_log_part_trans_resolver_factory.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f31fd8685ce8ce50581825496acab2fc789213ac --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_resolver_factory.cpp @@ -0,0 +1,149 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_part_trans_resolver_factory.h" + +#include "lib/allocator/ob_mod_define.h" // ObNewModIds +#include "storage/transaction/ob_trans_define.h" // MAX_ELR_TRANS_INTERVAL + + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogPartTransResolverFactory::ObLogPartTransResolverFactory() : + inited_(false), + task_pool_(NULL), + log_entry_task_pool_(NULL), + dispatcher_(NULL), + cluster_id_filter_(NULL), + allocator_(), + task_map_(), + trans_commit_map_() +{} + +ObLogPartTransResolverFactory::~ObLogPartTransResolverFactory() +{ + destroy(); +} + +int ObLogPartTransResolverFactory::init(TaskPool &task_pool, + IObLogEntryTaskPool &log_entry_task_pool, + IObLogFetcherDispatcher &dispatcher, + IObLogClusterIDFilter &cluster_id_filter) +{ + int ret = OB_SUCCESS; + const int64_t obj_size = sizeof(ObLogPartTransResolver); + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_FAIL(allocator_.init(obj_size, ObModIds::OB_LOG_PART_TRANS_RESOLVER, + OB_SERVER_TENANT_ID, DEFAULT_BLOCK_SIZE))) { + LOG_ERROR("init allocator fail", KR(ret), K(obj_size)); + } else if (OB_FAIL(task_map_.init(ObModIds::OB_LOG_PART_TRANS_RESOLVER))) { + LOG_ERROR("init task map fail", KR(ret)); + } else if (OB_FAIL(trans_commit_map_.init(ObModIds::OB_LOG_PART_TRANS_RESOLVER))) { + LOG_ERROR("init abort task map fail", KR(ret)); + } else { + task_pool_ = &task_pool; + log_entry_task_pool_ = &log_entry_task_pool; + dispatcher_ = &dispatcher; + cluster_id_filter_ = &cluster_id_filter; + inited_ = true; + } + return ret; +} + +void ObLogPartTransResolverFactory::destroy() +{ + inited_ = false; + task_pool_ = NULL; + log_entry_task_pool_ = NULL; + dispatcher_ = NULL; + cluster_id_filter_ = NULL; + (void)allocator_.destroy(); + (void)task_map_.destroy(); + (void)trans_commit_map_.destroy(); +} + +int ObLogPartTransResolverFactory::alloc(const char *pkey_str, + IObLogPartTransResolver *&ptr) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(task_pool_) || OB_ISNULL(dispatcher_) || OB_ISNULL(cluster_id_filter_)) { + LOG_ERROR("not init", K(task_pool_), K(dispatcher_), K(cluster_id_filter_)); + ret = 
OB_NOT_INIT;
+  } else {
+    void *obj = allocator_.alloc();
+
+    if (OB_ISNULL(obj)) {
+      LOG_ERROR("allocate memory for ObLogPartTransResolver fail", K(obj));
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+    } else {
+      ptr = new(obj) ObLogPartTransResolver(pkey_str, *task_pool_, task_map_, trans_commit_map_, *dispatcher_, *cluster_id_filter_);
+    }
+  }
+
+  return ret;
+}
+
+void ObLogPartTransResolverFactory::free(IObLogPartTransResolver *ptr)
+{
+  if (OB_LIKELY(inited_) && OB_NOT_NULL(ptr)) {
+    ObLogPartTransResolver *resolver = dynamic_cast<ObLogPartTransResolver *>(ptr);
+    ptr->~IObLogPartTransResolver();
+
+    allocator_.free(resolver);
+    ptr = NULL;
+    resolver = NULL;
+  }
+}
+
+bool ObLogPartTransResolverFactory::TransInfoClearerByCheckpoint::operator()(const transaction::ObTransID &key,
+    TransCommitInfo &trans_commit_info)
+{
+  const int64_t log_ts = trans_commit_info.log_ts_;
+  const uint64_t log_id = trans_commit_info.log_id_;
+  bool need_purge = (log_ts < (checkpoint_ - transaction::MAX_ELR_TRANS_INTERVAL));
+
+  if (need_purge) {
+    purge_count_++;
+    _LOG_DEBUG("[STAT] [TRANS_COMMIT_INFO] [PURGE] PART_TRANS_ID=%s CHECKPOINT=%ld/%ld(%lu) DELTA=%ld/%ld",
+        to_cstring(key), log_ts, checkpoint_, log_id,
+        checkpoint_ - log_ts, transaction::MAX_ELR_TRANS_INTERVAL);
+  }
+
+  return need_purge;
+}
+
+void ObLogPartTransResolverFactory::gc_commit_trans_info(const int64_t checkpoint)
+{
+  int ret = OB_SUCCESS;
+  TransInfoClearerByCheckpoint purger(checkpoint);
+
+  if (OB_FAIL(trans_commit_map_.remove_if(purger))) {
+    LOG_ERROR("trans_commit_map_ remove_if fail", KR(ret), K(checkpoint));
+  } else {
+    _LOG_INFO("[STAT] [TRANS_COMMIT_INFO] [PURGE] PURGE_COUNT=%ld CUR_COUNT=%ld CHECKPOINT=%ld",
+        purger.purge_count_, trans_commit_map_.count(), checkpoint);
+  }
+}
+
+}
+}
diff --git a/src/liboblog/src/ob_log_part_trans_resolver_factory.h b/src/liboblog/src/ob_log_part_trans_resolver_factory.h
new file mode 100644
index 0000000000000000000000000000000000000000..bfe60ab33ec9f09cde16f933c7698f7642025831
--- /dev/null
+++ b/src/liboblog/src/ob_log_part_trans_resolver_factory.h
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_RESOLVER_FACTORY_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_RESOLVER_FACTORY_H__ + +#include "ob_log_part_trans_resolver.h" // IObLogPartTransResolver, PartTransTaskMap + +#include "lib/allocator/ob_small_allocator.h" // ObSmallAllocator + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogPartTransResolverFactory +{ +public: + virtual ~IObLogPartTransResolverFactory() {} + +public: + virtual int alloc(const char *pkey_str, IObLogPartTransResolver *&ptr) = 0; + virtual void free(IObLogPartTransResolver *ptr) = 0; +}; + +///////////////////////////////////////////////////////////////////// + +typedef ObLogTransTaskPool TaskPool; +class IObLogEntryTaskPool; +class IObLogFetcherDispatcher; +class IObLogClusterIDFilter; + +class ObLogPartTransResolverFactory : public IObLogPartTransResolverFactory +{ + static const int64_t DEFAULT_BLOCK_SIZE = (1L << 24); + +public: + ObLogPartTransResolverFactory(); + virtual ~ObLogPartTransResolverFactory(); + +public: + int init(TaskPool &task_pool, + IObLogEntryTaskPool &log_entry_task_pool, + IObLogFetcherDispatcher &dispatcher, + IObLogClusterIDFilter &cluster_id_filter); + void destroy(); + +public: + virtual int alloc(const char *pkey_str, IObLogPartTransResolver *&ptr); + virtual void free(IObLogPartTransResolver *ptr); + + struct TransInfoClearerByCheckpoint + { + int64_t checkpoint_; + int64_t purge_count_; + + explicit TransInfoClearerByCheckpoint(const int64_t checkpoint) : checkpoint_(checkpoint), purge_count_(0) + {} + bool operator()(const transaction::ObTransID &key, TransCommitInfo &trans_commit_info); + }; + void gc_commit_trans_info(const int64_t checkpoint); + +private: + bool inited_; + TaskPool *task_pool_; + IObLogEntryTaskPool *log_entry_task_pool_; + IObLogFetcherDispatcher *dispatcher_; + IObLogClusterIDFilter *cluster_id_filter_; + + common::ObSmallAllocator allocator_; + PartTransTaskMap task_map_; + TransCommitMap trans_commit_map_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogPartTransResolverFactory); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_part_trans_task.cpp b/src/liboblog/src/ob_log_part_trans_task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bb125d03e5b06c7fe0be2c2156c7048ff0b731e6 --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_task.cpp @@ -0,0 +1,3289 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_part_trans_task.h" + +#include "lib/string/ob_string.h" // ObString +#include "common/cell/ob_cell_reader.h" // ObCellReader +#include "share/schema/ob_schema_service.h" // OB_INVALID_DDL_OP +#include "share/schema/ob_table_schema.h" // ObTableSchema +#include "share/schema/ob_column_schema.h" // ObColumnSchemaV2 +#include "storage/ob_i_store.h" // T_DML_INSERT +#include "storage/memtable/ob_memtable_mutator.h" // ObMemtableMutatorMeta +#include "storage/memtable/ob_memtable_context.h" // ObTransRowFlag + +#include "ob_log_binlog_record.h" // ObLogBR +#include "ob_log_binlog_record_pool.h" // ObLogBRPool +#include "ob_log_row_data_index.h" // ObLogRowDataIndex +#include "ob_log_utils.h" // obj2str +#include "ob_log_common.h" // ALL_DDL_OPERATION_TABLE_DDL_STMT_STR_COLUMN_ID +#include "ob_obj2str_helper.h" // ObObj2strHelper +#include "ob_log_instance.h" // TCTX +#include "ob_log_part_trans_dispatcher.h" // PartTransDispatcher +#include "ob_log_config.h" +#include "ob_log_store_service.h" +#include "ob_log_resource_collector.h" + +#define PARSE_INT64(name, obj, val, INVALID_VALUE, check_value) \ + do { \ + if (OB_FAIL(obj.get_int(reinterpret_cast(val)))) { \ + LOG_ERROR("get_int fail", KR(ret), K(obj), "column", name); \ + } else if (INVALID_VALUE == val && check_value) {\ + LOG_ERROR("invalid value", K(val), "column", name); \ + ret = OB_INVALID_DATA; \ + } \ + } while (0); + +using namespace oceanbase::common; +using namespace oceanbase::common::serialization; +using namespace oceanbase::memtable; +using namespace oceanbase::share::schema; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace liboblog +{ + +void IStmtTask::reset() +{ + hash_value_ = OB_INVALID_ID; + row_index_ = OB_INVALID_ID; + next_ = NULL; +} + +uint64_t IStmtTask::get_tenant_id() const +{ + return host_.get_tenant_id(); +} + +//////////////////////////////////////////////////////////////////////////////////////// + +MutatorRow::MutatorRow(common::ObIAllocator &allocator) : + ObMemtableMutatorRow(), + allocator_(allocator), + deserialized_(false), + cols_parsed_(false), + new_cols_(), + old_cols_(), + rowkey_cols_() +{} + +MutatorRow::~MutatorRow() +{ + reset(); +} + +int MutatorRow::deserialize(const char* buf, const int64_t data_len, int64_t& pos) +{ + int ret = OB_SUCCESS; + + + if (OB_UNLIKELY(deserialized_)) { + LOG_ERROR("deserialize twice"); + ret = OB_STATE_NOT_MATCH; + } else if (OB_ISNULL(buf) || OB_UNLIKELY(pos < 0) || OB_UNLIKELY(pos > data_len)) { + LOG_ERROR("invalid argument", K(buf), K(pos), K(data_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(ObMemtableMutatorRow::deserialize(buf, data_len, pos))) { + LOG_ERROR("deserialize mutator fail", KR(ret), KP(buf), K(data_len), K(pos)); + } else { + deserialized_ = true; + } + + return ret; +} + +int MutatorRow::deserialize_first(const char* buf, + const int64_t buf_len, + int64_t &pos, + int32_t &row_size, + uint64_t &table_id) +{ + int ret = OB_SUCCESS; + row_size = 0; + table_id = OB_INVALID_ID; + int64_t new_pos = pos; + + if (OB_UNLIKELY(deserialized_)) { + LOG_ERROR("deserialize twice"); + ret = OB_STATE_NOT_MATCH; + } else if (OB_ISNULL(buf) || OB_UNLIKELY(pos < 0) || OB_UNLIKELY(pos > buf_len)) { + LOG_ERROR("invalid argument", K(buf), K(pos), K(buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(decode_i32(buf, buf_len, new_pos, (int32_t *)&row_size_))) { + LOG_ERROR("deserialize row_size fail", KR(ret), K(new_pos), K(row_size_)); + } else if (pos + 
row_size_ > buf_len) { + LOG_ERROR("size overflow", KR(ret), KP(buf), K(buf_len), K(pos), K_(row_size)); + ret = OB_SIZE_OVERFLOW; + } else if (OB_FAIL(decode_vi64(buf, buf_len, new_pos, (int64_t *)&table_id_))) { + LOG_ERROR("deserialize table_id fail", KR(ret), K(new_pos), K(table_id_)); + } else { + row_size = row_size_; + table_id = table_id_; + // The pos indicates the position that has been resolved + pos = new_pos; + } + + return ret; +} + +int MutatorRow::deserialize_second(const char* buf, + const int64_t buf_len, + int64_t &pos, + int64_t &table_version) +{ + int ret = OB_SUCCESS; + table_version = 0; + int64_t new_pos = pos; + + if (OB_UNLIKELY(deserialized_)) { + LOG_ERROR("deserialize twice"); + ret = OB_STATE_NOT_MATCH; + } else if (OB_ISNULL(buf) || OB_UNLIKELY(pos < 0) || OB_UNLIKELY(pos > buf_len)) { + LOG_ERROR("invalid argument", K(buf), K(pos), K(buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(rowkey_.deserialize(buf, buf_len, new_pos))) { + LOG_ERROR("deserialize rowkey fail", KR(ret), K(new_pos), K(rowkey_)); + } else if (OB_FAIL(decode_vi64(buf, buf_len, new_pos, &table_version_))) { + LOG_ERROR("deserialize table_version fail", KR(ret), K(new_pos), K(table_version_)); + } else { + table_version = table_version_; + // The pos indicates the position that has been resolved + pos = new_pos; + } + + return ret; +} + +// If obj2str_helper is empty, then won’t conversion of obj to string +// also allow table schema to be empty +int MutatorRow::parse_cols(ObObj2strHelper *obj2str_helper /* = NULL */, + const ObSimpleTableSchemaV2 *simple_table_schema /* = NULL */, + const TableSchemaInfo *tb_schema_info /* = NULL */, + const bool enable_output_hidden_primary_key /* = false */) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(cols_parsed_)) { + LOG_ERROR("columns has been parsed", K(cols_parsed_)); + ret = OB_STATE_NOT_MATCH; + } else if (OB_UNLIKELY(! 
deserialized_)) { + LOG_ERROR("row has not been deserialized"); + ret = OB_STATE_NOT_MATCH; + } else if (NULL != simple_table_schema + && OB_UNLIKELY(ObMemtableMutatorRow::table_id_ != simple_table_schema->get_table_id())) { + LOG_ERROR("invalid table schema", K(table_id_), K(simple_table_schema->get_table_id())); + ret = OB_INVALID_ARGUMENT; + } + + // parse value of new column + if (OB_SUCC(ret)) { + if (OB_ISNULL(new_row_.data_) || OB_UNLIKELY(new_row_.size_ <= 0)) { + LOG_WARN("new row data is empty", K(new_row_), + "mutator_row", (const ObMemtableMutatorRow &)(*this)); + new_cols_.reset(); + } else if (OB_FAIL(parse_columns_(new_cols_, new_row_.data_, + new_row_.size_, obj2str_helper, simple_table_schema, tb_schema_info, enable_output_hidden_primary_key))) { + LOG_ERROR("parse new columns fail", KR(ret), K(new_row_), K(obj2str_helper), K(simple_table_schema), + K(tb_schema_info), K(enable_output_hidden_primary_key)); + } else { + // succ + } + } + + // parse value of old column + if (OB_SUCC(ret)) { + if (OB_ISNULL(old_row_.data_) || OB_UNLIKELY(old_row_.size_ <= 0)) { + // no old cols + old_cols_.reset(); + } else if (OB_FAIL(parse_columns_(old_cols_, old_row_.data_, + old_row_.size_, obj2str_helper, simple_table_schema, tb_schema_info, enable_output_hidden_primary_key))) { + LOG_ERROR("parse old columns fail", KR(ret), K(old_row_), K(obj2str_helper), K(simple_table_schema), + K(tb_schema_info), K(enable_output_hidden_primary_key)); + } else { + // succ + } + } + + // parse rowkey data + if (OB_SUCC(ret)) { + rowkey_cols_.reset(); + + if (OB_FAIL(parse_rowkey_(rowkey_cols_, rowkey_, obj2str_helper, simple_table_schema, tb_schema_info, + enable_output_hidden_primary_key))) { + LOG_ERROR("parse_rowkey_ fail", KR(ret), K(rowkey_), K(obj2str_helper), + K(enable_output_hidden_primary_key)); + } else { + // succ + } + } + + if (OB_SUCC(ret)) { + cols_parsed_ = true; + } + + return ret; +} + +int MutatorRow::parse_columns_(ColValueList &cols, + const char *col_data, + const int64_t col_data_size, + ObObj2strHelper *obj2str_helper, + const ObSimpleTableSchemaV2 *table_schema, + const TableSchemaInfo *tb_schema_info, + const bool enable_output_hidden_primary_key) +{ + int ret = OB_SUCCESS; + ObCellReader cell_reader; + ObCompactStoreType store_type = SPARSE; + + // NOTE: Allow obj2str_helper and column_schema to be empty + if (OB_ISNULL(col_data) || OB_UNLIKELY(col_data_size <= 0)) { + LOG_ERROR("invalid argument", K(col_data_size), K(col_data)); + ret = OB_INVALID_ARGUMENT; + } + // Validate cols values + else if (OB_UNLIKELY(cols.num_ > 0)) { + LOG_ERROR("column value list is not reseted", K(cols)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(cell_reader.init(col_data, col_data_size, store_type))) { + LOG_ERROR("init cell reader fail", KR(ret), KP(col_data), K(col_data_size)); + } else { + // Iterate through all Cells using Cell Reader + while (OB_SUCC(ret) && OB_SUCC(cell_reader.next_cell())) { + uint64_t column_id = OB_INVALID_ID; + const ObObj *value = NULL; + bool is_row_finished = false; + + if (OB_FAIL(cell_reader.get_cell(column_id, value, &is_row_finished))) { + LOG_ERROR("get_cell from cell reader fail", KR(ret), KP(value), K(column_id)); + } else if (is_row_finished) { + ret = OB_ITER_END; + } else if (OB_ISNULL(value)) { + LOG_ERROR("cell data is invalid", K(value)); + ret = OB_INVALID_DATA; + } else if (OB_INVALID_ID == column_id) { + // Note: the column_id obtained here may be invalid + // For example a delete statement with only one cell and an invalid column_id in the 
cell + LOG_DEBUG("cell column_id is invalid", "cell", *value, + K_(dml_type), K_(table_id), K_(rowkey)); + } else { + bool ignore_column = false; + ColumnSchemaInfo* column_schema_info = NULL; + + if (NULL != table_schema && NULL != tb_schema_info) { + ColumnPropertyFlag column_property_flag; + + if (OB_FAIL(tb_schema_info->get_column_schema_info(column_id, enable_output_hidden_primary_key, column_schema_info, + column_property_flag))) { + LOG_ERROR("get_column_schema_info", KR(ret), K_(table_id), + "table_name", table_schema->get_table_name(), + K(column_id), K(enable_output_hidden_primary_key), + K(column_schema_info), K(column_property_flag)); + } else if (column_property_flag.is_non_user()) { + // ignore non user rowkey columns + LOG_DEBUG("ignore non user rowkey column", K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "table_schame_version", table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + // Column is deleted if Column Schema does not exist + } else if (column_property_flag.is_delete()) { + // ignore deleted columns + LOG_DEBUG("ignore non-existed column", K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "table_schame_version", table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + } else if (column_property_flag.is_hidden()) { + // ignore hidden columns + LOG_DEBUG("ignore hidden column", K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "table_schame_version", table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + } else if (column_property_flag.is_invisible()) { + // ignore invisible columns + LOG_DEBUG("ignore invisible column", K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "table_schame_version", table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info), K(column_property_flag), + "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + "version", table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else { + ignore_column = false; + } + } + + if (OB_SUCC(ret) && ! ignore_column) { + if (OB_FAIL(add_column_(cols, column_id, value, obj2str_helper, table_schema, + column_schema_info))) { + LOG_ERROR("add_column_ fail", K(cols), KR(ret), K(column_id), KP(value), + K(obj2str_helper), K(table_schema), K(column_schema_info)); + } + } + } + } + + ret = OB_ITER_END == ret ? 
OB_SUCCESS : ret; + } + + return ret; +} + +int MutatorRow::add_column_(ColValueList &cols, + const uint64_t column_id, + const ObObj *value, + ObObj2strHelper *obj2str_helper, + const share::schema::ObSimpleTableSchemaV2 *simple_table_schema, + const ColumnSchemaInfo *column_schema_info) +{ + int ret = OB_SUCCESS; + ColValue *cv_node = static_cast(allocator_.alloc(sizeof(ColValue))); + + // NOTE: Allow obj2str_helper and column_schema to be empty + if (OB_ISNULL(cv_node)) { + LOG_ERROR("allocate memory for ColValue fail", "size", sizeof(ColValue)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (NULL != column_schema_info && column_schema_info->is_delete()) { + LOG_ERROR("column_schema_info is not null, should not be delete column", K(column_id), + KPC(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + cv_node->reset(); + cv_node->value_ = *value; + cv_node->column_id_ = column_id; + common::ObArrayHelper extended_type_info; + common::ObAccuracy accuracy; + common::ObCollationType collation_type = ObCollationType::CS_TYPE_BINARY; + + // Set meta information and scale information if column schema is valid + if (NULL != column_schema_info) { + column_cast(cv_node->value_, *column_schema_info); + column_schema_info->get_extended_type_info(extended_type_info); + accuracy = column_schema_info->get_accuracy(); + collation_type = column_schema_info->get_collation_type(); + } + + LOG_DEBUG("column_cast: ", K(column_id), + "old_scale", value->get_scale(), "new_scale", cv_node->value_.get_scale()); + + // If the LOB is larger than 2M, do not print the contents, but the address and length, in case of taking too long to print the log + if (value->is_lob() && value->get_string_len() > 2 * _M_) { + LOG_DEBUG("column_cast: ", "old_obj_ptr", (void *)value->get_string_ptr(), + "old_obj_len", value->get_string_len(), + "new_obj_ptr", (void *)cv_node->value_.get_string_ptr(), + "new_obj_len", cv_node->value_.get_string_len()); + } else { + LOG_DEBUG("column_cast: ", "old_obj", *value, "new_obj", + cv_node->value_); + } + + // convert obj to string if obj2str_helper is valid + // no deep copy of string required + // note: currently DML must pass into obj2str_helper and simple_table_schema + if (OB_NOT_NULL(obj2str_helper) && OB_NOT_NULL(simple_table_schema) && OB_FAIL(obj2str_helper->obj2str(simple_table_schema->get_tenant_id(), + simple_table_schema->get_table_id(), + column_id, + cv_node->value_, + cv_node->string_value_, + allocator_, + false, + extended_type_info, + accuracy, + collation_type))) { + LOG_ERROR("obj2str fail", KR(ret), "obj", *value, K(obj2str_helper), K(accuracy), K(collation_type)); + } else if (OB_FAIL(cols.add(cv_node))) { + LOG_ERROR("add column into ColValueList fail", KR(ret), "column_value", *cv_node, K(cols)); + } + } + + if (OB_FAIL(ret)) { + if (NULL != cv_node) { + allocator_.free((void *)cv_node); + cv_node = NULL; + } + } + + return ret; +} + +int MutatorRow::parse_rowkey_(ColValueList &rowkey_cols, + const common::ObStoreRowkey &rowkey, + ObObj2strHelper *obj2str_helper, + const ObSimpleTableSchemaV2 *simple_table_schema, + const TableSchemaInfo *tb_schema_info, + const bool enable_output_hidden_primary_key) +{ + int ret = OB_SUCCESS; + int64_t rowkey_count = rowkey.get_obj_cnt(); + const ObObj *rowkey_objs = rowkey.get_obj_ptr(); + + if (OB_UNLIKELY(rowkey_count <= 0) || OB_ISNULL(rowkey_objs)) { + LOG_ERROR("rowkey is invalid", K(rowkey_count), K(rowkey_objs), K(rowkey)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int64_t index = 0; OB_SUCC(ret) && index < 
rowkey_count; index++) { + // Column ID is invalid when Table Schema is not provided + uint64_t column_id = OB_INVALID_ID; + ColumnSchemaInfo* column_schema_info = NULL; + bool ignore_column = false; + + if (NULL != simple_table_schema && NULL != tb_schema_info) { + const ObLogRowkeyInfo &rowkey_info = tb_schema_info->get_rowkey_info(); + ColumnPropertyFlag column_property_flag; + + if (OB_UNLIKELY(rowkey_count != rowkey_info.get_size())) { + LOG_ERROR("rowkey count does not match schema", K(rowkey_count), K(rowkey_info), + K(simple_table_schema->get_table_name()), K(simple_table_schema->get_table_id())); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(rowkey_info.get_column_id(index, column_id))) { + LOG_ERROR("get column id from rowkey info fail", KR(ret), K(index), + K(column_id), K(rowkey_info)); + } else if (OB_FAIL(tb_schema_info->get_column_schema_info(column_id, enable_output_hidden_primary_key, + column_schema_info, column_property_flag))) { + LOG_ERROR("get_column_schema_info fail", KR(ret), K(index), K(rowkey_count), + "table_schema_version", simple_table_schema->get_schema_version(), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + K(column_id), K(enable_output_hidden_primary_key), + K(column_schema_info), K(column_property_flag)); + } else if (column_property_flag.is_non_user()) { + // ignore hidden rowkey column + LOG_DEBUG("ignore non user rowkey column", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schame_version", simple_table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + } else if (column_property_flag.is_delete()) { + LOG_ERROR("rowkey column does not exist", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schame_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else if (column_property_flag.is_hidden()) { + // ignore hidden rowkey column + LOG_DEBUG("ignore hidden rowkey column", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schame_version", simple_table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + } else if (column_property_flag.is_invisible()) { + // ignore invisible column + LOG_DEBUG("ignore invisible column", K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schame_version", simple_table_schema->get_schema_version(), + K(column_id)); + + ignore_column = true; + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_schema_info), K(column_property_flag), + "table_id", simple_table_schema->get_table_id(), + "table_name", simple_table_schema->get_table_name(), + "table_schame_version", simple_table_schema->get_schema_version(), + K(column_id)); + ret = OB_ERR_UNEXPECTED; + } else { + ignore_column = false; + } + } + + if (OB_SUCC(ret) && ! 
ignore_column) { + if (OB_FAIL(add_column_(rowkey_cols, column_id, rowkey_objs + index, + obj2str_helper, simple_table_schema, column_schema_info))) { + LOG_ERROR("add_column_ fail", K(rowkey_cols), KR(ret), K(column_id), + K(index), K(rowkey_objs[index]), K(obj2str_helper), K(simple_table_schema), K(column_schema_info)); + } + } + } + } + + return ret; +} + +void MutatorRow::reset() +{ + deserialized_ = false; + cols_parsed_ = false; + + // FIXME: All nodes in new_cols_ and all memory used by Obj cast should be destroyed here. + // but for the time being this memory will be reused in the future, so for the time being it will not be destroyed + new_cols_.reset(); + old_cols_.reset(); + rowkey_cols_.reset(); + + ObMemtableMutatorRow::reset(); +} + +int MutatorRow::get_cols(ColValueList **rowkey_cols, ColValueList **new_cols, ColValueList **old_cols) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! cols_parsed_)) { + LOG_ERROR("column has not been parsed"); + ret = OB_STATE_NOT_MATCH; + } else { + if (NULL != rowkey_cols) { + *rowkey_cols = &rowkey_cols_; + } + + if (NULL != new_cols) { + *new_cols = &new_cols_; + } + + if (NULL != old_cols) { + *old_cols = &old_cols_; + } + } + return ret; +} + +//////////////////////////////////////////////////////////////////////////////////////// +int64_t DmlStmtUniqueID::get_dml_unique_id_length() const +{ + int64_t dml_unique_id_length = 0; + + dml_unique_id_length = pkey_and_log_id_str_.length(); + dml_unique_id_length = dml_unique_id_length + + sizeof(DELIMITER_STR) + + compute_str_length_base_num(log_offset_) + + sizeof(DELIMITER_STR) + + compute_str_length_base_num(row_index_) + + 1; + + return dml_unique_id_length; +} + +int64_t DmlStmtUniqueID::compute_str_length_base_num(uint64_t num) +{ + int64_t str_length = 0; + + if (num < 10) { + str_length = 1; + } else if (num < 100) { + str_length = 2; + } else if (num < 1000) { + str_length = 3; + } else if (num < 10000) { + str_length = 4; + } else if (num < 100000) { + str_length = 5; + } else if (num < 1000000) { + str_length = 6; + } else if (num < 10000000) { + str_length = 7; + } else if (num < 100000000) { + str_length = 8; + } else if (num < 1000000000) { + str_length = 9; + } else if (num < 10000000000) { + str_length = 10; + } else { + str_length = MAX_ROW_INDEX_LENGTH; + } + + return str_length; +} + +int DmlStmtUniqueID::customized_to_string(char* buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(NULL == buf || buf_len <= 0)) { + LOG_ERROR("invalid argument", K(buf), K(buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
is_valid())) { + LOG_ERROR("dml_stmt_unique_id is not valid", K(*this)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t str_len = pkey_and_log_id_str_.length(); + + if (OB_UNLIKELY(buf_len <= str_len)) { + LOG_ERROR("buf not enough", K(buf_len), K(str_len)); + ret = OB_BUF_NOT_ENOUGH; + } else { + MEMCPY(buf + pos, pkey_and_log_id_str_.ptr(), str_len); + pos += str_len; + + if (OB_FAIL(common::databuff_printf(buf, buf_len, pos, DELIMITER_STR"%d", log_offset_))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(buf), K(buf_len), K(pos), K(row_index_)); + } else if (OB_FAIL(common::databuff_printf(buf, buf_len, pos, DELIMITER_STR"%lu", row_index_))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(buf), K(buf_len), K(pos), K(row_index_)); + } else { + // succ + } + } + } + + return ret; +} + +DmlStmtTask::DmlStmtTask(PartTransTask &host, + ObLogEntryTask &redo_log_entry_task, + ObLogRowDataIndex &row_data_index, + MutatorRow &row) : + IStmtTask(STMT_TYPE_DML, host), + redo_log_entry_task_(redo_log_entry_task), + row_data_index_(row_data_index), + row_(row) +{ + // set hash value + IStmtTask::set_hash_value(row.rowkey_.murmurhash(host.get_partition().get_table_id() + host.get_partition().get_partition_id())); + +} + +DmlStmtTask::~DmlStmtTask() { reset(); } + +void DmlStmtTask::reset() +{ + IStmtTask::reset(); + row_.reset(); +} + +int64_t DmlStmtTask::get_part_id() const +{ + return get_host().get_partition().get_partition_id(); +} + +//////////////////////////////////////////////////////////////////////////////////// +int64_t DdlStmtUniqueID::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + + if (NULL != buf && buf_len > 0) { + (void)common::databuff_printf(buf, buf_len, pos, + "%ld_%ld", cluster_id_, schema_version_); + } + + return pos; +} + +DdlStmtTask::DdlStmtTask(PartTransTask &host, MutatorRow &row, const int64_t cluster_id) : + IStmtTask(STMT_TYPE_DDL, host), + row_(row), + ddl_stmt_str_(), + ddl_operation_type_(OB_INVALID_DDL_OP), + ddl_op_schema_version_(0), + ddl_op_table_id_(OB_INVALID_ID), + ddl_op_tenant_id_(OB_INVALID_TENANT_ID), + ddl_op_database_id_(OB_INVALID_ID), + ddl_op_tablegroup_id_(OB_INVALID_ID), + ddl_exec_tenant_id_(OB_INVALID_TENANT_ID), + cluster_id_(cluster_id), + br_(NULL) +{ + // set hash value + IStmtTask::set_hash_value(row.rowkey_.murmurhash(host.get_partition().hash())); + ddl_op_schema_version_str_[0] = '\0'; +} + +DdlStmtTask::~DdlStmtTask() +{ + reset(); +} + +bool DdlStmtTask::is_recyclebin_database_id(const uint64_t tenant_id, const uint64_t database_id) +{ + return (combine_id(tenant_id, OB_RECYCLEBIN_SCHEMA_ID) == database_id); +} + +bool DdlStmtTask::is_drop_table_ddl_(const int64_t ddl_operation_type) +{ + ObSchemaOperationType op_type = static_cast(ddl_operation_type); + + return (OB_DDL_DROP_TABLE == op_type); +} + +bool DdlStmtTask::is_drop_tablegroup_ddl_(const int64_t ddl_operation_type) +{ + ObSchemaOperationType op_type = static_cast(ddl_operation_type); + + return (OB_DDL_DEL_TABLEGROUP == op_type); +} + +bool DdlStmtTask::is_drop_tenant_ddl_(const int64_t ddl_operation_type) +{ + ObSchemaOperationType op_type = static_cast(ddl_operation_type); + + return (OB_DDL_DEL_TENANT == op_type); +} + +bool DdlStmtTask::is_global_index_ddl_(const int64_t ddl_operation_type) +{ + bool bool_ret = false; + + ObSchemaOperationType op_type = static_cast(ddl_operation_type); + + bool_ret = (OB_DDL_CREATE_GLOBAL_INDEX == op_type) + || (OB_DDL_DROP_GLOBAL_INDEX == op_type); + + return bool_ret; +} + +bool 
DdlStmtTask::is_normal_index_ddl_(const int64_t ddl_operation_type)
+{
+  bool bool_ret = false;
+
+  ObSchemaOperationType op_type = static_cast<ObSchemaOperationType>(ddl_operation_type);
+
+  bool_ret = (OB_DDL_CREATE_INDEX == op_type)
+      || (OB_DDL_DROP_INDEX == op_type);
+
+  return bool_ret;
+}
+
+bool DdlStmtTask::is_create_tenant_end_ddl_(const int64_t ddl_operation_type)
+{
+  bool bool_ret = false;
+
+  ObSchemaOperationType op_type = static_cast<ObSchemaOperationType>(ddl_operation_type);
+  bool_ret = (OB_DDL_ADD_TENANT_END == op_type);
+
+  return bool_ret;
+}
+
+bool DdlStmtTask::is_finish_schema_split_ddl_(const int64_t ddl_operation_type)
+{
+  bool bool_ret = false;
+
+  ObSchemaOperationType op_type = static_cast<ObSchemaOperationType>(ddl_operation_type);
+  bool_ret = (OB_DDL_FINISH_SCHEMA_SPLIT == op_type);
+
+  return bool_ret;
+}
+
+bool DdlStmtTask::is_sub_partition_alter_ddl_(const int64_t ddl_operation_type)
+{
+  bool bool_ret = false;
+
+  ObSchemaOperationType op_type = static_cast<ObSchemaOperationType>(ddl_operation_type);
+  bool_ret = (OB_DDL_ADD_SUB_PARTITION == op_type)
+      || (OB_DDL_DROP_SUB_PARTITION == op_type);
+
+  return bool_ret;
+}
+
+int DdlStmtTask::parse_ddl_info(ObLogBR *br,
+    const uint64_t row_index,
+    bool &is_valid_ddl,
+    int64_t &update_schema_version,
+    uint64_t &exec_tenant_id)
+{
+  int ret = OB_SUCCESS;
+  bool contain_ddl_stmt = false;
+
+  is_valid_ddl = true;
+
+  if (OB_ISNULL(br) || OB_UNLIKELY(OB_INVALID_ID == row_index)) {
+    LOG_ERROR("invalid argument", K(br), K(row_index));
+    ret = OB_INVALID_ARGUMENT;
+  }
+  // parse the column data,
+  // but do not convert the column values to strings
+  else if (OB_FAIL(row_.parse_cols())) {
+    LOG_ERROR("parse columns fail", KR(ret), K(row_));
+  } else if (OB_FAIL(parse_ddl_info_(contain_ddl_stmt, update_schema_version))) {
+    if (OB_INVALID_DATA == ret) {
+      // If invalid data is encountered, log it but ignore the dirty data
+      LOG_ERROR("fail to parse DDL, __all_ddl_operation table data is invalid",
+          "log_id", get_host().get_prepare_log_id(),
+          "log_timestamp", get_host().get_timestamp(),
+          K(row_));
+
+      // an invalid DDL statement
+      is_valid_ddl = false;
+      ret = OB_SUCCESS;
+    } else {
+      LOG_ERROR("parse_ddl_info_ fail", KR(ret), K(row_));
+    }
+  } else {
+    is_valid_ddl = true;
+
+    if (! contain_ddl_stmt) {
+      // filter out records that do not contain a DDL statement
+      is_valid_ddl = false;
+
+      // 1: If this is a drop-table operation on a table in the recycle bin, the DDL is retained.
+      // Normally the ddl statement would have been recorded, but for drop database it is not; the scenario is as follows:
+      // For table A in database DB, perform the following sequence of operations:
+      // 1. delete table A, it goes to the recycle bin
+      // 2. delete DB, it goes to the recycle bin
+      // 3. 
directly purge DB, table A in the recycle bin will also be purged, but the database id in the __all_ddl_operation table records + // the database id of the __recyclebin, not the database id of the DB, resulting in the DB cannot be traced back to table A being deleted, + // based on the above For the above reason, the DDL of the drop type should be kept here to ensure subsequent committer processing + // + // 2: Under oracle tenant, drop user will trigger multiple drop table, drop database DDLs, and none of them have ddl_stmt_str, here keep + if (is_drop_table_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + if (is_drop_tablegroup_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + if (is_drop_tenant_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + // When create a primary table and specify a global index, the DDL for the primary table and the DDL for the global index will be + // recorded in the __all_ddl_opertition table, and the ddl_stmt_str will be empty, so make sure the global index DDL is not filtered + if (is_global_index_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + // When you create the primary table, you also create the unique index table, in this case, in the __all_ddl_opertition table, will + // record the DDL of the primary table and the DDL of the unique index, at this time, the ddl_stmt_str is empty, you need to ensure that the unique index DDL is not filtered + // note: neither normal indexes nor unique indexes will be filtered here, you need to determine if it is a unique index based on schema information + if (is_normal_index_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + // schema split mode, there are two types of tenant creation: + // 1. OB_DDL_ADD_TENANT_START records ddl_stmt, but does not process it + // 2. OB_DDL_ADD_TENANT_END does not record ddl_stmt, it cannot be filtered here + if (is_create_tenant_end_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + // schema split done + if (is_finish_schema_split_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + + // obadmin performs dynamic add/drop operations on secondary partitions with null ddl_stmt_str, but not filtering + if (is_sub_partition_alter_ddl_(ddl_operation_type_)) { + is_valid_ddl = true; + } + } + + if (OB_SUCCESS == ret && is_valid_ddl) { + if (OB_FAIL(build_ddl_binlog_record_(br, ddl_stmt_str_, row_index))) { + LOG_ERROR("build_ddl_binlog_record_ fail", KR(ret), K(br), K(ddl_stmt_str_), "commit_version", get_host().get_global_trans_version(), + K(row_index)); + } else { + // set Binlog Record + set_binlog_record(br); + } + } + + if (OB_SUCC(ret)) { + // 1. If ddl_exec_tenant_id_ is invalid, use ddl_op_tenant_id_, as __all_ddl_operation does not contain an exec_tenant_id column for versions below 1460 + // 2. If ddl_op_tenant_id_ is not valid [e.g. 
alter system add zone 'z1' etc.], then use the partitioned tenant_id + if (OB_INVALID_TENANT_ID != ddl_exec_tenant_id_) { + exec_tenant_id = ddl_exec_tenant_id_; + } else if (OB_INVALID_TENANT_ID != ddl_op_tenant_id_) { + exec_tenant_id = ddl_op_tenant_id_; + } else { + exec_tenant_id = get_host().get_tenant_id(); + } + } + } + + if (OB_SUCCESS == ret) { + _LOG_INFO("[STAT] [DDL] [PARSE] OP_TYPE=%s(%ld) SCHEMA_VERSION=%ld " + "VERSION_DELAY=%.3lf(sec) EXEC_TENANT_ID=%ld TABLE_ID=%ld TENANT_ID=%ld DB_ID=%ld " + "TG_ID=%ld DDL_STMT=[%s] CONTAIN_DDL=%d IS_VALID=%d", + ObSchemaOperation::type_str((ObSchemaOperationType)ddl_operation_type_), + ddl_operation_type_, ddl_op_schema_version_, get_delay_sec(ddl_op_schema_version_), + ddl_exec_tenant_id_, ddl_op_table_id_, ddl_op_tenant_id_, + ddl_op_database_id_, ddl_op_tablegroup_id_, + to_cstring(ddl_stmt_str_), contain_ddl_stmt, is_valid_ddl); + } + + return ret; +} + +int DdlStmtTask::parse_ddl_info_(bool &contain_ddl_stmt, int64_t &update_schema_version) +{ + int ret = OB_SUCCESS; + ColValueList *new_cols = NULL; + ColValueList *rowkey_cols = NULL; + + if (OB_FAIL(row_.get_cols(&rowkey_cols, &new_cols, NULL))) { + LOG_ERROR("get_cols fail", KR(ret), K(row_)); + } else if (OB_ISNULL(rowkey_cols) || OB_ISNULL(new_cols)) { + LOG_ERROR("get_cols fail", K(new_cols), K(rowkey_cols)); + ret = OB_ERR_UNEXPECTED; + } else { + contain_ddl_stmt = false; + update_schema_version = 0; + + ddl_stmt_str_.reset(); + ddl_op_schema_version_ = 0; + ddl_op_schema_version_str_[0] = '\0'; + ddl_operation_type_ = OB_INVALID_DDL_OP; + ddl_op_table_id_ = OB_INVALID_ID; + ddl_op_tenant_id_ = OB_INVALID_TENANT_ID; + ddl_op_database_id_ = OB_INVALID_ID; + ddl_op_tablegroup_id_ = OB_INVALID_ID; + ddl_exec_tenant_id_ = OB_INVALID_TENANT_ID; + + // only parse insert stmt + if (storage::T_DML_INSERT != row_.dml_type_) { + LOG_WARN("ignore NON-INSERT statement of table __all_ddl_operation", K(row_)); + contain_ddl_stmt = false; + } else if (rowkey_cols->num_ != 1) { + LOG_ERROR("__all_ddl_operation rowkey column num is more than 1. 
not supported", + K(*rowkey_cols), K(row_)); + ret = OB_NOT_SUPPORTED; + } else if (OB_ISNULL(rowkey_cols->head_) || OB_UNLIKELY(rowkey_cols->num_ <= 0)) { + LOG_ERROR("rowkey column parsed is invalid", KPC(rowkey_cols)); + ret = OB_ERR_UNEXPECTED; + } + // parse schema_version from rowkey + else if (OB_FAIL(parse_schema_version_(rowkey_cols->head_->value_, ddl_op_schema_version_))) { + LOG_ERROR("parse_schema_version_ fail", KR(ret), K(*(rowkey_cols->head_))); + } else { + // update schema version whatever + update_schema_version = ddl_op_schema_version_; + + // parse normal columns + if (OB_FAIL(parse_ddl_info_from_normal_columns_(*new_cols))) { + LOG_ERROR("parse_ddl_info_from_normal_columns_ fail", KR(ret), K(*new_cols)); + } else { + // verify parse result + if (ddl_stmt_str_.empty()) { + // only focus on ddl that not empty + contain_ddl_stmt = false; + } + // ignore ddl which tenant id is invalid + else if (OB_INVALID_TENANT_ID == ddl_op_tenant_id_) { + LOG_WARN("ignore DDL which tenant id is invaild", + K(ddl_op_tenant_id_), + K(ddl_op_schema_version_), + "ddl_op_schema_version_str", static_cast(ddl_op_schema_version_str_), + K(ddl_operation_type_), + K(ddl_op_table_id_), + K(ddl_op_database_id_), + K(ddl_op_tablegroup_id_), + K(ddl_stmt_str_)); + contain_ddl_stmt = false; + } + // Allow tablegroup id, database id, table id to be invalid + // but schema version/operation type cannot be invalid, they are treated as invalid DDLs + else if (ddl_op_schema_version_ <= 0 || OB_INVALID_DDL_OP == ddl_operation_type_) { + LOG_ERROR("ddl information is incomplete", + K(ddl_op_tenant_id_), + K(ddl_op_schema_version_), + "ddl_op_schema_version_str", static_cast(ddl_op_schema_version_str_), + K(ddl_operation_type_), + K(ddl_op_table_id_), + K(ddl_op_database_id_), + K(ddl_op_tablegroup_id_), + K(ddl_stmt_str_)); + ret = OB_INVALID_DATA; + } else { + contain_ddl_stmt = true; + } + } + } + } + + return ret; +} + +int DdlStmtTask::build_ddl_binlog_record_(ObLogBR *br, + const ObString &ddl_stmt, + const uint64_t row_index) +{ + int ret = OB_SUCCESS; + const int64_t global_trans_version = get_host().get_global_trans_version(); + uint64_t cluster_id = get_host().get_cluster_id(); + const common::ObVersion &freeze_version = get_host().get_freeze_version(); + // DDL tenant_id records the tenant ID of the partition to which it belongs, not the executor tenant ID, to ensure that in schema split + // scenarios, incremental backup DDLs are not incorrectly distributed to the tenant to which they belong, causing loci to get stuck + const uint64_t tenant_id = get_host().get_tenant_id(); + ILogRecord *br_data = NULL; + + // no need set trace id, trace_info for ddl + ObString trace_id; + ObString trace_info; + ObString ddl_unique_id; + const int64_t part_trans_task_count = 1; + + if (OB_ISNULL(br) || OB_UNLIKELY(OB_INVALID_ID == row_index)) { + LOG_ERROR("invalid argument", K(br), K(ddl_stmt), K(global_trans_version), K(row_index)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_ddl_unique_id_(ddl_unique_id))) { + LOG_ERROR("init_ddl_unique_id_ fail", KR(ret), K(ddl_unique_id)); + } else if (OB_FAIL(br->init_data(EDDL, cluster_id, tenant_id, ddl_op_schema_version_, + trace_id, trace_info, ddl_unique_id, freeze_version, global_trans_version, + part_trans_task_count))) { + LOG_ERROR("ObLogBR::init_data EDDL fail", KR(ret), K(global_trans_version), + K(cluster_id), K(freeze_version), K(tenant_id), K(ddl_op_schema_version_), + K(trace_id), K(trace_info), K(ddl_unique_id), K(part_trans_task_count)); + } else 
if (OB_ISNULL(br_data = (br->get_data()))) { + LOG_ERROR("get binlog record data fail", K(br)); + ret = OB_ERR_UNEXPECTED; + } else { + // 2 field for columns + // ddl_stmt_str and ddl_schema_version + int col_count = 2; + int64_t column_array_size = sizeof(BinLogBuf) * col_count; + BinLogBuf *column_array = static_cast(get_host().alloc(column_array_size)); + + if (OB_ISNULL(column_array)) { + LOG_ERROR("allocate memory for column array fail", K(column_array_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + br_data->setNewColumn(column_array, col_count); + br_data->setOldColumn(NULL, 0); + + br_data->putNew(ddl_stmt.ptr(), ddl_stmt.length()); + br_data->putNew(ddl_op_schema_version_str_, static_cast(strlen(ddl_op_schema_version_str_))); + } + } + + return ret; +} + +int DdlStmtTask::init_ddl_unique_id_(common::ObString &ddl_unique_id) +{ + int ret = OB_SUCCESS; + DdlStmtUniqueID ddl_stmt_unique_id(cluster_id_, ddl_op_schema_version_); + + if (OB_UNLIKELY(! ddl_stmt_unique_id.is_valid())) { + LOG_ERROR("ddl_stmt_unique_id is not valid", K(ddl_stmt_unique_id)); + ret = OB_INVALID_ARGUMENT; + } else { + common::ObIAllocator &allocator= this->get_host().get_allocator(); + const int64_t buf_len = MAX_DDL_UNIQUE_ID_LENGTH; + char *buf = static_cast(allocator.alloc(buf_len)); + int64_t pos = 0; + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocate memory for trans id buffer fail", K(buf)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, + "%s", to_cstring(ddl_stmt_unique_id)))) { + LOG_ERROR("init_ddl_unique_id_ fail", KR(ret), K(buf), K(buf_len), K(pos), + K(ddl_stmt_unique_id)); + } else { + ddl_unique_id.assign_ptr(buf, static_cast(pos)); + } + } + + return ret; +} + +int DdlStmtTask::parse_schema_version_(ObObj &value, int64_t &schema_version) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + + if (OB_FAIL(value.get_int(schema_version))) { + LOG_ERROR("get schema_version fail", KR(ret), K(value)); + } else if (OB_UNLIKELY(schema_version <= 0)) { + LOG_ERROR("all_ddl_operation schema_version is invalid", K(schema_version), K(value)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(databuff_printf(ddl_op_schema_version_str_, MAX_DDL_SCHEMA_VERSION_STR_LENGTH, + pos, "%ld", schema_version))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(schema_version), + K(ddl_op_schema_version_str_), K(pos)); + } else { + // succ + } + + return ret; +} + +int DdlStmtTask::parse_ddl_info_from_normal_columns_(ColValueList &col_value_list) +{ + int ret = OB_SUCCESS; + const bool is_schema_split_mode = TCTX.is_schema_split_mode_; + const uint64_t ddl_tenant_id = get_host().get_tenant_id(); + ColValue *cv_node = col_value_list.head_; + + while (NULL != cv_node && OB_SUCCESS == ret) { + ColValue *next = cv_node->get_next(); + ObObj &value = cv_node->value_; + + switch (cv_node->column_id_) { + case ALL_DDL_OPERATION_TABLE_EXEC_TENANT_ID_COLUMN_ID: { + // allow invalid exec_tenant_id + PARSE_INT64("exec_tenant_id", value, ddl_exec_tenant_id_, OB_INVALID_TENANT_ID, false); + break; + } + + case ALL_DDL_OPERATION_TABLE_TENANT_ID_COLUMN_ID: { + // allow invalid tenant id + PARSE_INT64("tenant_id", value, ddl_op_tenant_id_, OB_INVALID_TENANT_ID, false); + // schema split schema, __all_ddl_operation table tenant_id as 0, need to record tenant_id of tenant + if (is_schema_split_mode) { + if (OB_INVALID_TENANT_ID == ddl_op_tenant_id_) { + ddl_op_tenant_id_ = ddl_tenant_id; + } + } + break; + } + + case ALL_DDL_OPERATION_TABLE_DATABASE_ID_COLUMN_ID: { + // allow invalid database id + 
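+        // In schema-split mode __all_ddl_operation stores a pure (tenant-local) database id,
+        // so the value read below is passed through combine_id_() to stitch the tenant id back
+        // onto it (see combine_id_() later in this file).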
PARSE_INT64("database_id", value, ddl_op_database_id_, OB_INVALID_ID, false); + const uint64_t original_ddl_op_database_id = ddl_op_database_id_; + ddl_op_database_id_ = combine_id_(is_schema_split_mode, ddl_tenant_id, ddl_op_database_id_); + LOG_DEBUG("[DDL_STMT] [PARSE]", K(ddl_tenant_id), "original_ddl_op_database_id", original_ddl_op_database_id, + K(ddl_op_database_id_)); + + break; + } + + case ALL_DDL_OPERATION_TABLE_TABLEGROUP_ID_COLUMN_ID: { + // allow invalid tablegroup id + PARSE_INT64("tablegroup_id", value, ddl_op_tablegroup_id_, OB_INVALID_ID, false); + const uint64_t original_ddl_op_tablegroup_id = ddl_op_tablegroup_id_; + ddl_op_tablegroup_id_ = combine_id_(is_schema_split_mode, ddl_tenant_id, ddl_op_tablegroup_id_); + LOG_DEBUG("[DDL_STMT] [PARSE]", K(ddl_tenant_id), "original_ddl_op_tablegroup_id", original_ddl_op_tablegroup_id, + K(ddl_op_tablegroup_id_)); + + break; + } + + case ALL_DDL_OPERATION_TABLE_TABLE_ID_COLUMN_ID: { + // allow invalid table id + PARSE_INT64("table_id", value, ddl_op_table_id_, OB_INVALID_ID, false); + const uint64_t original_ddl_op_table_id = ddl_op_table_id_; + ddl_op_table_id_ = combine_id_(is_schema_split_mode, ddl_tenant_id, ddl_op_table_id_); + LOG_DEBUG("[DDL_STMT] [PARSE]", K(ddl_tenant_id), K(original_ddl_op_table_id), K(ddl_op_table_id_)); + + break; + } + + case ALL_DDL_OPERATION_TABLE_OPERATION_TYPE_COLUMN_ID: { + // check operation type + PARSE_INT64("operation_type", value, ddl_operation_type_, OB_INVALID_DDL_OP, true); + break; + } + + case ALL_DDL_OPERATION_TABLE_DDL_STMT_STR_COLUMN_ID: { + ddl_stmt_str_ = value.get_varchar(); + break; + } + + default: + break; + } + + cv_node = next; + } + + return ret; +} + +uint64_t DdlStmtTask::combine_id_(const bool is_schema_split_mode, + const uint64_t tenant_id, + const uint64_t pure_id) +{ + uint64_t res_id = pure_id; + + if (is_schema_split_mode) { + if (OB_INVALID_ID != pure_id && pure_id > 0) { + res_id = combine_id(tenant_id, pure_id); + } + } + + return res_id; +} + +void DdlStmtTask::reset() +{ + ddl_stmt_str_.reset(); + ddl_op_schema_version_ = 0; + ddl_op_schema_version_str_[0] = '\0'; + ddl_operation_type_ = OB_INVALID_DDL_OP; + ddl_op_table_id_ = OB_INVALID_ID; + ddl_op_tenant_id_ = OB_INVALID_TENANT_ID; + ddl_op_database_id_ = OB_INVALID_ID; + ddl_op_tablegroup_id_ = OB_INVALID_ID; + ddl_exec_tenant_id_ = OB_INVALID_TENANT_ID; + br_ = NULL; + + row_.reset(); + IStmtTask::reset(); +} + +//////////////////////////////////////////////////////////////////////////////////// + +ObLogEntryTask::ObLogEntryTask() : + host_(NULL), + partition_(), + trans_id_(), + log_id_(OB_INVALID_ID), + log_offset_(0), + meta_node_(NULL), + redo_node_(), + stmt_list_(), + formatted_stmt_num_(0), + row_ref_cnt_(0), + arena_allocator_("LogEntryTask", OB_MALLOC_MIDDLE_BLOCK_SIZE) +{ +} + +ObLogEntryTask::~ObLogEntryTask() +{ + reset(); +} + +void ObLogEntryTask::reset() +{ + host_ = NULL; + partition_.reset(); + trans_id_.reset(); + log_id_ = OB_INVALID_ID; + log_offset_ = 0; + meta_node_ = NULL; + redo_node_.reset(); + stmt_list_.reset(); + formatted_stmt_num_ = 0; + row_ref_cnt_ = 0; + + arena_allocator_.clear(); +} + +bool ObLogEntryTask::is_valid() const +{ + bool bool_ret = false; + + bool_ret = (NULL != meta_node_) + && (meta_node_->is_valid()) + && (redo_node_.is_valid()); + + return bool_ret; +} + +int ObLogEntryTask::init(const common::ObPartitionKey &pkey, + const transaction::ObTransID &trans_id, + const uint64_t log_id, + const int32_t log_offset, + DmlRedoLogMetaNode *meta_node, + char 
*mutator_row_data, + const int64_t mutator_row_size, + const int64_t redo_data_size) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == log_id) + || OB_UNLIKELY(log_offset < 0) + || OB_ISNULL(meta_node) + || OB_ISNULL(mutator_row_data) + || OB_UNLIKELY(mutator_row_size <=0) + || OB_UNLIKELY(redo_data_size <=0)) { + LOG_ERROR("invalid argument", K(log_id), K(log_offset), K(mutator_row_data), K(mutator_row_size), + K(redo_data_size)); + ret = OB_INVALID_ARGUMENT; + } else { + partition_ = pkey; + trans_id_ = trans_id; + log_id_ = log_id; + log_offset_ = log_offset; + meta_node_ = meta_node; + + redo_node_.reset(mutator_row_data, mutator_row_size, redo_data_size); + + LOG_DEBUG("LogEntryTask init", K(this), KPC(this)); + } + + return ret; +} + +int ObLogEntryTask::append_redo_log(const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta_node_)) { + LOG_ERROR("meta_node_ is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(! meta_node_->is_valid()) + || OB_UNLIKELY(! redo_node_.is_valid())) { + LOG_ERROR("meta_node_ or redo_node_ is not valid", KPC(meta_node_), K(redo_node_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(meta_node_->update_redo_meta(log_no, log_id))) { + LOG_ERROR("meta_node_ update_redo_meta fail", KR(ret), K(log_no), K(log_id)); + } else if (OB_FAIL(redo_node_.append_redo_log(redo_data, redo_data_size))) { + LOG_ERROR("redo node append data fail", KR(ret), K(redo_node_), K(log_no), K(log_id), + K(redo_data_size)); + } else { + // succ + } + + return ret; +} + +void *ObLogEntryTask::alloc(const int64_t size) +{ + void *alloc_ret = NULL; + + if (size > 0) { + alloc_ret = arena_allocator_.alloc(size); + } + + return alloc_ret; +} + +void ObLogEntryTask::free(void *ptr) +{ + arena_allocator_.free(ptr); + ptr = NULL; +} + +int ObLogEntryTask::add_stmt(const uint64_t row_index, IStmtTask *stmt_task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == row_index) + || OB_ISNULL(stmt_task)) { + LOG_ERROR("invalid argument", K(row_index), KPC(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(stmt_list_.add(stmt_task))) { + LOG_ERROR("add stmt task into stmt_list fail", KR(ret), K(stmt_list_), KP(stmt_task)); + } else { + stmt_task->set_row_index(row_index); + } + + return ret; +} + +int64_t ObLogEntryTask::inc_formatted_stmt_num() +{ + return ATOMIC_AAF(&formatted_stmt_num_, 1); +} + +int ObLogEntryTask::link_row_list() +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta_node_)) { + LOG_ERROR("meta_node_ is NULL", KPC(meta_node_)); + ret = OB_ERR_UNEXPECTED; + } else { + DmlStmtTask *stmt_task = static_cast(stmt_list_.head_); + meta_node_->valid_row_num_ = 0; + + while (OB_SUCCESS == ret && NULL != stmt_task) { + DmlStmtTask *next_stmt = static_cast(stmt_task->get_next()); + ObLogRowDataIndex &row_data_index = stmt_task->get_row_data_index(); + ObLogBR *br = row_data_index.get_binlog_record(); + const bool is_rollback_stmt = row_data_index.is_rollback(); + bool need_link = true; + + if (is_rollback_stmt) { + meta_node_->is_contain_rollback_row_ = true; + LOG_DEBUG("handle rollback stmt", K(meta_node_), K(is_rollback_stmt), K(row_data_index)); + } else if (OB_ISNULL(br)) { + LOG_ERROR("binlog record in statement is invalid", K(row_data_index), KPC(stmt_task), K(br)); + ret = OB_ERR_UNEXPECTED; + } else if (! 
br->is_valid()) { + // ignore invalid br + need_link = false; + // recycle Binlog Record + LOG_DEBUG("br is not valid", K(*this), "valid_row_num", meta_node_->valid_row_num_); + + if (OB_FAIL(revert_binlog_record_(br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert binlog record fail", KR(ret), K(br), K(stmt_task), KPC(stmt_task)); + } + } else { + br = NULL; + } + } else { + // do nothing + } + + if (OB_SUCC(ret) && need_link) { + meta_node_->valid_row_num_++; + row_data_index.set_next(NULL); + + if (NULL == meta_node_->row_head_) { + meta_node_->row_head_ = &row_data_index; + meta_node_->row_tail_ = &row_data_index; + } else { + meta_node_->row_tail_->set_next(&row_data_index); + meta_node_->row_tail_ = &row_data_index; + } + } + + if (OB_SUCCESS == ret) { + stmt_task = next_stmt; + } + } // while + + set_row_ref_cnt(meta_node_->valid_row_num_); + } + + return ret; +} + +int ObLogEntryTask::revert_binlog_record_(ObLogBR *br) +{ + int ret = OB_SUCCESS; + ILogRecord *br_data = NULL; + IObLogBRPool *br_pool = TCTX.br_pool_; + + if (OB_ISNULL(br_pool)) { + LOG_ERROR("invalid resource collector", K(br_pool)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("binlog record is invalid", K(br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br_data = br->get_data())) { + LOG_ERROR("binlog record data is invalid", K(br)); + ret = OB_INVALID_ARGUMENT; + } else { + br->set_host(NULL); + br_pool->free(br); + } + + return ret; +} + +int ObLogEntryTask::get_valid_row_num(int64_t &valid_row_num) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(meta_node_)) { + LOG_ERROR("meta_node_ is NULL", KPC(meta_node_)); + ret = OB_ERR_UNEXPECTED; + } else { + valid_row_num = meta_node_->get_valid_row_num(); + } + + return ret; +} + +int64_t ObLogEntryTask::dec_row_ref_cnt() +{ + return ATOMIC_AAF(&row_ref_cnt_, -1); +} + +void ObLogEntryTask::set_row_ref_cnt(const int64_t row_ref_cnt) +{ + (void)ATOMIC_SET(&row_ref_cnt_, row_ref_cnt); +} + +const char *ObLogEntryTask::print_callback_module(const CallBackModule cb) +{ + const char *str = "NONE"; + + switch(cb) { + case DML_PARSER_CB: + str = "DmlParserCallback"; + break; + + case FORMATTER_CB: + str = "FormatterCallback"; + break; + + case STORAGER_CB: + str = "StoragerCallback"; + break; + + default: + str = "NONE"; + break; + } + + return str; +} + +bool ObLogEntryTask::is_dml_parser(const CallBackModule cb) +{ + return DML_PARSER_CB == cb; +} + +bool ObLogEntryTask::is_formatter(const CallBackModule cb) +{ + return FORMATTER_CB == cb; +} + +bool ObLogEntryTask::is_storager(const CallBackModule cb) +{ + return STORAGER_CB == cb; +} + +//////////////////////////////////////////////////////////////////////////////////// + +PartTransTask::PartTransTask() : + ObLogResourceRecycleTask(ObLogResourceRecycleTask::PART_TRANS_TASK), + serve_state_(SERVED), + type_(TASK_TYPE_UNKNOWN), + exec_tenant_id_(OB_INVALID_TENANT_ID), + partition_(), + pkey_str_(NULL), + timestamp_(OB_INVALID_TIMESTAMP), + is_trans_id_inited_(false), + trans_id_(), + trans_id_str_(), + prepare_log_id_(OB_INVALID_ID), + cluster_id_(0), + freeze_version_(), + pkey_and_log_id_str_(), + row_no_(0), + sorted_redo_list_(), + sorted_dml_row_list_(), + log_entry_task_(NULL), + global_trans_version_(OB_INVALID_VERSION), + is_trans_committed_(false), + is_trans_ready_to_commit_(false), + checkpoint_seq_(0), + global_trans_seq_(0), + participants_(NULL), + participant_count_(0), + local_schema_version_(OB_INVALID_VERSION), + stmt_list_(), + next_task_(NULL), + ref_cnt_(0), + 
data_ready_lock_(), + is_data_ready_(false), + wait_formatted_cond_(NULL), + wait_data_ready_cond_(), + dml_ready_redo_node_num_(0), + allocator_(), + trace_id_(), + trace_info_(), + prev_trans_arr_(), + follow_trans_arr_(), + reserve_field_(0) +{ +} + +PartTransTask::~PartTransTask() +{ + reset(); +} + +int PartTransTask::set_prepare_log_id(const uint64_t prepare_log_id) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_INVALID_ID == prepare_log_id)) { + LOG_ERROR("invalid argument", K(prepare_log_id)); + ret = OB_INVALID_ARGUMENT; + } else { + prepare_log_id_ = prepare_log_id; + } + return ret; +} + +void PartTransTask::set_allocator(const int64_t page_size, ObIAllocator &large_allocator) +{ + allocator_.set_allocator(page_size, large_allocator); +} + +void PartTransTask::set_prealloc_page(void *page) +{ + allocator_.set_prealloc_page(page); +} + +void PartTransTask::revert_prealloc_page(void *&page) +{ + allocator_.revert_prealloc_page(page); +} + +const char *PartTransTask::print_task_type(const TaskType type) +{ + const char *task_type_str = "UNKNOWN"; + + switch (type) { + case TASK_TYPE_UNKNOWN: { + task_type_str = "UNKNOWN"; + break; + } + case TASK_TYPE_DML_TRANS: { + task_type_str = "DML_TRANS"; + break; + } + case TASK_TYPE_DDL_TRANS: { + task_type_str = "DDL_TRANS"; + break; + } + case TASK_TYPE_PART_HEARTBEAT: { + task_type_str = "PART_HEARTBEAT"; + break; + } + case TASK_TYPE_GLOBAL_HEARTBEAT: { + task_type_str = "GLOBAL_HEARTBEAT"; + break; + } + case TASK_TYPE_OFFLINE_PARTITION: { + task_type_str = "OFFLINE_PARTITION"; + break; + } + case TASK_TYPE_NOT_SERVED_TRANS: { + task_type_str = "NOT_SERVED_TRANS"; + break; + } + + default: { + task_type_str = "INVALID"; + break; + } + } + + return task_type_str; +} + +void PartTransTask::set_pkey_info(const common::ObPartitionKey &partition, + const char *pkey_str) +{ + partition_ = partition; + pkey_str_ = pkey_str; +} + +void PartTransTask::reset() +{ + destroy_participant_array_(); + + serve_state_ = SERVED; + type_ = TASK_TYPE_UNKNOWN; + exec_tenant_id_ = OB_INVALID_TENANT_ID; + partition_.reset(); + pkey_str_ = NULL; + timestamp_ = OB_INVALID_TIMESTAMP; + is_trans_id_inited_ = false; + trans_id_.reset(); + trans_id_str_.reset(); + prepare_log_id_ = OB_INVALID_ID; + cluster_id_ = 0; + freeze_version_.reset(); + pkey_and_log_id_str_.reset(); + row_no_ = 0; + sorted_redo_list_.reset(); + sorted_dml_row_list_.reset(); + log_entry_task_ = NULL; + global_trans_version_ = OB_INVALID_VERSION; + is_trans_committed_ = false; + is_trans_ready_to_commit_ = false; + checkpoint_seq_ = 0; + global_trans_seq_ = 0; + participants_ = NULL; + participant_count_ = 0; + + local_schema_version_ = OB_INVALID_VERSION; + + free_stmt_list(); + + next_task_ = NULL; + + ref_cnt_ = 0; + is_data_ready_ = false; + wait_formatted_cond_ = NULL; + dml_ready_redo_node_num_ = 0; + + // The trace_id memory does not need to be freed separately, the allocator frees it all together + trace_id_.reset(); + trace_info_.reset(); + + prev_trans_arr_.reset(); + follow_trans_arr_.reset(); + + // reuse memory + allocator_.reset(); + reserve_field_ = 0; +} + +int PartTransTask::push_redo_log(const common::ObPartitionKey &pkey, + const transaction::ObTransID &trans_id, + const int64_t log_no, + const uint64_t log_id, + const int32_t log_offset, + const int64_t tstamp, + const char *buf, + const int64_t buf_len, + bool &need_dispatch_log_entry_task, + ObLogEntryTask *&redo_log_entry_task) +{ + int ret = OB_SUCCESS; + ObMemtableMutatorMeta meta; + int64_t pos = 0; + const bool 
is_ddl_part = is_ddl_partition(pkey); + need_dispatch_log_entry_task = false; + redo_log_entry_task = NULL; + + if (OB_UNLIKELY(log_no < 0) + || OB_UNLIKELY(OB_INVALID_ID == log_id) + || OB_ISNULL(buf) + || OB_UNLIKELY(buf_len <= 0)) { + LOG_ERROR("invalid arguments", K(log_no), K(log_id), KP(buf), K(buf_len)); + ret = OB_INVALID_ARGUMENT; + } + // deserialize meta + else if (OB_FAIL(meta.deserialize(buf, buf_len, pos))) { + LOG_ERROR("deserialize ObMemtableMutatorMeta fail", KR(ret), K(meta), K(log_no), + K(log_id), K(buf), K(buf_len), K(pos)); + } + // Check the length of the data recorded in the meta, it can't be smaller than the length of the redo data + // otherwise the data does not match the one recorded in the meta + else if (OB_UNLIKELY(meta.get_total_size() < buf_len)) { + LOG_ERROR("meta total size is little than mutator log data size", + K(meta), K(buf_len), K(log_id), K(log_no)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(init_trans_id_info_(pkey, trans_id))) { + LOG_ERROR("init_trans_id_info_ fail", KR(ret), K(pkey), K(trans_id)); + } else { + const char *redo_data = buf + pos; + int64_t redo_data_size = buf_len - pos; + const uint8_t row_flags = meta.get_flags(); + + if (meta.is_row_start()) { + // If it is the start of a row, a new redo node is generated + if (OB_FAIL(push_redo_on_row_start_(trans_id, meta, log_no, log_id, log_offset, redo_data, redo_data_size))) { + if (OB_ENTRY_EXIST == ret) { + // redo log duplicate + } else { + LOG_ERROR("push_redo_on_row_start_ fail", KR(ret), K(trans_id), K(meta), K(log_no), K(log_id), K(log_offset), + KP(redo_data), K(redo_data_size)); + } + } + } else { + // If it is not the beginning of a row, push data to the previous node + if (OB_FAIL(push_redo_on_not_row_start_(meta, log_no, log_id, redo_data, redo_data_size))) { + // If it is the first node, the redo log is incomplete + if (OB_LOG_MISSING == ret) { + LOG_WARN("[MISSING_LOG] first redo is not row start, must miss redo log", + K(pkey), K(log_no), K(log_id), K(tstamp), K(trans_id), K(sorted_redo_list_), K(meta)); + } else { + LOG_ERROR("push_redo_on_not_row_start_ fail", KR(ret), K(pkey), K(meta), K(log_no), + K(log_id), K(tstamp), KP(redo_data), K(redo_data_size), K(trans_id)); + } + } + } + + if (OB_SUCC(ret)) { + // 1. DML data, a redo data aggregation will be dispatched + // 2. duplicate redo logs won't dispatch + if (! 
is_ddl_part) { + if (ObTransRowFlag::is_normal_row(row_flags) || ObTransRowFlag::is_big_row_end(row_flags)) { + if (OB_ISNULL(log_entry_task_)) { + LOG_ERROR("log_entry_task_ is NULL", K(partition_), K(trans_id_), K(log_entry_task_)); + ret = OB_ERR_UNEXPECTED; + } else { + redo_log_entry_task = log_entry_task_; + need_dispatch_log_entry_task = true; + // reset log_entry_task_ + log_entry_task_ = NULL; + + const bool is_test_mode_on = TCONF.test_mode_on != 0; + if (is_test_mode_on) { + LOG_INFO("LogEntryTask-alloc", "LogEntryTask", *redo_log_entry_task); + } + } + } + } + } + } + + LOG_DEBUG("push redo log", KR(ret), K(is_ddl_part), K(pkey), K(log_no), K(log_id), K(tstamp), K(buf_len), K(meta), + K(trans_id), K(redo_log_entry_task), KPC(redo_log_entry_task), K(sorted_redo_list_)); + + return ret; +} + +int PartTransTask::init_trans_id_info_(const common::ObPartitionKey &pkey, + const transaction::ObTransID &trans_id) +{ + int ret = OB_SUCCESS; + + if (is_trans_id_inited_) { + // do nothing + } else { + trans_id_ = trans_id; + + if (OB_FAIL(to_string_trans_id())) { + LOG_ERROR("to_string_trans_id fail", KR(ret), K(pkey), K(trans_id)); + } else { + is_trans_id_inited_ = true; + } + } + + return ret; +} + +int PartTransTask::push_redo_on_row_start_(const transaction::ObTransID &trans_id, + const ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const int32_t log_offset, + const char *redo_data, + const int64_t redo_data_size) +{ + int ret = OB_SUCCESS; + // Length of the actual data, minus the meta information + const int64_t mutator_row_size = meta.get_data_size(); + const bool is_ddl_part = is_ddl_partition(partition_); + + if (is_ddl_part) { + if (OB_FAIL(push_ddl_redo_on_row_start_(meta, log_no, log_id, redo_data, redo_data_size, mutator_row_size))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("push_ddl_redo_on_row_start_ fail", KR(ret), K(trans_id), K(meta), K(log_no), K(log_id), + KP(redo_data), K(redo_data_size)); + } + } + } else { + if (OB_FAIL(push_dml_redo_on_row_start_(trans_id, meta, log_no, log_id, log_offset, redo_data, redo_data_size, + mutator_row_size))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("push_dml_redo_on_row_start_ fail", KR(ret), K(trans_id), K(meta), K(log_no), K(log_id), + KP(redo_data), K(redo_data_size)); + } + } + } + + return ret; +} + +int PartTransTask::push_ddl_redo_on_row_start_(const ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size, + const int64_t mutator_row_size) +{ + int ret = OB_SUCCESS; + DdlRedoLogNode *node = NULL; + char *mutator_row_data = NULL; + + // alloc a Node + if (OB_ISNULL(node = static_cast(allocator_.alloc(sizeof(DdlRedoLogNode))))) { + LOG_ERROR("allocate memory for DdlRedoLogNode fail", "size", sizeof(DdlRedoLogNode)); + ret = OB_ALLOCATE_MEMORY_FAILED; + // DDL partitioned transaction task allocates buffer of actual data length + } else if (OB_ISNULL(mutator_row_data = static_cast(allocator_.alloc(mutator_row_size)))) { + LOG_ERROR("allocate memory for mutator row data fail", K(mutator_row_size), K(meta)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // Fill the data carried in this redo + (void)MEMCPY(mutator_row_data, redo_data, redo_data_size); + + // reset redo log node + node->reset(log_no, log_id, mutator_row_data, mutator_row_size, redo_data_size); + + // Push to redo list + if (OB_FAIL(sorted_redo_list_.push(node))) { + if (OB_ENTRY_EXIST == ret) { + // redo log duplicate + } else { + LOG_ERROR("push node 
into redo log list fail", KR(ret), K(sorted_redo_list_), KPC(node));
+      }
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+    if (NULL != node) {
+      node->reset();
+      allocator_.free(node);
+      node = NULL;
+    }
+
+    if (NULL != mutator_row_data) {
+      allocator_.free(mutator_row_data);
+      mutator_row_data = NULL;
+    }
+  }
+
+  return ret;
+}
+
+int PartTransTask::push_dml_redo_on_row_start_(const transaction::ObTransID &trans_id,
+    const memtable::ObMemtableMutatorMeta &meta,
+    const int64_t log_no,
+    const uint64_t log_id,
+    const int32_t log_offset,
+    const char *redo_data,
+    const int64_t redo_data_size,
+    const int64_t mutator_row_size)
+{
+  int ret = OB_SUCCESS;
+  ObLogEntryTask *redo_log_entry_task = NULL;
+  char *mutator_row_data = NULL;
+  DmlRedoLogMetaNode *meta_node = NULL;
+
+  if (OB_ISNULL(meta_node = static_cast<DmlRedoLogMetaNode *>(allocator_.alloc(sizeof(DmlRedoLogMetaNode))))) {
+    LOG_ERROR("allocate memory for DmlRedoLogMetaNode fail", "size", sizeof(DmlRedoLogMetaNode));
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+  } else {
+    // reset redo log meta node
+    meta_node->reset(log_no, log_id);
+
+    // Push to redo list
+    if (OB_FAIL(sorted_redo_list_.push(meta_node))) {
+      if (OB_ENTRY_EXIST == ret) {
+        // redo log duplicate
+      } else {
+        LOG_ERROR("push node into redo log list fail", KR(ret), K(sorted_redo_list_), KPC(meta_node));
+      }
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    if (OB_FAIL(get_log_entry_task_(redo_log_entry_task))) {
+      LOG_ERROR("get_log_entry_task_ fail", KR(ret), KPC(redo_log_entry_task));
+    } else if (OB_ISNULL(mutator_row_data = static_cast<char *>(redo_log_entry_task->alloc(mutator_row_size)))) {
+      LOG_ERROR("allocate memory for mutator row data fail", K(mutator_row_size), K(meta));
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+    } else {
+      // Fill in the data carried in this redo
+      (void)MEMCPY(mutator_row_data, redo_data, redo_data_size);
+
+      if (OB_FAIL(redo_log_entry_task->init(partition_, trans_id, log_id, log_offset, meta_node,
+          mutator_row_data, mutator_row_size, redo_data_size))) {
+        LOG_ERROR("redo_log_entry_task init fail", KR(ret), KPC(redo_log_entry_task));
+      } else if (OB_UNLIKELY(NULL != log_entry_task_)) {
+        LOG_ERROR("log_entry_task_ is not NULL, unexpected", KPC(log_entry_task_));
+        ret = OB_ERR_UNEXPECTED;
+      } else {
+        log_entry_task_ = redo_log_entry_task;
+      }
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+    if (NULL != meta_node) {
+      meta_node->reset();
+      allocator_.free(meta_node);
+      meta_node = NULL;
+    }
+  }
+
+  return ret;
+}
+
+int PartTransTask::get_log_entry_task_(ObLogEntryTask *&log_entry_task)
+{
+  int ret = OB_SUCCESS;
+  log_entry_task = NULL;
+  IObLogEntryTaskPool *log_entry_task_pool = TCTX.log_entry_task_pool_;
+
+  if (OB_ISNULL(log_entry_task_pool)) {
+    LOG_ERROR("log_entry_task_pool_ is NULL");
+    ret = OB_ERR_UNEXPECTED;
+  } else if (OB_FAIL(log_entry_task_pool->alloc(log_entry_task, this))) {
+    LOG_ERROR("log_entry_task_pool_ alloc fail", KR(ret), KPC(log_entry_task), KPC(this));
+  } else if (OB_ISNULL(log_entry_task)) {
+    LOG_ERROR("log_entry_task is NULL", KPC(log_entry_task), KPC(this));
+    ret = OB_ERR_UNEXPECTED;
+  } else {
+    // succ
+  }
+
+  return ret;
+}
+
+int PartTransTask::push_redo_on_not_row_start_(const ObMemtableMutatorMeta &meta,
+    const int64_t log_no,
+    const uint64_t log_id,
+    const char *redo_data,
+    const int64_t redo_data_size)
+{
+  int ret = OB_SUCCESS;
+  const bool is_ddl_part = is_ddl_partition(partition_);
+
+  if (is_ddl_part) {
+    if (OB_FAIL(push_ddl_redo_on_not_row_start_(meta, log_no, log_id, redo_data, redo_data_size))) {
+      if (OB_LOG_MISSING != ret) {
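+        // OB_LOG_MISSING is expected when the redo list is empty and this log is not a row start
+        // (e.g. the middle/tail of a big-row LOB mutator): the caller only logs a WARN and ignores
+        // this log, resuming from the row-start redo (see the comment before
+        // push_ddl_redo_on_not_row_start_ below), so it is not reported as an error here.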
LOG_ERROR("push_ddl_redo_on_not_row_start_ fail", KR(ret), K(partition_), K(meta), K(log_no), + K(log_id), KP(redo_data), K(redo_data_size)); + } + } + } else { + if (OB_FAIL(push_dml_redo_on_not_row_start_(meta, log_no, log_id, redo_data, redo_data_size))) { + if (OB_LOG_MISSING != ret) { + LOG_ERROR("push_dml_redo_on_not_row_start_ fail", KR(ret), K(partition_), K(meta), K(log_no), + K(log_id), KP(redo_data), K(redo_data_size)); + } + } + } + + return ret; +} + +// If it is not the beginning of the line, it must be consecutive to the previous log, i.e. the log number is a successor to the previous push log +// +// 1. if the redo list is empty, the first log is not the starting redo log of the partitioned transaction, +// to facilitate the processing of the LOB log, return OB_LOG_MISSING and ask the log to be ignored externally, and then resume the redo log from the first log +// +// 2. If log_no is not consecutive with the last push log, it means there is a data error, it is a bug of the observer, return OB_DISCONTINUOUS_LOG +int PartTransTask::push_ddl_redo_on_not_row_start_(const ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size) +{ + int ret = OB_SUCCESS; + DdlRedoLogNode *last_redo = static_cast(sorted_redo_list_.last_push_node_); + + if (sorted_redo_list_.log_num_ <= 0) { + ret = OB_LOG_MISSING; + } else if (OB_ISNULL(last_redo)) { + LOG_ERROR("last redo node is invalid", K(sorted_redo_list_)); + ret = OB_ERR_UNEXPECTED; + } + // Append redo data to the previous node + else if (OB_FAIL(last_redo->append_redo_log(log_no, log_id, redo_data, redo_data_size))) { + LOG_ERROR("last redo append data fail", KR(ret), KPC(last_redo), K(log_no), K(log_id), + K(redo_data_size)); + } else { + // success + LOG_DEBUG("LOB data append success", K(meta), K(log_no), K(log_id), K(redo_data_size), + KPC(last_redo)); + } + + return ret; +} + +int PartTransTask::push_dml_redo_on_not_row_start_(const memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size) +{ + int ret = OB_SUCCESS; + DmlRedoLogMetaNode *last_redo = static_cast(sorted_redo_list_.last_push_node_); + + if (sorted_redo_list_.log_num_ <= 0) { + ret = OB_LOG_MISSING; + } else if (OB_ISNULL(last_redo)) { + LOG_ERROR("last redo node is invalid", K(sorted_redo_list_)); + ret = OB_ERR_UNEXPECTED; + } else { + if (OB_ISNULL(log_entry_task_)) { + LOG_ERROR("log_entry_task_ is NULL", K(log_entry_task_)); + ret = OB_ERR_UNEXPECTED; + } else { + if (OB_FAIL(log_entry_task_->append_redo_log(log_no, log_id, redo_data, redo_data_size))) { + LOG_ERROR("log_entry_task_ append_redo_log fail", KR(ret), KPC(log_entry_task_), K(log_no), K(log_id), + K(redo_data_size)); + } else { + LOG_DEBUG("LOB data append success", K(meta), K(log_no), K(log_id), K(redo_data_size), + KPC(last_redo)); + } + } + } + + return ret; +} + +int PartTransTask::prepare(const common::ObPartitionKey &partition, + const int64_t timestamp, + const ObTransID &trans_id, + const uint64_t prepare_log_id, + const uint64_t cluster_id, + const common::ObVersion freeze_version, + const ObString &trace_id, + const ObString &trace_info, + const transaction::ObElrTransInfoArray &elt_trans_info_array) +{ + int ret = OB_SUCCESS; + const bool is_ddl_part = is_ddl_table(partition.get_table_id()); + + if (OB_UNLIKELY(TASK_TYPE_UNKNOWN != type_)) { + LOG_ERROR("type is not expected", "type", print_task_type(type_), + "expected_type", 
"TASK_TYPE_UNKNOWN"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(! partition.is_valid()) + || OB_UNLIKELY(timestamp <= 0) + || OB_UNLIKELY(! trans_id.is_valid()) + || OB_UNLIKELY(OB_INVALID_ID == prepare_log_id)) { + LOG_ERROR("invalid arguemnts", K(partition), K(timestamp), K(trans_id), K(prepare_log_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_trans_id_info_(partition, trans_id))) { + LOG_ERROR("init_trans_id_info_ fail", KR(ret), K(partition), K(trans_id)); + } else if (OB_FAIL(init_trace_id_(trace_id))) { + LOG_ERROR("init_trace_id_ fail", KR(ret), K(trace_id)); + } else if (OB_FAIL(init_trace_info_(trace_info))) { + LOG_ERROR("init_trace_info_ fail", KR(ret), K(trace_info)); + } else if (OB_FAIL(prev_trans_arr_.assign(elt_trans_info_array))) { + LOG_ERROR("prev_trans_arr_ assign fail", KR(ret), K(elt_trans_info_array)); + } else if (OB_FAIL(to_string_pkey_and_log_id_(prepare_log_id))) { + LOG_ERROR("to_string_pkey_and_log_id_ fail", KR(ret), K(prepare_log_id)); + } else { + // Set type to DDL transaction if it is a DDL table, otherwise it is a DML transaction + type_ = is_ddl_part ? TASK_TYPE_DDL_TRANS : TASK_TYPE_DML_TRANS; + timestamp_ = timestamp; + prepare_log_id_ = prepare_log_id; + cluster_id_ = cluster_id; + freeze_version_ = freeze_version; + } + + LOG_DEBUG("PartTransTask::prepare", KR(ret), K(partition), K(type_), K(sorted_redo_list_), + K(timestamp), K(trans_id), K(prepare_log_id), K(cluster_id), K(freeze_version), K(trace_id), + "count", prev_trans_arr_.count(), K(elt_trans_info_array)); + + return ret; +} + +int PartTransTask::commit(const int64_t global_trans_version, + const PartitionLogInfoArray &participants, + const bool is_ready_to_commit, + const int64_t first_log_ts, + const transaction::ObTransID &trans_id, + const TransCommitInfo &trans_commit_info, + PartTransDispatcher &part_trans_dispatcher) +{ + int ret = OB_SUCCESS; + const bool is_ddl_part = is_ddl_partition(partition_); + + if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == first_log_ts)) { + LOG_ERROR("first_log_ts is invalid", K(first_log_ts)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(set_commit_info_(global_trans_version, participants))) { + LOG_ERROR("set_commit_info_ fail", KR(ret), K(global_trans_version), K(participants)); + } + // Pre-commit or not + else if (is_ready_to_commit) { + // Set up a pre-committed state and wait until the checkpoint advances to update the state or update the state via callbacks/checks in early unlock scenarios + // see: update_trans_committed_status() + ATOMIC_STORE(&is_trans_ready_to_commit_, true); + + // Pre-commit status, handling precursor transactions in early unlock scenarios + if (OB_FAIL(handle_elr_prev_trans_(part_trans_dispatcher, first_log_ts))) { + LOG_ERROR("handle_elr_prev_trans_ fail", KR(ret), K(partition_), K(prepare_log_id_), K(trans_id_), + K(prev_trans_arr_), K(follow_trans_arr_)); + } else { + // succ + } + } else { + ATOMIC_STORE(&is_trans_committed_, true); + + if (OB_FAIL(part_trans_dispatcher.insert_commit_trans(trans_id, trans_commit_info))) { + LOG_ERROR("trans_commit_map insert fail", KR(ret), K(trans_id), K(trans_commit_info)); + } + // Transaction commit status. 
Handling subsequent transactions in early unlock scenarios + else if (OB_FAIL(handle_elr_follow_trans_(part_trans_dispatcher))) { + LOG_ERROR("handle_elr_follow_trans_ fail", KR(ret), K(partition_), K(prepare_log_id_), K(trans_id_), + K(prev_trans_arr_), K(follow_trans_arr_)); + } + } + + LOG_DEBUG("commit_normal_trans", KR(ret), "pkey", partition_, K(trans_id), K(is_ddl_part), + K(is_ready_to_commit), K(is_trans_ready_to_commit_), + K(is_trans_committed_), K(global_trans_version), K(participants), + "prev_trans_arr_cnt", prev_trans_arr_.count(), K(prev_trans_arr_), + "follow_trans_arr_cnt", follow_trans_arr_.count(), K(follow_trans_arr_)); + + return ret; +} + +int PartTransTask::set_commit_info_(const int64_t global_trans_version, + const PartitionLogInfoArray &participants) +{ + int ret = OB_SUCCESS; + + // A commit transaction may only be a DML transaction or a DDL transaction, all other types are reported as errors + if (OB_UNLIKELY(TASK_TYPE_DML_TRANS != type_ && TASK_TYPE_DDL_TRANS != type_)) { + LOG_ERROR("can not commit without prepare", K_(type), K_(timestamp), K_(partition), K_(trans_id)); + ret = OB_STATE_NOT_MATCH; + } else if (OB_UNLIKELY(global_trans_version < 0)) { + LOG_ERROR("invalid argument", K(global_trans_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(participants.count() <= 0)) { + LOG_ERROR("participants are empty", K(participants), K(global_trans_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(init_participant_array_(participants))) { + LOG_ERROR("init participants array fail", KR(ret), K(participants)); + } else { + global_trans_version_ = global_trans_version; + } + + return ret; +} + +int PartTransTask::handle_elr_prev_trans_(PartTransDispatcher &part_trans_dispatcher, + const int64_t first_log_ts) +{ + int ret = OB_SUCCESS; + const int64_t prev_trans_arr_cnt = prev_trans_arr_.count(); + int64_t prev_trans_commit_cnt = 0; + bool has_done = false; + const char *commit_reason = "NONE"; + + if (prev_trans_arr_cnt > 0) { + for (int64_t idx = 0; OB_SUCC(ret) && ! has_done && idx < prev_trans_arr_cnt; ++idx) { + const ObElrTransInfo &elr_trans_info = prev_trans_arr_.at(idx); + const ObTransID &prev_trans_id = elr_trans_info.get_trans_id(); + const int64_t prev_trans_commit_version = elr_trans_info.get_commit_version(); + const int trans_res = elr_trans_info.get_result(); + + if (OB_UNLIKELY(! ObTransResultState::is_valid(trans_res))) { + LOG_ERROR("elr_trans_info result is not valid", K(trans_res), K(elr_trans_info), K(prev_trans_arr_)); + ret = OB_INVALID_ARGUMENT; + } else if (ObTransResultState::is_commit(trans_res)) { + ++prev_trans_commit_cnt; + } else if (ObTransResultState::is_abort(trans_res)) { + // The predecessor transaction is in the abort state and is not processed here, essentially the current transaction writes the abort log + has_done = true; + } else { + // The predecessor transaction is in UNKNOWN + PartTransTask *prev_trans_task = NULL; + PartTransID prev_part_trans_id(prev_trans_id, partition_); + bool is_commit_trans = false; + + if (OB_FAIL(part_trans_dispatcher.get_task(prev_part_trans_id, prev_trans_task))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // 1. Find that the predecessor transaction does not exist, indicating that the state of the predecessor transaction must have been determined + // 2. Determine if the commit version of the predecessor transaction is less than the first log timestamp, no longer processed + // 3. 
Find if the predecessor transaction is abort + ret = OB_SUCCESS; + + if (prev_trans_commit_version < first_log_ts) { + // The predecessor transaction was not synchronized, and is not handled here + has_done = true; + } else { + if (OB_FAIL(part_trans_dispatcher.find_commit_trans_info(prev_trans_id, is_commit_trans))) { + LOG_ERROR("part_trans_dispatcher find_commit_trans_info fail", KR(ret), + K(is_commit_trans)); + } else if (! is_commit_trans) { + // The predecessor transaction is in the abort state and is not processed here, essentially the current transaction writes the abort log + has_done = true; + } else { + // The predecessor transaction already commit + ++prev_trans_commit_cnt; + commit_reason = "prev_trans_be_commit_trans_map"; + } + } + } else { + LOG_ERROR("part_trans_dispatcher get_task fail", KR(ret), K(prev_part_trans_id), KPC(prev_trans_task)); + } + } else if (OB_ISNULL(prev_trans_task)) { + LOG_ERROR("prev_trans_task is NULL", K(prev_part_trans_id), K(prev_trans_task)); + ret = OB_ERR_UNEXPECTED; + // The predecessor transaction already commit + } else if (prev_trans_task->is_trans_committed()) { + ++prev_trans_commit_cnt; + commit_reason = "prev_trans_be_commited"; + // The predecessor transaction not commit, register a callback + } else if (OB_FAIL(prev_trans_task->register_elt_follow_trans(trans_id_))) { + LOG_ERROR("prev_trans_task register_elt_follow_trans fail", KR(ret), K(partition_), K(trans_id_)); + } else { + // succ + } + LOG_DEBUG("elr_prev_trans", K(commit_reason), K(partition_), K(trans_id_), K(prev_trans_arr_), K(elr_trans_info), + K(is_commit_trans)); + } + } // for + + if (OB_SUCC(ret)) { + if (OB_FAIL(commit_elr_trans_(prev_trans_arr_cnt, prev_trans_commit_cnt, commit_reason, part_trans_dispatcher))) { + LOG_ERROR("commit_elr_trans_ fail", KR(ret), K(partition_), K(prepare_log_id_), K(trans_id_), + K(prev_trans_arr_cnt), K(prev_trans_commit_cnt), K(commit_reason)); + } else { + // succ + } + } + } else { + // no predecessor transaction + // do nothing + } + + return ret; +} + +int PartTransTask::commit_elr_trans_(const int64_t prev_trans_arr_cnt, + const int64_t prev_trans_commit_cnt, + const char *commit_reason, + PartTransDispatcher &part_trans_dispatcher) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! ATOMIC_LOAD(&is_trans_ready_to_commit_))) { + LOG_ERROR("is_trans_ready_to_commit is false, unexcepted trans state", K(partition_), K(trans_id_)); + ret = OB_STATE_NOT_MATCH; + } else if (prev_trans_arr_cnt == prev_trans_commit_cnt) { + ATOMIC_STORE(&is_trans_committed_, true); + + LOG_DEBUG("commit_elr_trans", K(commit_reason), K(partition_), K(trans_id_), K(prev_trans_arr_cnt), K(prev_trans_commit_cnt), + "is_ready_to_commit", ATOMIC_LOAD(&is_trans_ready_to_commit_), + "is_trans_committed", ATOMIC_LOAD(&is_trans_committed_)); + + // Transaction commit status. 
Handling subsequent transactions in early unlock scenarios + if (OB_FAIL(handle_elr_follow_trans_(part_trans_dispatcher))) { + LOG_ERROR("handle_elr_follow_trans_ fail", KR(ret), K(partition_), K(prepare_log_id_), K(trans_id_), + K(prev_trans_arr_), K(follow_trans_arr_)); + } + } else { + // do nothing + } + + return ret; +} + +int PartTransTask::handle_elr_follow_trans_(PartTransDispatcher &part_trans_dispatcher) +{ + int ret = OB_SUCCESS; + const int64_t follow_trans_arr_cnt = follow_trans_arr_.count(); + + for (int64_t idx = 0; OB_SUCC(ret) && idx < follow_trans_arr_cnt; ++idx) { + const ObTransID &follow_trans_id = follow_trans_arr_.at(idx); + PartTransTask *follow_task = NULL; + PartTransID follow_part_trans_id(follow_trans_id, partition_); + + if (OB_FAIL(part_trans_dispatcher.get_task(follow_part_trans_id, follow_task))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Finding that a successor transaction does not exist means that the status of the successor transaction must have been determined, no special treatment is needed here + ret = OB_SUCCESS; + } else { + LOG_ERROR("part_trans_dispatcher get_task fail", KR(ret), K(follow_part_trans_id), KPC(follow_task)); + } + } else if (OB_ISNULL(follow_task)) { + LOG_ERROR("follow_task is NULL", K(follow_part_trans_id), K(follow_task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(follow_task->handle_elt_trans_callback(trans_id_, part_trans_dispatcher))) { + LOG_ERROR("handle_elt_trans_callback_ fail", KR(ret), K(trans_id_)); + } else { + // succ + } + } // for + + // The list of successor transactions needs to be cleared after processing + // Three scenarios determine the commit status of a transaction and trigger the processing of the list of successor transactions + // 1. commit() + // 2. commit_elr_trans_() + // 3. update_trans_committed_status + // Single-unit single-partition/single-unit multi-partition early unlock scenarios are currently supported, so 2 and 3 may be called repeatedly, as the task will not be issued until after the logs have been parsed + if (OB_SUCC(ret)) { + follow_trans_arr_.reset(); + } + + return ret; +} + +int PartTransTask::handle_elt_trans_callback(const transaction::ObTransID &trans_id, + PartTransDispatcher &part_trans_dispatcher) +{ + int ret = OB_SUCCESS; + const int64_t prev_trans_arr_cnt = prev_trans_arr_.count(); + int64_t prev_trans_commit_cnt = 0; + bool is_prev_trans_exist = false; + + for (int64_t idx = 0; OB_SUCC(ret) && idx < prev_trans_arr_cnt; ++idx) { + ObElrTransInfo &elr_trans_info = prev_trans_arr_.at(idx); + const ObTransID &prev_trans_id = elr_trans_info.get_trans_id(); + const int trans_res = elr_trans_info.get_result(); + + // Find the corresponding transaction in the precursor array + if (trans_id == prev_trans_id) { + if (OB_UNLIKELY(! ObTransResultState::is_unknown(trans_res))) { + LOG_ERROR("elr_trans_info result is not unknown", K(trans_res), K(elr_trans_info), + K(prev_trans_arr_)); + ret = OB_STATE_NOT_MATCH; + } else { + // Change the status to commit and count the number of committed transactions + is_prev_trans_exist = true; + elr_trans_info.set_result(ObTransResultState::COMMIT); + ++prev_trans_commit_cnt; + } + } else if (ObTransResultState::is_commit(trans_res)) { + ++prev_trans_commit_cnt; + } else { + // do nothing + } + } // for + + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(! 
is_prev_trans_exist)) { + LOG_ERROR("unfound prev trans is unexcepted", K(partition_), K(trans_id), K(prev_trans_arr_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(commit_elr_trans_(prev_trans_arr_cnt, prev_trans_commit_cnt, "follow_trans_callback", part_trans_dispatcher))) { + LOG_ERROR("commit_elr_trans_ fail", KR(ret), K(partition_), K(prepare_log_id_), K(trans_id_), + K(prev_trans_arr_cnt), K(prev_trans_commit_cnt)); + } else { + // succ + } + } + + return ret; +} + +int PartTransTask::register_elt_follow_trans(const transaction::ObTransID &follow_trans_id) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(follow_trans_arr_.push_back(follow_trans_id))) { + LOG_ERROR("follow_trans_arr_ push_back fail", KR(ret), K(partition_), K(trans_id_), K(follow_trans_id)); + } + + return ret; +} + +int PartTransTask::update_trans_committed_status(const int64_t checkpoint, + PartTransDispatcher &part_trans_dispatcher) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_VERSION == checkpoint)) { + LOG_ERROR("checkpoint is invalid", K(partition_), K(checkpoint)); + ret = OB_INVALID_ARGUMENT; + } else { + // Single machine multipartition transactions, global_trans_version to determine if they are ready for distribution + if (ATOMIC_LOAD(&is_trans_ready_to_commit_)) { + bool bool_ret = (global_trans_version_ <= checkpoint); + + // Atoms set is_trans_committed to true if it can be committed and sent, ensuring that partitioned tasks can be committed and validated + if (bool_ret) { + ATOMIC_STORE(&is_trans_committed_, true); + + // Transaction commit status. Handling subsequent transactions in early unlock scenarios + if (OB_FAIL(handle_elr_follow_trans_(part_trans_dispatcher))) { + LOG_ERROR("handle_elr_follow_trans_ fail", KR(ret), K(partition_), K(prepare_log_id_), K(trans_id_), + K(prev_trans_arr_), K(follow_trans_arr_)); + } + } + } + } + + return ret; +} + +int PartTransTask::try_to_set_data_ready_status() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! is_dml_trans())) { + LOG_ERROR("Not a dml trans is unexcepted", KPC(this)); + ret = OB_STATE_NOT_MATCH; + } else if (ATOMIC_LOAD(&is_data_ready_)) { + // do nothing + } else if (is_contain_empty_redo_log()) { + set_data_ready(); + } else { + // Ensure the correctness of concurrent processing of Formatter/Storager + // and PartTransDispatcher-try_to_set_data_ready_status + ObByteLockGuard guard(data_ready_lock_); + const int64_t total_node_num = sorted_redo_list_.get_node_number(); + const int64_t cur_ready_node_num = ATOMIC_LOAD(&dml_ready_redo_node_num_); + const bool is_part_trans_served = is_served(); + bool is_data_ready = false; + + if (OB_UNLIKELY(! 
is_part_trans_served)) { + LOG_ERROR("part trans unserved is unexcepted", K(is_part_trans_served), KPC(this)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(check_dml_redo_node_ready_and_handle_(total_node_num, cur_ready_node_num, is_data_ready))) { + LOG_ERROR("check_dml_redo_node_ready_and_handle_ fail", KR(ret), K(total_node_num), K(cur_ready_node_num), + K(is_data_ready), KPC(this)); + } else {} + } + + return ret; +} + +int PartTransTask::handle_log_entry_task_callback(const ObLogEntryTask::CallBackModule cb_module, + ObLogEntryTask &log_entry_task, + bool &is_unserved_part_trans_task_can_be_recycled) +{ + int ret = OB_SUCCESS; + int64_t valid_row_num = 0; + + if (OB_UNLIKELY(is_ddl_part())) { + LOG_ERROR("Not a dml part is unexcepted", KPC(this)); + ret = OB_STATE_NOT_MATCH; + } else { + ObByteLockGuard guard(data_ready_lock_); + + const int64_t total_node_num = sorted_redo_list_.get_node_number(); + const int64_t cur_ready_node_num = ATOMIC_AAF(&dml_ready_redo_node_num_, 1); + const bool is_part_trans_served = is_served(); + + if (cur_ready_node_num > total_node_num) { + LOG_ERROR("cur_ready_node_num is greater than sorted_redo_list_ total_node_num", + K(cur_ready_node_num), K(total_node_num), K(log_entry_task), KPC(this)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(log_entry_task.get_valid_row_num(valid_row_num))) { + LOG_ERROR("log_entry_task get_valid_row_num fail", KR(ret), K(valid_row_num)); + } else if (is_part_trans_served) { + if (! is_trans_committed()) { + // do nothing + } else if (cur_ready_node_num < total_node_num) { + // do nothing + } else { + bool is_data_ready = false; + + if (OB_FAIL(check_dml_redo_node_ready_and_handle_(total_node_num, cur_ready_node_num, is_data_ready))) { + LOG_ERROR("check_dml_redo_node_ready_and_handle_ fail", KR(ret), K(total_node_num), K(cur_ready_node_num), + K(is_data_ready), KPC(this)); + } + } + } else { + if (OB_FAIL(handle_unserved_part_trans_(is_unserved_part_trans_task_can_be_recycled))) { + LOG_ERROR("handle_unserved_part_trans_ fail", KR(ret), K(is_unserved_part_trans_task_can_be_recycled), KPC(this)); + } + } + } + + if (OB_SUCC(ret)) { + bool need_revert_log_entry_task = false; + if (ObLogEntryTask::is_dml_parser(cb_module) || ObLogEntryTask::is_storager(cb_module)) { + need_revert_log_entry_task = true; + } else if (ObLogEntryTask::is_formatter(cb_module)) { + if (0 == valid_row_num) { + need_revert_log_entry_task = true; + } + } else {} + + if (need_revert_log_entry_task) { + IObLogResourceCollector *resource_collector = TCTX.resource_collector_; + + if (OB_ISNULL(resource_collector)) { + LOG_ERROR("resource_collector is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(resource_collector->revert_log_entry_task(&log_entry_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert_log_entry_task fail", KR(ret), K(log_entry_task)); + } + } else {} + } + } + + return ret; +} + +int PartTransTask::check_dml_redo_node_ready_and_handle_(const int64_t total_node_num, + const int64_t cur_ready_node_num, + bool &is_data_ready) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(is_ddl_part())) { + LOG_ERROR("Not a dml part is unexcepted", KPC(this)); + ret = OB_STATE_NOT_MATCH; + } else if (ATOMIC_LOAD(&is_data_ready_)) { + // Double check, do nothing when data is ready + is_data_ready = is_data_ready_; + } else { + is_data_ready = (cur_ready_node_num == total_node_num); + + if (is_data_ready) { + if (OB_FAIL(handle_when_all_dml_redo_node_ready_())) { + LOG_ERROR("handle_when_all_dml_redo_node_ready_ fail", KR(ret)); + } 
else {
+        // Only when processing is complete can the status be set to avoid concurrent processing by Sequencer
+        set_data_ready();
+      }
+    }
+  }
+
+  return ret;
+}
+
+int PartTransTask::handle_when_all_dml_redo_node_ready_()
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY(is_ddl_part())) {
+    LOG_ERROR("Not a dml part is unexpected", KPC(this));
+    ret = OB_ERR_UNEXPECTED;
+  } else if (sorted_redo_list_.log_num_ > 0 && OB_UNLIKELY(! sorted_redo_list_.is_valid())) {
+    LOG_ERROR("redo log list is invalid", K(sorted_redo_list_), KPC(this));
+    ret = OB_ERR_UNEXPECTED;
+  } else {
+    DmlRedoLogMetaNode *dml_redo_meta = static_cast<DmlRedoLogMetaNode *>(sorted_redo_list_.head_);
+
+    while (OB_SUCC(ret) && NULL != dml_redo_meta) {
+      if (! dml_redo_meta->is_contain_valid_row()) {
+        // do nothing
+      } else if (OB_FAIL(sorted_dml_row_list_.push(dml_redo_meta->get_row_head(), dml_redo_meta->get_row_tail(),
+              dml_redo_meta->get_valid_row_num(), dml_redo_meta->is_contain_rollback_row()))) {
+        LOG_ERROR("sorted_dml_row_list_ push fail", KR(ret), KPC(dml_redo_meta), K(sorted_redo_list_));
+      } else {
+        // succ
+      }
+
+      LOG_DEBUG("[DML_REDO_META]", KR(ret), K_(partition), K_(trans_id), KPC(dml_redo_meta), K(sorted_dml_row_list_));
+
+      if (OB_SUCC(ret)) {
+        dml_redo_meta = static_cast<DmlRedoLogMetaNode *>(dml_redo_meta->next_);
+      }
+    } // while
+  }
+
+  return ret;
+}
+
+int PartTransTask::handle_unserved_part_trans(bool &is_unserved_part_trans_task_can_be_recycled)
+{
+  int ret = OB_SUCCESS;
+  // Ensure the correctness of concurrent processing of Formatter, Storager and PartTransDispatcher
+  ObByteLockGuard guard(data_ready_lock_);
+
+  // set unserved status
+  set_unserved_();
+
+  if (OB_FAIL(handle_unserved_part_trans_(is_unserved_part_trans_task_can_be_recycled))) {
+    LOG_ERROR("handle_unserved_part_trans_ fail", KR(ret), K(is_unserved_part_trans_task_can_be_recycled), KPC(this));
+  }
+
+  return ret;
+}
+
+int PartTransTask::handle_unserved_part_trans_(bool &is_unserved_part_trans_task_can_be_recycled)
+{
+  int ret = OB_SUCCESS;
+  IObLogResourceCollector *resource_collector = TCTX.resource_collector_;
+
+  if (OB_UNLIKELY(is_ddl_part())) {
+    LOG_ERROR("Not a dml part is unexpected", KPC(this));
+    ret = OB_STATE_NOT_MATCH;
+  } else if (OB_ISNULL(resource_collector)) {
+    LOG_ERROR("resource_collector is NULL");
+    ret = OB_ERR_UNEXPECTED;
+  } else if (ATOMIC_LOAD(&is_data_ready_)) {
+    LOG_ERROR("data is already ready, not expected", KPC(this));
+    ret = OB_ERR_UNEXPECTED;
+  } else {
+    const int64_t total_node_num = sorted_redo_list_.get_node_number();
+    const int64_t cur_ready_node_num = ATOMIC_LOAD(&dml_ready_redo_node_num_);
+    bool is_data_ready = false;
+
+    if (OB_FAIL(check_dml_redo_node_ready_and_handle_(total_node_num, cur_ready_node_num, is_data_ready))) {
+      LOG_ERROR("check_dml_redo_node_ready_and_handle_ fail", KR(ret), K(total_node_num), K(cur_ready_node_num),
+          K(is_data_ready), KPC(this));
+    } else if (is_data_ready) {
+      // Get PartTransTask status only when all dml redo node ready
+      is_unserved_part_trans_task_can_be_recycled = true;
+
+      // set unserved Part Transaction ref cnt
+      set_ref_cnt(get_br_num() + 1);
+      ObLogRowDataIndex *row_data_index = get_sorted_dml_row_list().get_head();
+
+      while (OB_SUCC(ret) && NULL != row_data_index) {
+        if (OB_FAIL(resource_collector->revert_unserved_task(false/*is_rollback_row*/, *row_data_index))) {
+          if (OB_IN_STOP_STATE != ret) {
+            LOG_ERROR("revert_unserved_task fail", KR(ret), KPC(row_data_index));
+          }
+        }
+
+        if (OB_SUCC(ret)) {
+          row_data_index = row_data_index->get_next();
+        }
+      } // while
+    } else {}
+
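+    // Recycling contract, as a rough sketch of what the branch above establishes: once an
+    // unserved partition transaction has every DML redo node handled (is_data_ready is true),
+    // the task reference count is primed to get_br_num() + 1, each ObLogRowDataIndex is handed
+    // back through IObLogResourceCollector::revert_unserved_task(), and the caller is told via
+    // is_unserved_part_trans_task_can_be_recycled that this PartTransTask may now be recycled.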
} + + return ret; +} + +int PartTransTask::init_part_heartbeat_info(const common::ObPartitionKey &pkey, const int64_t timestamp) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(TASK_TYPE_UNKNOWN != type_)) { + LOG_ERROR("task has been initialized", K(type_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(timestamp <= 0)) { + LOG_ERROR("invalid argument", K(timestamp)); + ret = OB_INVALID_ARGUMENT; + } else { + type_ = TASK_TYPE_PART_HEARTBEAT; + partition_ = pkey; + timestamp_ = timestamp; + } + + return ret; +} + +int PartTransTask::init_global_heartbeat_info(const int64_t timestamp) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(TASK_TYPE_UNKNOWN != type_)) { + LOG_ERROR("task has been initialized", K(type_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(timestamp <= 0)) { + LOG_ERROR("invalid argument", K(timestamp)); + ret = OB_INVALID_ARGUMENT; + } else { + type_ = TASK_TYPE_GLOBAL_HEARTBEAT; + timestamp_ = timestamp; + } + + return ret; +} + +int PartTransTask::init_offline_partition_task(const common::ObPartitionKey &partition) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(TASK_TYPE_UNKNOWN != type_)) { + LOG_ERROR("task has been initialized", K(type_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(! partition.is_valid())) { + LOG_ERROR("invalid argument", K(partition)); + ret = OB_INVALID_ARGUMENT; + } else { + type_ = TASK_TYPE_OFFLINE_PARTITION; + partition_ = partition; + } + + return ret; +} + +// Only conversions from DML/DDL type transactions are supported, direct initialisation is not supported +int PartTransTask::convert_to_not_served_trans() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(TASK_TYPE_DML_TRANS != type_) + && OB_UNLIKELY(TASK_TYPE_DDL_TRANS != type_)) { + LOG_ERROR("task is not DML or DDL trans, not support", K(type_)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(! 
is_task_info_valid())) { + LOG_ERROR("tqsk info is not valid", K(*this)); + ret = OB_INVALID_DATA; + } else { + type_ = TASK_TYPE_NOT_SERVED_TRANS; + } + + return ret; +} + +bool PartTransTask::is_base_trans_info_valid_() const +{ + return (is_trans_committed_ + && partition_.is_valid() + && timestamp_ > 0 + && trans_id_.is_valid() + && OB_INVALID_ID != prepare_log_id_ + && global_trans_version_ >= 0); +} + +bool PartTransTask::is_task_info_valid() const +{ + bool bool_ret = false; + + // All types of transactions require a valid checkpoint seq + if (checkpoint_seq_ < 0) { + bool_ret = false; + } else { + switch (type_) { + case TASK_TYPE_PART_HEARTBEAT: + // Partitioned heartbeats task require both timestamps and partitions to be valid + bool_ret = (timestamp_ > 0) && (partition_.is_valid()); + break; + + case TASK_TYPE_GLOBAL_HEARTBEAT: + // Global heartbeat only requires a valid timestamp + bool_ret = (timestamp_ > 0); + break; + + case TASK_TYPE_NOT_SERVED_TRANS: + case TASK_TYPE_DML_TRANS: + // The basic transaction information is valid and the DML parsing sequence number is valid + bool_ret = (is_base_trans_info_valid_()); + break; + + case TASK_TYPE_DDL_TRANS: + // Basic transaction information is valid + bool_ret = (is_base_trans_info_valid_()); + break; + + case TASK_TYPE_OFFLINE_PARTITION: + // Offline task only require valid partition + bool_ret = (partition_.is_valid()); + break; + + default: + bool_ret = false; + break; + } + } + + if (!bool_ret) { + LOG_INFO("is_task_info_valid", K(bool_ret), K(*this)); + } + return bool_ret; +} + +void *PartTransTask::alloc(const int64_t size) +{ + void *alloc_ret = NULL; + if (size > 0) { + alloc_ret = allocator_.alloc(size); + } + return alloc_ret; +} + +void PartTransTask::free(void *ptr) +{ + allocator_.free(ptr); + ptr = NULL; +} + +int PartTransTask::add_stmt(const uint64_t row_index, IStmtTask *stmt_task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == row_index) + || OB_ISNULL(stmt_task)) { + LOG_ERROR("invalid argument", K(row_index), KPC(stmt_task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(stmt_list_.add(stmt_task))) { + LOG_ERROR("add stmt task into stmt_list fail", KR(ret), K(stmt_list_), KP(stmt_task)); + } else { + stmt_task->set_row_index(row_index); + } + + return ret; +} + +int PartTransTask::add_ddl_stmt(const uint64_t row_index, DdlStmtTask *ddl_stmt) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == row_index) + || OB_ISNULL(ddl_stmt) + || OB_UNLIKELY(ddl_stmt->get_op_schema_version() <= 0) + || OB_ISNULL(ddl_stmt->get_binlog_record())) { + LOG_ERROR("invalid argument", KPC(ddl_stmt)); + ret = OB_INVALID_ARGUMENT; + } else { + ddl_stmt->set_row_index(row_index); + ddl_stmt->set_next(NULL); + + if (NULL == stmt_list_.head_) { + stmt_list_.head_ = ddl_stmt; + stmt_list_.tail_ = ddl_stmt; + } else { + IStmtTask *stmt_task = stmt_list_.head_; + IStmtTask *before = NULL; + bool located = false; + + // Sort DDL by Schema version incrementally + while (OB_SUCCESS == ret && ! 
located && NULL != stmt_task) { + IStmtTask *next = stmt_task->get_next(); + DdlStmtTask *dt = dynamic_cast(stmt_task); + + if (OB_ISNULL(dt)) { + LOG_ERROR("dynamic cast to DdlStmtTask fail", K(stmt_task), K(*stmt_task)); + ret = OB_ERR_UNEXPECTED; + break; + } + + if (dt->get_op_schema_version() > ddl_stmt->get_op_schema_version()) { + ddl_stmt->set_next(dt); + + if (NULL == before) { + stmt_list_.head_ = ddl_stmt; + } else { + before->set_next(ddl_stmt); + } + + located = true; + break; + } else { + before = stmt_task; + stmt_task = next; + } + } + + if (OB_SUCCESS == ret && ! located) { + stmt_list_.tail_->set_next(ddl_stmt); + stmt_list_.tail_ = ddl_stmt; + ddl_stmt->set_next(NULL); + } + } + + if (OB_SUCC(ret)) { + stmt_list_.num_++; + } + } + + return ret; +} + +void PartTransTask::set_formatted() +{ + LOG_DEBUG("[STAT] [TRANS_TASK] SET_FORMATTED", K_(is_data_ready), "task", *this); + + (void)ATOMIC_SET(&is_data_ready_, true); + + // Atomic loading cond variables + ObCond *cond = ATOMIC_LOAD(&wait_formatted_cond_); + + if (NULL != cond) { + cond->signal(); + } +} + +// Note: requires that the cond variable provided in multiple consecutive calls is the same variable +int PartTransTask::wait_formatted(const int64_t timeout, ObCond &cond) +{ + int ret = OB_SUCCESS; + int64_t end_time = ::oceanbase::common::ObTimeUtility::current_time() + timeout; + + if (ATOMIC_LOAD(&is_data_ready_)) { + // The format is already done, nothing needs to be done + } else { + // First set the condition variable + (void)ATOMIC_SET(&wait_formatted_cond_, &cond); + + // Re-check the variable values + while (OB_SUCCESS == ret && ! ATOMIC_LOAD(&is_data_ready_)) { + int64_t left_time = end_time - ::oceanbase::common::ObTimeUtility::current_time(); + + if (left_time <= 0) { + ret = OB_TIMEOUT; + break; + } else { + cond.timedwait(left_time); + } + } + } + + if (OB_SUCC(ret)) { + LOG_DEBUG("[STAT] [TRANS_TASK] WAIT_FORMATTED", K_(is_data_ready), "task", *this); + } + + return ret; +} + +void PartTransTask::set_data_ready() +{ + LOG_DEBUG("[STAT] [TRANS_TASK] SET_DATA_READY", K_(is_data_ready), "task", *this); + (void)ATOMIC_SET(&is_data_ready_, true); + wait_data_ready_cond_.signal(); +} + +int PartTransTask::wait_data_ready(const int64_t timeout) +{ + int ret = OB_SUCCESS; + int64_t end_time = ::oceanbase::common::ObTimeUtility::current_time() + timeout; + + // Re-check the variable values + while (OB_SUCCESS == ret && ! 
ATOMIC_LOAD(&is_data_ready_)) { + int64_t left_time = end_time - ::oceanbase::common::ObTimeUtility::current_time(); + + if (left_time <= 0) { + ret = OB_TIMEOUT; + break; + } else { + wait_data_ready_cond_.timedwait(left_time); + } + } + + if (OB_SUCC(ret)) { + LOG_DEBUG("[STAT] [TRANS_TASK] WAIT_DATA_READY", K_(is_data_ready), "task", *this); + } + + return ret; +} + +int64_t PartTransTask::dec_ref_cnt() +{ + return ATOMIC_AAF(&ref_cnt_, -1); +} + +void PartTransTask::set_ref_cnt(const int64_t value) +{ + (void)ATOMIC_SET(&ref_cnt_, value); +} + +int PartTransTask::init_trace_id_(const ObString &trace_id) +{ + int ret = OB_SUCCESS; + trace_id_.reset(); + + if (trace_id.length() > 0) { + if (OB_ISNULL(trace_id.ptr())) { + LOG_ERROR("invalid trace id", K(trace_id)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t trace_id_len = trace_id.length(); + + // Allocate trace_id memory + char *buf = static_cast(allocator_.alloc(trace_id_len)); + if (OB_ISNULL(buf)) { + LOG_ERROR("allocate memory for trace id buffer fail", K(buf), K(trace_id_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + trace_id_.assign_buffer(buf, static_cast(trace_id_len)); + int64_t write_len = trace_id_.write(trace_id.ptr(), trace_id.length()); + + if (write_len != trace_id_len) { + LOG_ERROR("write trace id fail", K(write_len), K(trace_id_len), K(trace_id), K(trace_id_)); + ret = OB_ERR_UNEXPECTED; + } + } + } + } + + return ret; +} + +int PartTransTask::init_trace_info_(const ObString &trace_info) +{ + int ret = OB_SUCCESS; + trace_info_.reset(); + + if (trace_info.length() > 0) { + if (OB_ISNULL(trace_info.ptr())) { + LOG_ERROR("invalid trace info", K(trace_info)); + ret = OB_INVALID_ARGUMENT; + } else { + const int64_t trace_info_len = trace_info.length(); + char *buf = static_cast(allocator_.alloc(trace_info_len)); + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocate memory for trace id buffer fail", K(buf), K(trace_info_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + trace_info_.assign_buffer(buf, static_cast(trace_info_len)); + int64_t write_len = trace_info_.write(trace_info.ptr(), trace_info.length()); + + if (write_len != trace_info_len) { + LOG_ERROR("write trace id fail", K(write_len), K(trace_info_len), K(trace_info), K(trace_info_)); + ret = OB_ERR_UNEXPECTED; + } + } + } + } + + return ret; +} + +int PartTransTask::init_participant_array_(const PartitionLogInfoArray &participants) +{ + int ret = OB_SUCCESS; + int64_t part_count = participants.count(); + ObPartitionLogInfo *part_array = NULL; + + if (OB_UNLIKELY(NULL != participants_) || OB_UNLIKELY(participant_count_ > 0)) { + LOG_ERROR("participant has been initialized", K(participants_), K(participant_count_)); + ret = OB_INIT_TWICE; + } else if (part_count <= 0) { + // no valid participants + part_count = 0; + part_array = NULL; + } else { + int64_t alloc_size = part_count * sizeof(ObPartitionLogInfo); + part_array = static_cast(allocator_.alloc(alloc_size)); + + if (OB_ISNULL(part_array)) { + LOG_ERROR("allocate memory for participant array fail", K(part_count), K(alloc_size), + K(participants)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t index = 0; OB_SUCC(ret) && index < participants.count(); index++) { + new(part_array + index) ObPartitionLogInfo(participants.at(index)); + } + } + } + + if (OB_SUCC(ret)) { + participants_ = part_array; + participant_count_ = part_count; + } else { + if (NULL != part_array) { + allocator_.free(part_array); + part_array = NULL; + } + } + + return ret; +} + +void 
PartTransTask::destroy_participant_array_() +{ + if (NULL != participants_ && participant_count_ > 0) { + for (int64_t index = 0; index < participant_count_; index++) { + participants_[index].~ObPartitionLogInfo(); + } + + allocator_.free(participants_); + participants_ = NULL; + participant_count_ = 0; + } +} + +int PartTransTask::set_participants(const PartitionLogInfoArray &participants) +{ + return init_participant_array_(participants); +} + +int PartTransTask::to_string_pkey_and_log_id_(const uint64_t prepare_log_id) +{ + int ret = OB_SUCCESS; + static const int64_t local_buf_size = + PrintableSizeGetter::value; + char pkey_local_buf[local_buf_size]; + int64_t pkey_local_buf_pos = 0; + + // Print with a local buffer to avoid memory reuse, so don't call to_cstring(pkey) directly + pkey_local_buf_pos = partition_.to_string(pkey_local_buf, local_buf_size); + + if (OB_UNLIKELY(pkey_local_buf_pos <= 0 || pkey_local_buf_pos >= local_buf_size)) { + LOG_ERROR("pkey local buf pos is not valid", K(pkey_local_buf_pos), K(local_buf_size), K(partition_)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t buf_len = pkey_local_buf_pos + + sizeof(DELIMITER_STR) + + DmlStmtUniqueID::compute_str_length_base_num(prepare_log_id) + + 1; + char *buf = static_cast(allocator_.alloc(buf_len)); + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocate memory for trans id buffer fail", K(buf), K(buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(buf, pkey_local_buf, pkey_local_buf_pos); + int64_t pos = pkey_local_buf_pos; + + if (OB_FAIL(common::databuff_printf(buf, buf_len, pos, DELIMITER_STR"%lu", prepare_log_id))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(buf), K(buf_len), K(pos), K(prepare_log_id)); + } else { + pkey_and_log_id_str_.assign_ptr(buf, static_cast(pos)); + } + } + } + + return ret; +} + +int PartTransTask::to_string_trans_id() +{ + int ret = OB_SUCCESS; + // ObTransID all use the maximum value, 1024 is large enough + static const int64_t TRANS_ID_BUF_LENGTH = 1024; + char trans_id_buf[TRANS_ID_BUF_LENGTH]; + int64_t pos = 0; + + if (OB_FAIL(common::databuff_printf(trans_id_buf, TRANS_ID_BUF_LENGTH, pos, "%s", to_cstring(trans_id_)))) { + LOG_ERROR("databuff_printf fail", K(ret), K(trans_id_), K(trans_id_buf), K(TRANS_ID_BUF_LENGTH), K(pos)); + } else if (OB_UNLIKELY(pos <= 0 || pos >= TRANS_ID_BUF_LENGTH)) { + LOG_ERROR("local buf pos is not valid", K(pos), K(TRANS_ID_BUF_LENGTH), K(partition_)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t buf_len = pos + 1; + char *buf = static_cast(allocator_.alloc(buf_len)); + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocator_ alloc for trans id str fail", K(buf), K(buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(buf, trans_id_buf, pos); + buf[pos] = '\0'; + + trans_id_str_.assign(buf, static_cast(buf_len)); + } + } + + return ret; +} + +// 1. roll back the local schema version of the task +// 1) dml transactions with statements using the same schema version can be left alone; however, rollback is still done for robustness reasons +// 2) ddl transaction rollback by statement +// +// 2. rollback stmt_list task row.sql_no for stmt greater than the rollback condition +// +int PartTransTask::revert_by_rollback_savepoint(const uint64_t row_index, const int32_t sql_no) +{ + int ret = OB_SUCCESS; + const bool is_ddl = is_ddl_trans(); + + if (OB_UNLIKELY(OB_INVALID_ID == row_index) || OB_UNLIKELY(sql_no < 0)) { + LOG_ERROR("invalid argument", K(row_index), K(sql_no)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
is_ddl)) { + LOG_ERROR("is not ddl, unexcepted", K(*this)); + ret = OB_ERR_UNEXPECTED; + // rollabck ddl trans + } else if (OB_FAIL(revert_ddl_stmt_(sql_no))) { + LOG_ERROR("revert ddl stmt by rollback savepoint failed", KR(ret), K(is_ddl), K(sql_no)); + } else { + // succ + } + + return ret; +} + +// DDL transactions stmt order is dependent on schema version and cannot be guaranteed to be strictly incremented by sql no +// need to traverse stmt list +int PartTransTask::revert_ddl_stmt_(const int32_t rollback_sql_no) +{ + int ret = OB_SUCCESS; + const int64_t total_stmt_num = stmt_list_.num_; + int64_t stmt_num = 0; + IStmtTask *stmt_task = stmt_list_.head_; + IStmtTask *dummy_task = NULL; + IStmtTask *pre_task = NULL; + + + while (OB_SUCC(ret) && NULL != stmt_task) { + DdlStmtTask *ddl_task = dynamic_cast(stmt_task); + if (OB_ISNULL(ddl_task)) { + LOG_ERROR("dynamic cast to DdlStmtTask fail", K(stmt_task), K(ddl_task)); + ret = OB_ERR_UNEXPECTED; + } else { + const int32_t stmt_sql_no = ddl_task->get_row_sql_no(); + IStmtTask *next = stmt_task->get_next(); + // stmt less than or equal to the sql_no specified by the rollback savepoint is not processed + if (stmt_sql_no <= rollback_sql_no) { + // dummy points to the first matching stmt + if (NULL == dummy_task) { + dummy_task = stmt_task; + } + pre_task = stmt_task; + stmt_num++; + _LOG_DEBUG("[SAVEPOINT][DDL] ddl_stmt need not revert, schema_version=%ld," + "rollback_sql_no=%d, stmt_sql_no=%d", ddl_task->get_op_schema_version(), + rollback_sql_no, stmt_sql_no); + } else { + if (NULL != pre_task) { + pre_task->set_next(stmt_task->get_next()); + } + ddl_task->~DdlStmtTask(); + free(ddl_task); + ddl_task = NULL; + } + // In all cases, continue traversing the next + stmt_task = next; + } + } + + if (OB_SUCC(ret)) { + stmt_list_.num_ = stmt_num; + stmt_list_.head_ = dummy_task; + stmt_list_.tail_ = pre_task; + } + + if (OB_SUCC(ret)) { + _LOG_INFO("[SAVEPOINT][DDL] ROLLBACK_SQL_NO=%d STMT_CNT=%ld/%ld", + rollback_sql_no, total_stmt_num, stmt_num); + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_part_trans_task.h b/src/liboblog/src/ob_log_part_trans_task.h new file mode 100644 index 0000000000000000000000000000000000000000..8e560c41fb95b80fcc4d2f626c4815aed422d58c --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_task.h @@ -0,0 +1,1138 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_TRANS_TASK_H__ +#define OCEANBASE_LIBOBLOG_TRANS_TASK_H__ + +#include "lib/queue/ob_link.h" // ObLink +#include "lib/atomic/ob_atomic.h" // ATOMIC_LOAD +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock +#include "common/object/ob_object.h" // ObObj +#include "common/ob_partition_key.h" // ObPartitionKey +#include "common/ob_queue_thread.h" // ObCond +#include "storage/transaction/ob_trans_define.h" // ObTransID, PartitionLogInfoArray +#include "storage/memtable/ob_memtable_mutator.h" // ObMemtableMutatorRow, ObMemtableMutatorMeta +#include "storage/ob_i_store.h" // ObRowDml + +#include "ob_log_trans_log.h" // SortedRedoLogList +#include "ob_log_row_list.h" // SortedDmlRowList +#include "ob_log_lighty_list.h" // LightyList +#include "ob_small_arena.h" // ObSmallArena +#include "ob_log_task_pool.h" // TransTaskBase +#include "ob_log_utils.h" // is_ddl_table +#include "ob_log_resource_recycle_task.h" // ObLogResourceRecycleTask + +namespace oceanbase +{ +namespace share +{ +namespace schema +{ +class ObTableSchema; +class ObColumnSchemaV2; +} +} + +namespace liboblog +{ +class PartTransTask; +class ObLogBR; +class ObLogEntryTask; +class ObLogRowDataIndex; + +class IStmtTask : public ObLink // Inheritance of ObLink is only used for Sequencer +{ +public: + enum StmtType + { + STMT_TYPE_UNKNOWN = 0, + STMT_TYPE_DML = 1, // DML statement + STMT_TYPE_DDL = 2, // DDL statement + }; + + IStmtTask(const StmtType type, PartTransTask &host) : + type_(type), + host_(host), + hash_value_(common::OB_INVALID_ID), + row_index_(OB_INVALID_ID), + next_(NULL) + {} + + virtual ~IStmtTask() { reset(); } + + bool is_unknown_stmt() const { return STMT_TYPE_UNKNOWN == type_; } + bool is_dml_stmt() const { return STMT_TYPE_DML == type_; } + bool is_ddl_stmt() const { return STMT_TYPE_DDL == type_; } + + PartTransTask &get_host() { return host_; } + const PartTransTask &get_host() const { return host_; } + uint64_t get_tenant_id() const; + + StmtType get_type() const { return type_; } + void set_type(const int type) { type_ = static_cast(type); } + + void reset(); + + IStmtTask *get_next() { return next_; } + void set_next(IStmtTask *next) { next_ = next; } + + uint64_t hash() const { return hash_value_; } + void set_hash_value(const int64_t hash_value) { hash_value_ = hash_value; } + + uint64_t get_row_index() const { return row_index_; } + void set_row_index(const uint64_t row_index) { row_index_ = row_index; } + + static const char* print_stmt_type(const int type) + { + const char *type_str = "UNKNOWN"; + switch (type) { + case STMT_TYPE_UNKNOWN: + type_str = "UNKNOWN"; + break; + case STMT_TYPE_DML: + type_str = "DML"; + break; + case STMT_TYPE_DDL: + type_str = "DDL"; + break; + default: + type_str = "INVALID"; + break; + } + return type_str; + } + + TO_STRING_KV(K_(type), + "type_str", print_stmt_type(type_), + K_(hash_value), + K_(row_index), + K_(host), + KP_(next)); + +protected: + StmtType type_; + PartTransTask &host_; // part trans task the stmt belongs to + uint64_t hash_value_; // HASH value + uint64_t row_index_; // row index for the stmt in the trans it belongs + IStmtTask *next_; + +private: + DISALLOW_COPY_AND_ASSIGN(IStmtTask); +}; + +//////////////////////////////////////////////////////////////////////////////////// + +// node of column value +struct ColValue +{ + common::ObObj value_; + uint64_t column_id_; + ObString string_value_; // The value after converting Obj to a string + ColValue *next_; + + void reset() + { + value_.reset(); + column_id_ = 
common::OB_INVALID_ID; + string_value_.reset(); + next_ = NULL; + } + + bool is_valid() + { + return value_.is_valid_type() && common::OB_INVALID_ID != column_id_; + } + + ColValue *get_next() { return next_; } + void set_next(ColValue *next) { next_ = next; } + + TO_STRING_KV(K_(value), K_(column_id), K_(string_value)); +}; + +/////////////////////////////////////////////////////////////////////////////////// + +typedef LightyList ColValueList; +class ObObj2strHelper; + +// row value +class MutatorRow : public memtable::ObMemtableMutatorRow +{ +public: + explicit MutatorRow(common::ObIAllocator &allocator); + virtual ~MutatorRow(); + +public: + // Deserialize a row + virtual int deserialize(const char* buf, const int64_t data_len, int64_t& pos); + + // Support for filtering table data within PG + // Deserialize some fields: first step to get row_size, table_id + int deserialize_first(const char* buf, + const int64_t data_len, + int64_t &pos, + int32_t &row_size, + uint64_t &table_id); + + // Deserialize some fields: Step 2 continues the parsing to get the table_version + int deserialize_second(const char* buf, + const int64_t data_len, + int64_t &pos, + int64_t &table_version); + + void reset(); + + // Parse the column data + // If obj2str_helper is empty, do not convert obj to string + int parse_cols(ObObj2strHelper *obj2str_helper = NULL, + const share::schema::ObSimpleTableSchemaV2 *simple_table_schema = NULL, + const TableSchemaInfo *tb_schema_info = NULL, + const bool enable_output_hidden_primary_key = false); + + int get_cols(ColValueList **rowkey_cols, ColValueList **new_cols, ColValueList **old_cols); + + bool is_rollback_stmt() const { return 1 == flag_; } + +public: + TO_STRING_KV("Row", static_cast(*this), + K_(deserialized), + K_(cols_parsed), + K_(new_cols), + K_(old_cols), + K_(rowkey_cols)); + +private: + int parse_columns_(ColValueList &cols, + const char *col_data, + const int64_t col_data_size, + ObObj2strHelper *obj2str_helper, + const share::schema::ObSimpleTableSchemaV2 *simple_table_schema, + const TableSchemaInfo *tb_schema_info, + const bool enable_output_hidden_primary_key); + int parse_rowkey_(ColValueList &rowkey_cols, + const common::ObStoreRowkey &rowkey, + ObObj2strHelper *obj2str_helper, + const share::schema::ObSimpleTableSchemaV2 *simple_table_schema, + const TableSchemaInfo *tb_schema_info, + const bool enable_output_hidden_primary_key); + int add_column_(ColValueList &cols, + const uint64_t column_id, + const ObObj *value, + ObObj2strHelper *obj2str_helper, + const share::schema::ObSimpleTableSchemaV2 *simple_table_schema, + const ColumnSchemaInfo *column_schema); + +private: + common::ObIAllocator &allocator_; + + bool deserialized_; + bool cols_parsed_; + ColValueList new_cols_; // A list of new values for the columns, currently no primary key values are stored, only normal columns + ColValueList old_cols_; // A list of old values for the columns, currently no primary key values are stored, only normal columns + ColValueList rowkey_cols_; // rowkey column + +private: + DISALLOW_COPY_AND_ASSIGN(MutatorRow); +}; + +/////////////////////////////////////////////////////////////////////////////////// +#define DELIMITER_STR "," + +// The DML unique ID is Pkey+LogId+RowIndex +// The format is: PkeyStr,LogID,RowIndex, where the separator is , +class DmlStmtUniqueID +{ +public: + DmlStmtUniqueID(const ObString &pkey_and_log_id_str, const int32_t log_offset, const uint64_t row_index) : + pkey_and_log_id_str_(pkey_and_log_id_str), log_offset_(log_offset), 
row_index_(row_index) {} + ~DmlStmtUniqueID() { reset(); } +public: + void reset() + { + log_offset_ = 0; + row_index_ = OB_INVALID_ID; + } + + bool is_valid() const + { return ! pkey_and_log_id_str_.empty() && log_offset_ >= 0 && OB_INVALID_ID != row_index_; } + const ObString &get_pkey_and_log_id_str() const { return pkey_and_log_id_str_; } + uint64_t get_row_index() const { return row_index_; } + int64_t get_dml_unique_id_length() const; + +public: + // row_index(uint64_t): to_cstring长度20 + static const int64_t MAX_ROW_INDEX_LENGTH = 20; + static int64_t compute_str_length_base_num(uint64_t num); + // Optimising customisation to_string + int customized_to_string(char* buf, const int64_t buf_len, int64_t &pos) const; + TO_STRING_KV(K_(pkey_and_log_id_str), K_(row_index)); + +private: + const ObString &pkey_and_log_id_str_; + int32_t log_offset_; + uint64_t row_index_; + +private: + DISALLOW_COPY_AND_ASSIGN(DmlStmtUniqueID); +}; + +// DML statement task +class DmlStmtTask : public IStmtTask +{ +public: + DmlStmtTask(PartTransTask &host, + ObLogEntryTask &redo_log_entry_task, + ObLogRowDataIndex &row_data_index, + MutatorRow &row); + virtual ~DmlStmtTask(); + + void reset(); + + int64_t get_table_version() const { return row_.table_version_; } + uint64_t get_table_id() const { return row_.table_id_; } + int64_t get_part_id() const; + const common::ObStoreRowkey &get_rowkey() const { return row_.rowkey_; } + storage::ObRowDml get_dml_type() const { return row_.dml_type_; } + + // Parse the column data + // If obj2str_helper is empty, then no conversion of obj to string + // NOTE: you can get_cols() only if you succeed in parse_cols() + int parse_cols(ObObj2strHelper *obj2str_helper = NULL, + const share::schema::ObSimpleTableSchemaV2 *simple_table_schema = NULL, + const TableSchemaInfo *tb_schema_info = NULL, + const bool enable_output_hidden_primary_key = false) + { + return row_.parse_cols(obj2str_helper, simple_table_schema, tb_schema_info, enable_output_hidden_primary_key); + } + + int get_cols(ColValueList **rowkey_cols, ColValueList **new_cols, ColValueList **old_cols) + { + return row_.get_cols(rowkey_cols, new_cols, old_cols); + } + + ObLogEntryTask &get_redo_log_entry_task() { return redo_log_entry_task_; } + ObLogRowDataIndex &get_row_data_index() { return row_data_index_; } + + int32_t get_row_sql_no() const { return row_.sql_no_; } + +public: + TO_STRING_KV("IStmtTask", static_cast(*this), + K_(row), + K_(redo_log_entry_task), + K_(row_data_index)); + +private: + ObLogEntryTask &redo_log_entry_task_; + ObLogRowDataIndex &row_data_index_; + MutatorRow &row_; +private: + DISALLOW_COPY_AND_ASSIGN(DmlStmtTask); +}; + +/////////////////////////////////////////////////////////////////////////////// + +// DDL unique ID using cluster_id + schema_version +class DdlStmtUniqueID +{ +public: + DdlStmtUniqueID(const int64_t cluster_id, const uint64_t schema_version) : + cluster_id_(cluster_id), schema_version_(schema_version) {} + ~DdlStmtUniqueID() { reset(); } +public: + void reset() + { + cluster_id_ = OB_INVALID_CLUSTER_ID; + schema_version_ = OB_INVALID_TIMESTAMP; + } + + bool is_valid() const + { return OB_INVALID_CLUSTER_ID != cluster_id_ + && OB_INVALID_TIMESTAMP != schema_version_; } + + int64_t get_cluster_id() const { return cluster_id_; } + uint64_t get_schema_version() const { return schema_version_; } + +public: + int64_t to_string(char* buf, const int64_t buf_len) const; + +private: + int64_t cluster_id_; // cluster ID + int64_t schema_version_; // schema version + +private: + 
DISALLOW_COPY_AND_ASSIGN(DdlStmtUniqueID); +}; + +class DdlStmtTask : public IStmtTask +{ +public: + DdlStmtTask(PartTransTask &host, MutatorRow &row, const int64_t cluster_id); + virtual ~DdlStmtTask(); + +public: + void reset(); + + // parse DDL data + // init DDL Binlog Record + int parse_ddl_info(ObLogBR *br, + const uint64_t row_index, + bool &is_valid_ddl, + int64_t &update_schema_version, + uint64_t &exec_tennat_id); + + // get ddl str + const ObString &get_ddl_stmt_str() const { return ddl_stmt_str_; } + int64_t get_operation_type() const { return ddl_operation_type_; } + uint64_t get_op_table_id() const { return ddl_op_table_id_; } + uint64_t get_op_tenant_id() const { return ddl_op_tenant_id_; } + uint64_t get_op_database_id() const { return ddl_op_database_id_; } + uint64_t get_op_tablegroup_id() const { return ddl_op_tablegroup_id_; } + int64_t get_op_schema_version() const { return ddl_op_schema_version_; } + uint64_t get_exec_tenant_id() const { return ddl_exec_tenant_id_; } + int64_t get_cluster_id() const { return cluster_id_; } + int32_t get_row_sql_no() const { return row_.sql_no_; } + + ObLogBR *get_binlog_record() { return br_; } + const ObLogBR *get_binlog_record() const { return br_; } + void set_binlog_record(ObLogBR *br) { br_ = br; } + +public: + // tennat_id(UINT64_MAX: 20) + schema_version(INT64_MAX:19) + static const int64_t MAX_DDL_UNIQUE_ID_LENGTH = 50; + // schema_version(INT64_MAX:19) + static const int64_t MAX_DDL_SCHEMA_VERSION_STR_LENGTH = 20; + // log id (INT64_MAX:19) + static const int64_t MAX_PREPRAR_LOG_ID_LENGTH = 20; + +public: + TO_STRING_KV("IStmtTask", static_cast(*this), + K_(row), + K_(ddl_exec_tenant_id), + K_(ddl_stmt_str), + K_(ddl_op_schema_version), + K_(ddl_operation_type), + K_(ddl_op_table_id), + K_(ddl_op_tenant_id), + K_(ddl_op_database_id), + K_(ddl_op_tablegroup_id), + K_(cluster_id)); + +private: + int parse_ddl_info_(bool &contain_ddl_stmt, int64_t &update_schema_version); + int parse_schema_version_(ObObj &col_value, int64_t &schema_version); + int parse_ddl_info_from_normal_columns_(ColValueList &col_value_list); + // 1. schema non-split mode returns the pure_id itself + // 2. 
schema split mode returns the calculated result if the pure_id is valid; otherwise returns the pure_id itself + // When in schema split mode, the common tenant table_id, database_id, user_id and tablegroup_id are + // removed from the tenant information and need to be recalculated to ensure the schema is refreshed correctly + uint64_t combine_id_(const bool is_schema_split_mode, + const uint64_t tenant_id, + const uint64_t pure_id); + int build_ddl_binlog_record_(ObLogBR *br, + const ObString &ddl_stmt, + const uint64_t row_index); + bool is_recyclebin_database_id(const uint64_t tenant_id, const uint64_t database_id); + bool is_drop_table_ddl_(const int64_t ddl_operation_type); + bool is_drop_tablegroup_ddl_(const int64_t ddl_operation_type); + bool is_drop_tenant_ddl_(const int64_t ddl_operation_type); + bool is_global_index_ddl_(const int64_t ddl_operation_type); + // OB_DDL_CREATE_INDEX + // OB_DDL_DROP_INDEX + bool is_normal_index_ddl_(const int64_t ddl_operation_type); + bool is_create_tenant_end_ddl_(const int64_t ddl_operation_type); + bool is_finish_schema_split_ddl_(const int64_t ddl_operation_type); + // OB_DDL_ADD_SUB_PARTITION + // OB_DDL_DROP_SUB_PARTITION + bool is_sub_partition_alter_ddl_(const int64_t ddl_operation_type); + int init_ddl_unique_id_(common::ObString &ddl_unique_id); + +private: + MutatorRow &row_; + ObString ddl_stmt_str_; + int64_t ddl_operation_type_; + int64_t ddl_op_schema_version_; + char ddl_op_schema_version_str_[MAX_DDL_SCHEMA_VERSION_STR_LENGTH]; + + uint64_t ddl_op_table_id_; + uint64_t ddl_op_tenant_id_; + uint64_t ddl_op_database_id_; + uint64_t ddl_op_tablegroup_id_; + + // Record Executor Tenant ID + uint64_t ddl_exec_tenant_id_; + + // record cluster ID + int64_t cluster_id_; + + ObLogBR *br_; + +private: + DISALLOW_COPY_AND_ASSIGN(DdlStmtTask); +}; + +///////////////////////////////////////////////////////////////////////////////// + +typedef LightyList StmtList; + +class ObLogEntryTask +{ +public: + ObLogEntryTask(); + virtual ~ObLogEntryTask(); + void reset(); + bool is_valid() const; + +public: + int init(const common::ObPartitionKey &pkey, + const transaction::ObTransID &trans_id, + const uint64_t log_id, + const int32_t log_offset, + DmlRedoLogMetaNode *meta_node, + char *data, + const int64_t size, + const int64_t pos); + int append_redo_log(const int64_t log_no, + const uint64_t log_id, + const char *buf, + const int64_t buf_len); + + uint64_t hash() const + { + uint64_t hash_value = get_partition().hash(); + + return hash_value; + } + +public: + inline void *get_host() { return host_; } + inline void set_host(void *host) { host_ = host; } + + const common::ObPartitionKey &get_partition() const { return partition_; } + uint64_t get_tenant_id() const { return partition_.get_tenant_id(); } + bool is_ddl_part() const { return is_ddl_table(partition_.get_table_id()); } + + const transaction::ObTransID &get_trans_id() const { return trans_id_; } + uint64_t get_log_id() const { return log_id_; } + int32_t get_log_offset() const { return log_offset_; } + + DmlRedoLogMetaNode *get_meta_node() { return meta_node_; } + int get_valid_row_num(int64_t &valid_row_num); + + common::ObIAllocator &get_allocator() { return arena_allocator_; } + void *alloc(const int64_t size); + void free(void *ptr); + + const DmlRedoLogNode &get_redo_log_node() const { return redo_node_; } + DmlRedoLogNode &get_redo_log_node() { return redo_node_; } + + const StmtList &get_stmt_list() const { return stmt_list_; } + StmtList &get_stmt_list() { return stmt_list_; } + 
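+  // Rough sketch of how a DML ObLogEntryTask is expected to be driven, pieced together from the
+  // interfaces in this class (the actual call sites live in PartTransTask and the parser/formatter
+  // modules, so the variable names here are illustrative only):
+  //
+  //   ObLogEntryTask *task = NULL;
+  //   log_entry_task_pool->alloc(task, part_trans_task);            // IObLogEntryTaskPool
+  //   task->init(pkey, trans_id, log_id, log_offset, meta_node, buf, buf_size, data_len);
+  //   task->append_redo_log(log_no, log_id, lob_buf, lob_buf_len);  // follow-up LOB redo pieces
+  //   task->add_stmt(row_index, stmt);                              // one per parsed row
+  //   task->inc_formatted_stmt_num();                               // one per formatted statement
+  //   task->link_row_list();                                        // once all statements are formatted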
int64_t get_stmt_num() const { return stmt_list_.num_; } + int add_stmt(const uint64_t row_index, IStmtTask *stmt_task); + + // Increases the number of statements that complete the formatting and returns the result after the increase + int64_t inc_formatted_stmt_num(); + + // 1. iterate through the formatted DmlStmt, concatenating all row indices + // 2. Recycle directly for invalid binlog records + // 3. Linkup row indexes of rollback stmt as well, for savepoint-rollback + int link_row_list(); + + int64_t dec_row_ref_cnt(); + void set_row_ref_cnt(const int64_t ref_cnt); + int64_t get_row_ref_cnt() const { return ATOMIC_LOAD(&row_ref_cnt_); } + + enum CallBackModule + { + NONE = -1, + + DML_PARSER_CB = 0, + FORMATTER_CB = 1, + STORAGER_CB = 2, + }; + static const char *print_callback_module(const CallBackModule cb); + static bool is_dml_parser(const CallBackModule cb); + static bool is_formatter(const CallBackModule cb); + static bool is_storager(const CallBackModule cb); + + TO_STRING_KV(K_(partition), + K_(trans_id), + K_(log_id), + K_(log_offset), + K_(meta_node), + K_(redo_node), + K_(stmt_list), + K_(formatted_stmt_num), + K_(row_ref_cnt)); + +private: + int revert_binlog_record_(ObLogBR *br); + +private: + void *host_; // PartTransTask host + + common::ObPartitionKey partition_; // Partition key + transaction::ObTransID trans_id_; // Transaction ID + + // Log ID, for redo data + // 1. non-LOB record corresponding to LogEntry log_id + // 2. First LogEntry log_id for LOB records + uint64_t log_id_; + // for not aggre log: log_offset_=0 + // for aggre log: log_offset_ record offset + int32_t log_offset_; + + DmlRedoLogMetaNode *meta_node_; // meta node + DmlRedoLogNode redo_node_; // data node + StmtList stmt_list_; // statement list + int64_t formatted_stmt_num_; // Number of statements that formatted + int64_t row_ref_cnt_; // reference count + + // Non-thread safe allocator + // used for Parser/Formatter + common::ObArenaAllocator arena_allocator_; // allocator + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogEntryTask); +}; + +///////////////////////////////////////////////////////////////////////////////// + +class PartTransDispatcher; +struct TransCommitInfo; + +// Partitioned transaction tasks +// Distinguish between DDL transactions, DML transactions and heartbeats to facilitate differentiation of transaction types when parsing +class PartTransTask : public TransTaskBase, public ObLogResourceRecycleTask +{ +public: + enum TaskType + { + TASK_TYPE_UNKNOWN = 0, + TASK_TYPE_DML_TRANS, // DML trans + TASK_TYPE_DDL_TRANS, // DDL trans + TASK_TYPE_PART_HEARTBEAT, // heartbeat of partition level + TASK_TYPE_GLOBAL_HEARTBEAT, // heartbeat of global level, used to pass checkpoint info for downstream + TASK_TYPE_OFFLINE_PARTITION, // partition offline task + TASK_TYPE_NOT_SERVED_TRANS, // not served trans, convert from other trans type + TASK_TYPE_MAX + }; + enum ServedState + { + UNSERVED = 0, // Partition transaction is abort or partition transaction is not served + SERVED = 1 + }; + +public: + PartTransTask(); + virtual ~PartTransTask(); + +public: + static const char *print_task_type(const TaskType type); + +public: + void reset(); + void set_pkey_info(const common::ObPartitionKey &partition, + const char *pkey_str); + + /// The initialisation process of a transaction task is divided into four stages. + /// where: the DML transaction task processing process, where the maintenance of the completion status is completed, and the disassembly, maintenance and distribution of the task. 
+ /// + /// 1. Upon receipt of the Redo log: push_redo_log(); + /// For DML: When the detection of single redo/multiple redo logs [LOB] is complete, split the sub-task and send it to the redo ObLogEntryTask, followed by the ObLogEntryTask callback processing + /// 2. Upon receipt of the prepare log: prepare() + /// If the transaction commits in bulk, call commit() to enter the commit or pre-commit state + /// 3. If the Redo log is missing, continue to push_redo_log(); after the missing log is filled, prepare() again + /// 4. If the Commit log is received: commit() + /// + /// @retval OB_ENTRY_EXIST redo log already exists + /// @retval OB_LOG_MISSING redo log missing, current log push failed: LOB intermediate log scenario, missing LOB start log + int push_redo_log(const common::ObPartitionKey &pkey, + const transaction::ObTransID &trans_id, + const int64_t log_no, + const uint64_t log_id, + const int32_t log_offset, + const int64_t tstamp, + const char *buf, + const int64_t buf_len, + bool &need_dispatch_log_entry_task, + ObLogEntryTask *&redo_log_entry_task); + + /// Prepare normal transaction tasks, the transaction type may be DDL or DML + /// + /// @param [in] partition partitionKey + /// @param [in] timestamp Partition transaction timestamp, set to Prepare log timestamp + /// @param [in] trans_id Transaction ID + /// @param [in] prepare_log_id Prepare log ID, for single partitioned transactions without Prepare, the last one shall prevail + /// @param [in] cluster_id cluster ID + /// @param [in] freeze version freeze version + /// @param [in] trace_id app trace id + /// @param [in] trace_info app trace info + /// @param [in] elt_trans_info_array Precursor transaction information, for early unlocking scenarios: + /// Single partition trans on single machine : precursor transaction information is based on the OB_LOG_SP_ELR_TRANS_COMMITCsp_commit) log + /// Multi-partition trans on single machine: Preceding transaction information is based on the PREPARE log + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int prepare(const common::ObPartitionKey &partition, + const int64_t timestamp, + const transaction::ObTransID &trans_id, + const uint64_t prepare_log_id, + const uint64_t cluster_id, + const common::ObVersion freeze_version, + const ObString &trace_id, + const ObString &trace_info, + const transaction::ObElrTransInfoArray &elt_trans_info_array); + + /// Submit a normal transaction task + /// Requires that the prepares have been successful and that the redo log is complete + /// + /// @param [in] global_trans_version Global transaction version + /// @param [in] participant array + /// @param [in] is_ready_to_commit 1. whether it is pre-commit, i.e. the commit status has not yet been determined and the status is subsequently advanced by checkpoint + /// 2. 
This contains the batch commit transaction and the early unlock scenario "pre-commit" + /// @param [in] first_log_ts Record the first log timestamp of the partition + /// @param [in] trans_id Transaction ID + /// @param [in] trans_commit_info Transaction commit info + /// @param [in] part_trans_dispatcher Partition transaction dispatcher + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int commit(const int64_t global_trans_version, + const transaction::PartitionLogInfoArray &participants, + const bool is_ready_to_commit, + const int64_t first_log_ts, + const transaction::ObTransID &trans_id, + const TransCommitInfo &trans_commit_info, + PartTransDispatcher &part_trans_dispatcher); + + /// Early Row Unlock Scenario: registering information about the successor transaction for callbacks by the successor transaction + /// + /// @param [in] trans_id trans id + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int register_elt_follow_trans(const transaction::ObTransID &trans_id); + + /// Early Row Unlock Scenario: registering information about the successor transaction for callbacks by the successor transaction + /// + /// @param [in] trans_id trans id + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int handle_elt_trans_callback(const transaction::ObTransID &trans_id, + PartTransDispatcher &part_trans_dispatcher); + + /// try to set PartTransTask in DataReady + /// PartTransDispatcher dispatch commit part trans + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int try_to_set_data_ready_status(); + + /// DmlParser/Formatter/Storager handle ObLogEntryTask callback + /// + /// @param [in] log_entry_task ObLogEntryTask + /// @param [out] is_unserved_part_trans_task_can_be_recycled + /// + /// @retval OB_SUCCESS succ + /// @retval other fail + int handle_log_entry_task_callback(const ObLogEntryTask::CallBackModule cb_module, + ObLogEntryTask &log_entry_task, + bool &is_unserved_part_trans_task_can_be_recycled); + + /// PartTransDispatcher::remove_task call + /// + /// @param [out] is_unserved_part_trans_task_can_be_recycled + /// + /// @retval OB_SUCCESS succ + /// @retval other fail + int handle_unserved_part_trans(bool &is_unserved_part_trans_task_can_be_recycled); + + // Initialize partition heartbeat task information + // Set the type to: TASK_TYPE_PART_HEARTBEAT + // + /// @param [in] pkey partition key + /// @param [in] timestamp Heartbeat timestamp + // + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int init_part_heartbeat_info(const common::ObPartitionKey &pkey, const int64_t timestamp); + + // Initialize global heartbeat task information, global heartbeat task is independent of partition + // Set the type to: TASK_TYPE_GLOBAL_HEARTBEAT + // + /// @param [in] timestamp heartbeat timestamp + // + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int init_global_heartbeat_info(const int64_t timestamp); + + // Initially offline partition task + // Set the type to: TASK_TYPE_OFFLINE_PARTITION + // + // @param [in] partition PartitionKey + // + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int init_offline_partition_task(const common::ObPartitionKey &partition); + + // is task info valid or not + bool is_task_info_valid() const; + + // Convert to a non-serviceable partitioned transaction type + // Note: Only conversions from DML/DDL type transactions are supported, direct initialisation is not supported + int 
convert_to_not_served_trans(); + + SortedRedoLogList &get_sorted_redo_list() { return sorted_redo_list_; } + bool is_contain_empty_redo_log() const { return 0 == sorted_redo_list_.get_node_number(); } + + SortedDmlRowList &get_sorted_dml_row_list() { return sorted_dml_row_list_; } + int64_t get_br_num() const { return sorted_dml_row_list_.get_row_num(); } + + void set_checkpoint_seq(const int64_t seq) { checkpoint_seq_ = seq; } + int64_t get_checkpoint_seq() const { return checkpoint_seq_; } + + void set_type(const TaskType type) { type_ = type; } + TaskType get_type() const { return type_; } + + void set_exec_tenant_id(const uint64_t exec_tenant_id) { exec_tenant_id_ = exec_tenant_id; } + uint64_t get_exec_tenant_id() const { return exec_tenant_id_; } + + bool is_global_heartbeat() const { return TASK_TYPE_GLOBAL_HEARTBEAT == type_; } + bool is_part_heartbeat() const { return TASK_TYPE_PART_HEARTBEAT == type_; } + bool is_ddl_part_heartbeat() const + { + return is_part_heartbeat() && is_ddl_table(partition_.get_table_id()); + } + bool is_ddl_part() const { return is_ddl_table(partition_.get_table_id()); } + bool is_dml_trans() const { return TASK_TYPE_DML_TRANS == type_; } + bool is_ddl_trans() const { return TASK_TYPE_DDL_TRANS == type_; } + bool is_offline_partition_task() const { return TASK_TYPE_OFFLINE_PARTITION == type_; } + // Is it a DDL OFFLINE task + bool is_ddl_offline_task() const + { + return is_offline_partition_task() && is_ddl_table(partition_.get_table_id()); + } + bool is_not_served_trans() const { return TASK_TYPE_NOT_SERVED_TRANS == type_; } + + void set_trans_id(const transaction::ObTransID &trans_id) { trans_id_ = trans_id; } + const transaction::ObTransID &get_trans_id() const { return trans_id_; } + + uint64_t get_prepare_log_id() const { return prepare_log_id_; } + uint64_t get_cluster_id() const { return cluster_id_; } + const common::ObVersion &get_freeze_version() const { return freeze_version_; } + + void set_partition(const common::ObPartitionKey &partition) { partition_ = partition; } + const common::ObPartitionKey &get_partition() const { return partition_; } + bool is_pg() const { return partition_.is_pg(); } + uint64_t get_tenant_id() const { return partition_.get_tenant_id(); } + + const char *get_participant_key_str() const { return pkey_str_; } + const ObString &get_trans_id_str() const { return trans_id_str_; } + + void set_timestamp(const int64_t timestamp) { timestamp_ = timestamp; } + int64_t get_timestamp() const { return timestamp_; } + + // Standalone multi-partition optimized transaction: decide whether to commit based on the checkpoint, and update the status accordingly + // + /// @param [in] checkpoint the partition-level checkpoint being maintained + // + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + int update_trans_committed_status(const int64_t checkpoint, + PartTransDispatcher &part_trans_dispatcher); + bool is_trans_committed() const { return ATOMIC_LOAD(&is_trans_committed_); } + bool is_trans_ready_to_commit() const { return ATOMIC_LOAD(&is_trans_ready_to_commit_); } + + void update_local_schema_version(const int64_t sm_version) + { + if (sm_version > 0) { + local_schema_version_ = std::max(sm_version, local_schema_version_); + } + } + int64_t get_local_schema_version() const { return local_schema_version_; } + + void *alloc(const int64_t size); + void free(void *ptr); + + int add_stmt(const uint64_t row_index, IStmtTask *stmt_task); + int add_ddl_stmt(const uint64_t row_index, DdlStmtTask *ddl_stmt); + const StmtList &get_stmt_list() const { return stmt_list_; } + StmtList &get_stmt_list() { return stmt_list_; } + int64_t
get_stmt_num() const { return stmt_list_.num_; } + + // Free stmt_list memory and clear the statement + void free_stmt_list() + { + // FIXME: Every IStmtTask in the list of statements should be destructured here + // but currently all the memory in the IStmtTask is allocated by the PartTransTask allocator, + // so it is sufficient to reuse the allocator memory directly, which is also the highest performance + stmt_list_.reset(); + } + + void set_formatted(); + int wait_formatted(const int64_t timeout, common::ObCond &cond); + + void set_data_ready(); + int wait_data_ready(const int64_t timeout); + + int64_t dec_ref_cnt(); + void set_ref_cnt(const int64_t ref_cnt); + int64_t get_ref_cnt() const { return ref_cnt_; } + + void set_global_trans_seq(const int64_t seq) { global_trans_seq_ = seq; } + int64_t get_global_trans_seq() const { return global_trans_seq_; } + + void set_next_task(PartTransTask *next) { next_task_ = next; } + PartTransTask *next_task() { return next_task_; } + + int64_t get_global_trans_version() const { return global_trans_version_; } + + common::ObIAllocator &get_allocator() { return allocator_; } + + const transaction::ObPartitionLogInfo *get_participants() const + { + return participants_; + } + + int64_t get_participant_count() const + { + return participant_count_; + } + + // Retrieve the last digit of reserve_field_ + bool has_valid_binlog_record() const + { + return reserve_field_ & 0x01; + } + + // Set the last digit of reserve_field_ to 1 + void set_has_valid_binlog_record() + { + reserve_field_ |= 0x01; + } + + // for unittest start + int set_prepare_log_id(const uint64_t prepare_log_id); + int set_participants(const transaction::PartitionLogInfoArray &participants); + // for unittest end + + void set_allocator(const int64_t page_size, + common::ObIAllocator &large_allocator); + + void set_prealloc_page(void *page); + void revert_prealloc_page(void *&page); + + const ObString &get_trace_id() const { return trace_id_; } + const ObString &get_trace_info() const { return trace_info_; } + const ObString &get_pkey_and_log_id_str() const { return pkey_and_log_id_str_; } + uint64_t &get_row_no() { return row_no_; } + int to_string_trans_id(); + + int revert_by_rollback_savepoint(const uint64_t row_index, const int32_t sql_no); + bool is_served() const { return SERVED == serve_state_; } + + TO_STRING_KV("state", serve_state_, + "type", print_task_type(type_), + K_(type), + K_(exec_tenant_id), + K_(partition), + K_(timestamp), + K_(trans_id), + K_(prepare_log_id), + K_(cluster_id), + K_(row_no), + K_(sorted_redo_list), + K_(sorted_dml_row_list), + "dml_ready_num", dml_ready_redo_node_num_, + K_(global_trans_version), + K_(is_trans_committed), + K_(checkpoint_seq), + K_(global_trans_seq), + KP_(participants), + K_(participant_count), + K_(local_schema_version), + K_(stmt_list), + KP_(next_task), + K_(ref_cnt), + K_(is_data_ready), + KP_(wait_formatted_cond), + K_(trace_id), + K_(trace_info)); + +private: + int init_trace_id_(const ObString &trace_id); + int init_trace_info_(const ObString &trace_info); + int to_string_pkey_and_log_id_(const uint64_t prepare_log_id); + int init_participant_array_(const transaction::PartitionLogInfoArray &participants); + void destroy_participant_array_(); + bool is_base_trans_info_valid_() const; + int set_commit_info_(const int64_t global_trans_version, + const transaction::PartitionLogInfoArray &participants); + // Handling of row start + int push_redo_on_row_start_(const transaction::ObTransID &trans_id, + const 
memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const int32_t log_offset, + const char *redo_data, + const int64_t redo_data_size); + int push_ddl_redo_on_row_start_(const memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size, + const int64_t mutator_row_size); + int push_dml_redo_on_row_start_(const transaction::ObTransID &trans_id, + const memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const int32_t log_offset, + const char *redo_data, + const int64_t redo_data_size, + const int64_t mutator_row_size); + int get_log_entry_task_(ObLogEntryTask *&log_entry_task); + // handle non-row-start for lob + int push_redo_on_not_row_start_(const memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size); + int push_ddl_redo_on_not_row_start_(const memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size); + int push_dml_redo_on_not_row_start_(const memtable::ObMemtableMutatorMeta &meta, + const int64_t log_no, + const uint64_t log_id, + const char *redo_data, + const int64_t redo_data_size); + int revert_ddl_stmt_(const int32_t sql_no); + int handle_elr_prev_trans_(PartTransDispatcher &part_trans_dispatcher, + const int64_t first_log_ts); + int handle_elr_follow_trans_(PartTransDispatcher &part_trans_dispatcher); + int commit_elr_trans_(const int64_t prev_trans_arr_cnt, + const int64_t prev_trans_commit_cnt, + const char *commit_reason, + PartTransDispatcher &part_trans_dispatcher); + // Record the list of succeeding transactions for the current transaction + typedef common::ObSEArray ElrFollowTransIdArray; + int init_trans_id_info_(const common::ObPartitionKey &pkey, + const transaction::ObTransID &trans_id); + + int check_dml_redo_node_ready_and_handle_(const int64_t total_node_num, + const int64_t cur_ready_node_num, + bool &is_data_ready); + /// 1. when all DML redo node is formatted in Memory-Mode + /// 2. 
when all DML redo node is stored in Storage-Mode + /// + /// @retval OB_SUCCESS succ + /// @retval other fail + int handle_when_all_dml_redo_node_ready_(); + int handle_unserved_part_trans_(bool &is_unserved_part_trans_task_can_be_recycled); + void set_unserved_() { serve_state_ = UNSERVED; } + +private: + ServedState serve_state_; + TaskType type_; // task type + uint64_t exec_tenant_id_; // record tenant_id for DDL task + + common::ObPartitionKey partition_; // partition Key + const char *pkey_str_; + int64_t timestamp_; // Transaction timestamp, usually set to the Prepare log timestamp + bool is_trans_id_inited_; // Indicates whether trans_id is initialized + transaction::ObTransID trans_id_; // trans ID + ObString trans_id_str_; // string value of trans ID + uint64_t prepare_log_id_; // Prepare log ID, if there is no Prepare transaction for a single partition, the last one shall prevail + uint64_t cluster_id_; // cluster ID + common::ObVersion freeze_version_; // freeze version + + ObString pkey_and_log_id_str_; // store pkey + logId to_cstring + uint64_t row_no_; // for alloc global row_no + + SortedRedoLogList sorted_redo_list_; // ordered redo list + SortedDmlRowList sorted_dml_row_list_; // DML: Ordered list of RowDataIndex + ObLogEntryTask *log_entry_task_; // DML records the task currently being processed + + int64_t global_trans_version_; // Global transaction version, transaction commit version + + // whether the transaction has been committed, i.e. whether the commit log has arrived and the whole transaction is complete + // This variable is only relevant for DML transactions and DDL transactions + bool is_trans_committed_; + + // Whether or not it has been pre-committed, for standalone multipartition transactions + // This variable is only relevant for DML transactions and DDL transactions + bool is_trans_ready_to_commit_; + + // checkpoint seq number + // + // The Fetcher assigns a seq number to all tasks that are sent down and periodically calculates the seq information to be sent down via heartbeat tasks + // Committer sorts tasks arriving out of order based on the seq number and maintains the overall data seq by processing the tasks sequentially + int64_t checkpoint_seq_; + + // Transaction serial number assigned by sequencer globally + // Distributed transaction level, partitioned transactions within a distributed transaction have the same number + int64_t global_trans_seq_; + + // participants info + transaction::ObPartitionLogInfo *participants_; + int64_t participant_count_; + + // Data parsed from Redo + int64_t local_schema_version_; // Schema versions for partitioned transactions + StmtList stmt_list_; // statement list + + // PartTransTask linked list structure + // list of participants in Sequencer + // Fetcher for linking all partition transactions + PartTransTask *next_task_; + + // state variables + int64_t ref_cnt_; // ref count + + common::ObByteLock data_ready_lock_; + + // For DDL partition: whether the formatting is complete + // For DML partition: whether the formatting or storage is complete + // Note: DML partition: empty redo scene, is_data_ready_ = true + bool is_data_ready_; + + // To optimise memory usage, the condition variable is passed in externally + common::ObCond *wait_formatted_cond_; + common::ObCond wait_data_ready_cond_; + + // 1. Increase it when DML redo node is formatted in Memory-Mode + // 2. 
Increase it when DML redo node is stored in Storage-Mode + int64_t dml_ready_redo_node_num_; + + ObSmallArena allocator_; // allocator + + // App Trace ID + ObString trace_id_; + // App Trace Info + ObString trace_info_; + + // Support for early unlocking of lines + transaction::ObElrTransInfoArray prev_trans_arr_; // Array of precursor transactions + ElrFollowTransIdArray follow_trans_arr_; // Array of successor transactions + // An 8 bit reserved field: + // The lowest bit currently represents whether the partition contains a valid DML binlog_record (for DML only) + int8_t reserve_field_; // reserved field + +private: + DISALLOW_COPY_AND_ASSIGN(PartTransTask); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_part_trans_task_queue.cpp b/src/liboblog/src/ob_log_part_trans_task_queue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6bc02c85c8cec9f255748cf9dca0b1ddf1a0969 --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_task_queue.cpp @@ -0,0 +1,138 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_part_trans_task_queue.h" +#include "ob_log_part_trans_dispatcher.h" // PartTransDispatchInfo + +namespace oceanbase +{ +namespace liboblog +{ +// The task may be destroyed by other module threads during the dispatch process, so it needs to be popped out before it can be consumed +// The basic information of the task being dispatched is still recorded in the queue to satisfy the query of the dispatch progress +// +// pop task needs to record basic information about the task +PartTransTask* SafeTaskWithRecordQueue::pop() +{ + PartTransTask *ret_task = NULL; + common::ObByteLockGuard guard(lock_); + ret_task = queue_.pop(); + record_dispatching_task_info_(ret_task); + return ret_task; +} + +void SafeTaskWithRecordQueue::record_dispatching_task_info_(PartTransTask *task) +{ + if (OB_ISNULL(task)) { + // nothing + } else { + dispatching_task_info_.reset(task->is_trans_committed(), + task->is_trans_ready_to_commit(), + task->get_type(), + task->get_prepare_log_id(), + task->get_timestamp(), + task->get_global_trans_version()); + } +} + +void SafeTaskWithRecordQueue::reset_dispatched_task_info() +{ + common::ObByteLockGuard guard(lock_); + dispatching_task_info_.reset(); +} + +void SafePartTransTaskQueue::print_task_queue() +{ + common::ObByteLockGuard guard(lock_); + LOG_INFO("task queue info", "ddl_size", queue_.size()); + + PartTransTask *task = queue_.top(); + int64_t idx = 1; + + while (NULL != task) { + LOG_INFO("task queue", K(idx), "ddl_size", queue_.size(), + K(task), KPC(task)); + + task = task->next_task(); + ++idx; + } +} + +// 1. have pop out the task that is being dispatched, dispatch progress takes dispatching_task_info_ +// +// 2. 
dispatching_task_info_ is marked false, take top element task +// +void SafeTaskWithRecordQueue::update_dispatch_progress_by_task_queue( + int64_t &dispatch_progress, + PartTransDispatchInfo &dispatch_info) +{ + common::ObByteLockGuard guard(lock_); + if (dispatching_task_info_.is_dispatching_) { + if (OB_INVALID_TIMESTAMP != dispatching_task_info_.task_timestamp_) { + dispatch_progress = dispatching_task_info_.task_timestamp_ - 1; + + dispatch_info.next_task_type_ = PartTransTask::print_task_type(dispatching_task_info_.task_type_); + dispatch_info.next_trans_log_id_ = dispatching_task_info_.prepare_log_id_; + dispatch_info.next_trans_committed_ = dispatching_task_info_.is_trans_committed_; + dispatch_info.next_trans_ready_to_commit_ = dispatching_task_info_.is_trans_ready_to_commit_; + dispatch_info.next_trans_global_version_ = dispatching_task_info_.global_trans_version_; + } + } else { + PartTransTask *task = queue_.top(); + if (NULL == task) { + // Queue is empty and not processed + } else if (OB_INVALID_TIMESTAMP != task->get_timestamp()) { + // If there is a task to be output, take the "task to be output timestamp - 1" as the output progress + dispatch_progress = task->get_timestamp() - 1; + + // Update information for the next transaction + // Note: Only DML and DDL are valid + dispatch_info.next_task_type_ = PartTransTask::print_task_type(task->get_type()); + dispatch_info.next_trans_log_id_ = task->get_prepare_log_id(); + dispatch_info.next_trans_committed_ = task->is_trans_committed(); + dispatch_info.next_trans_ready_to_commit_ = task->is_trans_ready_to_commit(); + dispatch_info.next_trans_global_version_ = task->get_global_trans_version(); + } + } +} + +void SafeTaskWithRecordQueue::DispatchingTaskBasicInfo::reset() +{ + is_dispatching_ = false; + is_trans_committed_ = false; + is_trans_ready_to_commit_ = false; + task_type_ = PartTransTask::TaskType::TASK_TYPE_UNKNOWN; + prepare_log_id_ = OB_INVALID_ID; + task_timestamp_ = OB_INVALID_TIMESTAMP; + global_trans_version_ = OB_INVALID_VERSION; +} + +void SafeTaskWithRecordQueue::DispatchingTaskBasicInfo::reset(bool is_trans_committed, + bool is_trans_ready_to_commit, + PartTransTask::TaskType task_type, + uint64_t prepare_log_id, + int64_t task_timestamp, + int64_t global_trans_version) +{ + is_dispatching_ = true; + is_trans_committed_ = is_trans_committed; + is_trans_ready_to_commit_ = is_trans_ready_to_commit; + task_type_ = task_type; + prepare_log_id_ = prepare_log_id; + task_timestamp_ = task_timestamp; + global_trans_version_ = global_trans_version; +} + +} /* liboblog */ +} /* oceanbase */ diff --git a/src/liboblog/src/ob_log_part_trans_task_queue.h b/src/liboblog/src/ob_log_part_trans_task_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..0421abf8dff828cc96885dd3aad6818c06952b74 --- /dev/null +++ b/src/liboblog/src/ob_log_part_trans_task_queue.h @@ -0,0 +1,252 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
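// Editor's note (not part of the patch): update_dispatch_progress_by_task_queue() above
// reports progress as "timestamp of the next dispatching/undispatched transaction minus 1",
// so a consumer may assume everything at or before the reported value has already been
// handed out. A minimal sketch of that rule; ASSUMED_INVALID_TIMESTAMP and the helper name
// are illustrative assumptions.

#include <stdint.h>

static const int64_t ASSUMED_INVALID_TIMESTAMP = -1;  // stand-in for OB_INVALID_TIMESTAMP

static int64_t dispatch_progress_sketch(const int64_t next_task_timestamp,
                                        const int64_t current_progress)
{
  // Keep the previous progress when the next task's timestamp is unknown; otherwise
  // everything strictly before the next task's timestamp is considered dispatched.
  int64_t progress = current_progress;
  if (ASSUMED_INVALID_TIMESTAMP != next_task_timestamp) {
    progress = next_task_timestamp - 1;
  }
  return progress;
}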
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_TASK_QUEUE_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_PART_TRANS_TASK_QUEUE_H__ + +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock + +#include "ob_log_part_trans_task.h" // PartTransTask + +namespace oceanbase +{ +namespace liboblog +{ + +struct PartTransDispatchInfo; + +// task queue +// not thread-safe +struct PartTransTaskQueue +{ +public: + PartTransTaskQueue() : size_(0), head_(NULL), tail_(NULL) {} + ~PartTransTaskQueue() + { + reset(); + } + + void reset() + { + size_ = 0; + head_ = NULL; + tail_ = NULL; + } + + void push(PartTransTask *task) + { + if (NULL != task) { + if (NULL == head_) { + head_ = task; + tail_ = head_; + task->set_next_task(NULL); + } else { + tail_->set_next_task(task); + task->set_next_task(NULL); + tail_ = task; + } + + size_++; + } + } + + PartTransTask* pop() + { + PartTransTask *task = head_; + + // Unconditional output + if (NULL != task) { + // point to next task + head_ = head_->next_task(); + + if (NULL == head_) { + tail_ = NULL; + } + + size_--; + + // clear the pointer to next task + task->set_next_task(NULL); + } + + return task; + } + + PartTransTask* top() + { + return head_; + } + + void remove(PartTransTask *task, bool &exist) + { + exist = false; + if (NULL != task && NULL != head_) { + PartTransTask *iter = head_; + PartTransTask *prev = NULL; + + while (NULL != iter) { + // Find the corresponding task + if (task == iter) { + exist = true; + + // If the previous one is empty, then it is the header node, modify the header node + if (NULL == prev) { + head_ = task->next_task(); + } else { + // If the previous one is not empty, modify the next pointer of the previous node + prev->set_next_task(task->next_task()); + } + + if (NULL == task->next_task()) { + // If the target node is the last node, modify the tail pointer to point to the previous node + tail_ = prev; + } + + // Clear the pointer to next node + task->set_next_task(NULL); + ATOMIC_DEC(&size_); + break; + } else { + prev = iter; + iter = iter->next_task(); + } + } + } + } + + int64_t size() const + { + return size_; + } + +private: + int64_t size_; + PartTransTask *head_; + PartTransTask *tail_; + +private: + DISALLOW_COPY_AND_ASSIGN(PartTransTaskQueue); +}; + +//////////////////////////////////////////////////////// +// +// Locking for operation mutual exclusion +class SafePartTransTaskQueue +{ +public: + SafePartTransTaskQueue() : queue_(), lock_() {} + virtual ~SafePartTransTaskQueue() { reset(); } + + void reset() + { + queue_.reset(); + } + + // Requires sequential push tasks + void push(PartTransTask *task) + { + common::ObByteLockGuard guard(lock_); + queue_.push(task); + } + + PartTransTask *pop() + { + common::ObByteLockGuard guard(lock_); + return queue_.pop(); + } + + PartTransTask *top() + { + common::ObByteLockGuard guard(lock_); + return queue_.top(); + } + + // Lock-protected operation of TOP elements + // + // Func supports member functions: + // void operate() (const PartTransTask *top_task); + // - if top_task is empty, means the head element does not exist + template + void top_operate(Func &func) + { + common::ObByteLockGuard guard(lock_); + func(queue_.top()); + } + + int64_t size() const + { + common::ObByteLockGuard guard(lock_); + return queue_.size(); + } + + void remove(PartTransTask *task, bool &exist) + { + common::ObByteLockGuard guard(lock_); + queue_.remove(task, exist); + } + + // used for debug + // print all tasks in the queue with lock + void print_task_queue(); +protected: + PartTransTaskQueue 
queue_; + mutable common::ObByteLock lock_; +}; + +class SafeTaskWithRecordQueue : public SafePartTransTaskQueue +{ +private: + struct DispatchingTaskBasicInfo + { + bool is_dispatching_; + bool is_trans_committed_; + bool is_trans_ready_to_commit_; + PartTransTask::TaskType task_type_; + uint64_t prepare_log_id_; + int64_t task_timestamp_; + int64_t global_trans_version_; + + // clear record info + void reset(); + + // set record info + void reset(bool is_trans_committed, + bool is_trans_ready_to_commit, + PartTransTask::TaskType task_type, + uint64_t prepare_log_id, + int64_t task_timestamp, + int64_t global_trans_version); + + TO_STRING_KV(K(is_dispatching_), K(is_trans_committed_), K(is_trans_ready_to_commit_), + K(task_type_), K(prepare_log_id_), K(task_timestamp_), K(global_trans_version_)); + }; + +public: + SafeTaskWithRecordQueue() : dispatching_task_info_() {} + ~SafeTaskWithRecordQueue() { dispatching_task_info_.reset(); } +public: + PartTransTask *pop(); + + void reset_dispatched_task_info(); + + void update_dispatch_progress_by_task_queue(int64_t &dispatch_progress, + PartTransDispatchInfo &dispatch_info); +private: + void record_dispatching_task_info_(PartTransTask *task); +private: + DispatchingTaskBasicInfo dispatching_task_info_; // The task being dispatched, the task dispatch is complete and needs to be reset +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_reader_plug_in.cpp b/src/liboblog/src/ob_log_reader_plug_in.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f78580547c40f7030245b6e45d3433f36b0d052f --- /dev/null +++ b/src/liboblog/src/ob_log_reader_plug_in.cpp @@ -0,0 +1,178 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include // ITableMeta +#include "lib/string/ob_string.h" // ObString +#include "ob_log_reader_plug_in.h" +#include "ob_log_binlog_record.h" +#include "ob_log_store_service.h" +#include "ob_log_utils.h" +#include "ob_log_instance.h" + +using namespace oceanbase::common; +using namespace oceanbase::logmessage; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogReader::ObLogReader() : + inited_(false), + store_service_stat_(), + store_service_(NULL) +{ +} + +ObLogReader::~ObLogReader() +{ + destroy(); +} + +int ObLogReader::init(IObStoreService &store_service) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogReader has been initialized"); + ret = OB_INIT_TWICE; + } else { + store_service_ = &store_service; + inited_ = true; + } + + return ret; +} + +void ObLogReader::destroy() +{ + if (inited_) { + inited_ = false; + store_service_stat_.reset(); + store_service_ = NULL; + } +} + +int ObLogReader::read(ObLogRowDataIndex &row_data_index) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + const uint64_t tenant_id = row_data_index.get_tenant_id(); + void *column_family_handle = NULL; + ObLogBR *br = NULL; + ILogRecord *binlog_record = NULL; + std::string key; + std::string value; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogReader has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + LOG_ERROR("get_tenant_guard fail", KR(ret)); + } else { + tenant = guard.get_tenant(); + column_family_handle = tenant->get_cf(); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(row_data_index.get_storage_key(key))) { + LOG_ERROR("get_storage_key fail", KR(ret), "key", key.c_str(), K(row_data_index)); + } else if (OB_FAIL(read_store_service_(column_family_handle, row_data_index, key, value))) { + LOG_ERROR("read_store_service_ fail", KR(ret), K(row_data_index)); + } else if (OB_FAIL(row_data_index.construct_serilized_br_data(br))) { + LOG_ERROR("construct_serilized_br_data fail", KR(ret), K(row_data_index)); + } else if (OB_ISNULL(br)) { + LOG_ERROR("ObLogBR is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(binlog_record = br->get_data())) { + LOG_ERROR("binlog_record is NULL", K(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(binlog_record->parse(value.c_str(), value.length()))) { + LOG_ERROR("binlog_record parse fail", K(ret), K(binlog_record), K(row_data_index)); + } else { + const int record_type = binlog_record->recordType(); + LOG_DEBUG("binlog_record parse succ", "record_type", print_record_type(record_type), + K(binlog_record), K(row_data_index)); + + store_service_stat_.do_data_stat(value.length()); + } + + return ret; +} + +int ObLogReader::read_store_service_(void *column_family_handle, + ObLogRowDataIndex &row_data_index, + std::string &key, + std::string &value) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogReader has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(store_service_)) { + LOG_ERROR("store_service_ is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(store_service_->get(column_family_handle, key, value))) { + LOG_ERROR("store_service_ get fail", KR(ret), K(key.c_str()), "value_len", value.length(), K(row_data_index)); + } else { + LOG_DEBUG("store_service_ get succ", K(key.c_str()), "value_len", value.length()); + } + + return ret; +} + +// rocksdb::SetPerfLevel(rocksdb::PerfLevel::kDisable); +//_LOG_INFO("[READER] [STAT] perf=%s", rocksdb::get_perf_context()->ToString().c_str()); +// rocksdb::get_perf_context()->Reset(); +// rocksdb::get_iostats_context()->Reset(); +// rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTime); // open profiling + +int ObLogReader::print_serilized_br_value_(const std::string &key, + ObLogBR &task) +{ + int ret = OB_SUCCESS; + ObArray new_values; + ILogRecord *binlog_record = NULL; + ITableMeta *table_meta = NULL; + + if (OB_ISNULL(binlog_record = task.get_data())) { + LOG_ERROR("binlog_record is NULL", K(task)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(table_meta = LogMsgFactory::createTableMeta())) { + LOG_ERROR("table_meta is NULL"); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (0 != binlog_record->getTableMeta(table_meta)) { + LOG_ERROR("getTableMeta fail"); + ret = OB_ERR_UNEXPECTED; + } else { + bool is_table_meta_null = false; + int64_t col_count = 0; + + if (NULL == table_meta) { + is_table_meta_null = true; + } else { + col_count = table_meta->getColCount(); + } + + // get_br_value(binlog_record, new_values); + LOG_INFO("store_service_", "key", key.c_str(), K(is_table_meta_null), K(col_count), K(new_values)); + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_reader_plug_in.h 
b/src/liboblog/src/ob_log_reader_plug_in.h new file mode 100644 index 0000000000000000000000000000000000000000..01b3e8c5975e1240621818f55a6a080fd95700d5 --- /dev/null +++ b/src/liboblog/src/ob_log_reader_plug_in.h @@ -0,0 +1,58 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_READER_H_ +#define OCEANBASE_LIBOBLOG_READER_H_ + +#include "ob_log_row_data_index.h" // ObLogRowDataIndex +#include "ob_log_store_service_stat.h" // StoreServiceStatInfo + +namespace oceanbase +{ +namespace liboblog +{ +class IObStoreService; + +class ObLogReader +{ +public: + ObLogReader(); + virtual ~ObLogReader(); + int init(IObStoreService &store_service); + void destroy(); + +public: + int read(ObLogRowDataIndex &row_data_index); + StoreServiceStatInfo& get_store_stat_info() { return store_service_stat_; } + +private: + int read_store_service_(void *column_family_handle, + ObLogRowDataIndex &row_data_index, + std::string &key, + std::string &value); + + // for test + int print_serilized_br_value_(const std::string &key, + ObLogBR &br); + +private: + bool inited_; + StoreServiceStatInfo store_service_stat_; + IObStoreService *store_service_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogReader); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif diff --git a/src/liboblog/src/ob_log_ref_state.cpp b/src/liboblog/src/ob_log_ref_state.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d4a4c54837ace12a19b2429f18bde9d77b9f3f84 --- /dev/null +++ b/src/liboblog/src/ob_log_ref_state.cpp @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_log_ref_state.h" + +#include "lib/atomic/ob_atomic.h" // ATOMIC_* + +namespace oceanbase +{ +namespace liboblog +{ + +bool RefState::inc_ref(const int64_t target_state, int64_t &new_state, int64_t &new_ref) +{ + bool succ = false; + RefState cur_st(ATOMIC_LOAD(&value_)); + + while (! 
succ && cur_st.state_ == target_state) { + RefState old_st = cur_st; + RefState new_st = cur_st; + new_st.ref_cnt_++; // Status unchanged, reference count +1 + + cur_st.value_ = ATOMIC_CAS(&value_, old_st.value_, new_st.value_); + + if (cur_st.value_ == old_st.value_) { + succ = true; + // The reference count is successfully increased, returning the new state and the new reference count + new_state = new_st.state_; + new_ref = new_st.ref_cnt_; + } else { + // Reference count increase failed, return current state and current reference count + new_state = cur_st.state_; + new_ref = cur_st.ref_cnt_; + } + } + + return succ; +} + +void RefState::dec_ref(int64_t &new_state, int64_t &new_ref_cnt) +{ + RefState cur_st(ATOMIC_LOAD(&value_)); + + bool done = false; + while (!done) { + RefState old_st = cur_st; + RefState new_st = cur_st; + new_st.ref_cnt_--; // No change in status, number of transactions minus 1 + + cur_st.value_ = ATOMIC_CAS(&value_, old_st.value_, new_st.value_); + + if (old_st.value_ == cur_st.value_) { + done = true; + // Returns the state value and reference count after the reference count has been subtracted successfully + new_state = new_st.state_; + new_ref_cnt = new_st.ref_cnt_; + } + } +} + +bool RefState::change_state(const int64_t target_state, int64_t &old_state, int64_t &ref_cnt) +{ + bool succ = false; + RefState cur_st(ATOMIC_LOAD(&value_)); + + while (! succ && cur_st.state_ != target_state) { + RefState old_st = cur_st; + RefState new_st = cur_st; + new_st.state_ = target_state; // No change in reference count, change in status + + cur_st.value_ = ATOMIC_CAS(&value_, old_st.value_, new_st.value_); + + if (cur_st.value_ == old_st.value_) { + succ = true; + } + } + + // cur_st holds the old state values + old_state = cur_st.state_; + ref_cnt = cur_st.ref_cnt_; + + return succ; +} + +} +} diff --git a/src/liboblog/src/ob_log_ref_state.h b/src/liboblog/src/ob_log_ref_state.h new file mode 100644 index 0000000000000000000000000000000000000000..d68dbed08bebd502f7989bf1d494166e215c3890 --- /dev/null +++ b/src/liboblog/src/ob_log_ref_state.h @@ -0,0 +1,94 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_REF_STATE_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_REF_STATE_H_ + +#include // int64_t + +namespace oceanbase +{ +namespace liboblog +{ + +/// State with its own reference count +/// +//// state variable (8 bits) + reference count (56) bits => 64 bits +//// +/// Applicable scenarios: atomic maintenance of object state, combining state variables with reference counts to guarantee a safe end-of-life for objects. +/// +/// Example. +/// 1. initialised to the normal state (0) with a reference count of 0 +/// 2. Normal state with reference count dynamically increasing or decreasing +/// 3. When the object is destroyed, the state is atomically changed to offline, after which the reference count can only be decreased, not increased +/// 4. 
When the state becomes offline and the reference count is 0, the object can be safely destroyed +struct RefState +{ + union + { + struct + { + int64_t state_:8; // The lower 8 bits are status variables + int64_t ref_cnt_:56; // The high 56 bits are reference counters + }; + + int64_t value_; // Full 64-bit values + }; + + /// When the state is target_state, the reference count is increased and success is returned + /// If the state is not target_state, no reference counting is performed and a failure is returned + /// + /// @param [in] target_state target_state, only match this state to increase the reference count + /// @param [out] new_state The state value at the end of execution + /// @param [out] new_ref Reference count at the end of execution + /// + /// @retval whether execution was successful + bool inc_ref(const int64_t target_state, int64_t &new_state, int64_t &new_ref); + + /// Unconditionally decreasing reference count + /// + /// @param [out] new_state The value of the state corresponding to the successful subtraction of the reference count + /// @param [out] new_ref_cnt The value of the reference count after it has been subtracted + void dec_ref(int64_t &new_state, int64_t &new_ref_cnt); + + /// Ensure that the reference count remains unchanged and change the current state + /// + /// @param [in] target_state The target state to be changed + /// @param [out] old_state The value of the state before the change + /// @param [out] ref_cnt Reference count at the end of execution + /// + /// @retval true change of state successful, original state not equal to target_state + /// @retval false Failed to change state, original state equal to target_state, i.e. no change needed + bool change_state(const int64_t target_state, int64_t &old_state, int64_t &ref_cnt); + + void reset() + { + state_ = 0; + ref_cnt_ = 0; + } + + // Initialised to specified status + void reset(const int64_t state) + { + state_ = state; + ref_cnt_ = 0; + } + + RefState() { reset(); } + explicit RefState(const int64_t value) : value_(value) {} + ~RefState() { reset(); } +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_resource_collector.cpp b/src/liboblog/src/ob_log_resource_collector.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e15243dfd619b1be30a82e612f83588a0d83a4dc --- /dev/null +++ b/src/liboblog/src/ob_log_resource_collector.cpp @@ -0,0 +1,759 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
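// Editor's note (not part of the patch): a minimal usage sketch of the RefState lifecycle
// spelled out in the comment above. The STATE_NORMAL/STATE_OFFLINE values and the helper
// names are assumptions for illustration; the included header is the file defined above.

#include <stdint.h>
#include "ob_log_ref_state.h"  // RefState

static const int64_t STATE_NORMAL = 0;   // assumed "normal" state value
static const int64_t STATE_OFFLINE = 1;  // assumed "offline" state value

// A reader pins the object only while it is still NORMAL; inc_ref() fails once the state
// has been switched, so no new references can appear after the object goes offline.
static bool use_and_unpin_sketch(oceanbase::liboblog::RefState &ref_state)
{
  int64_t state = 0;
  int64_t ref_cnt = 0;
  bool need_destroy = false;
  if (ref_state.inc_ref(STATE_NORMAL, state, ref_cnt)) {
    // ... access the guarded object here ...
    ref_state.dec_ref(state, ref_cnt);
    // Whoever observes "offline and no references left" is responsible for destruction.
    need_destroy = (STATE_OFFLINE == state && 0 == ref_cnt);
  }
  return need_destroy;
}

// The destroyer flips the state exactly once; if no references remain at that moment it
// may destroy the object immediately, otherwise the last dec_ref() caller will do it.
static bool mark_offline_sketch(oceanbase::liboblog::RefState &ref_state)
{
  int64_t old_state = 0;
  int64_t ref_cnt = 0;
  const bool changed = ref_state.change_state(STATE_OFFLINE, old_state, ref_cnt);
  return changed && 0 == ref_cnt;
}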
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_resource_collector.h" + +#include "storage/transaction/ob_trans_define.h" // ObTransID + +#include "ob_log_part_trans_task.h" // PartTransTask +#include "ob_log_task_pool.h" // ObLogTransTaskPool +#include "ob_log_binlog_record_pool.h" // ObLogBRPool +#include "ob_log_trans_ctx.h" // TransCtx +#include "ob_log_trans_ctx_mgr.h" // IObLogTransCtxMgr +#include "ob_log_row_data_index.h" // ObLogRowDataIndex +#include "ob_log_store_service.h" // IObStoreService +#include "ob_log_binlog_record.h" // ObLogBR +#include "ob_log_meta_manager.h" // IObLogMetaManager +#include "ob_log_instance.h" +#include "ob_log_tenant.h" +#include "ob_log_config.h" + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogResourceCollector::ObLogResourceCollector() : + inited_(false), + br_pool_(NULL), + trans_ctx_mgr_(NULL), + meta_manager_(NULL), + store_service_(NULL), + br_thread_num_(0), + br_count_(0), + total_part_trans_task_count_(0), + ddl_part_trans_task_count_(0), + dml_part_trans_task_count_(0), + hb_part_trans_task_count_(0), + other_part_trans_task_count_(0) +{ +} + +ObLogResourceCollector::~ObLogResourceCollector() +{ + destroy(); +} + +int ObLogResourceCollector::init(const int64_t thread_num, + const int64_t thread_num_for_br, + const int64_t queue_size, + IObLogBRPool *br_pool, + IObLogTransCtxMgr *trans_ctx_mgr, + IObLogMetaManager *meta_manager, + IObStoreService *store_service) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ResourceCollector init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(thread_num_for_br <= 0) + || OB_UNLIKELY(thread_num_for_br >= thread_num) + || OB_UNLIKELY(queue_size <= 0) + || OB_ISNULL(br_pool) + || OB_ISNULL(trans_ctx_mgr) + || OB_ISNULL(meta_manager) + || OB_ISNULL(store_service)) { + LOG_ERROR("invalid arguments", K(thread_num), K(thread_num_for_br), K(queue_size), + K(br_pool), K(trans_ctx_mgr), K(meta_manager), K(store_service)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(RCThread::init(thread_num, queue_size))) { + LOG_ERROR("init ResourceCollector threads fail", KR(ret), K(thread_num), K(queue_size)); + } else { + br_pool_ = br_pool; + trans_ctx_mgr_ = trans_ctx_mgr; + meta_manager_ = meta_manager; + store_service_ = store_service; + br_thread_num_ = thread_num_for_br; + br_count_ = 0; + total_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_part_trans_task_count_ = 0; + hb_part_trans_task_count_ = 0; + other_part_trans_task_count_ = 0; + inited_ = true; + + LOG_INFO("init ResourceCollector succ", K(thread_num), K(thread_num_for_br), K(queue_size)); + } + return ret; +} + +void ObLogResourceCollector::destroy() +{ + RCThread::destroy(); + inited_ = false; + br_pool_ = NULL; + trans_ctx_mgr_ = NULL; + meta_manager_ = NULL; + store_service_ = NULL; + br_thread_num_ = 0; + br_count_ = 0; + total_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_part_trans_task_count_ = 0; + hb_part_trans_task_count_ = 0; + other_part_trans_task_count_ = 0; +} + +int ObLogResourceCollector::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(RCThread::start())) { + LOG_ERROR("start ResourceCollector threads fail", KR(ret)); + } else { + LOG_INFO("start ResourceCollector threads succ", KR(ret)); + } + return ret; +} + +void ObLogResourceCollector::stop() +{ + if (inited_) { + RCThread::stop(); + LOG_INFO("stop ResourceCollector threads succ"); + } +} + +void ObLogResourceCollector::mark_stop_flag() +{ + if (inited_) { + RCThread::mark_stop_flag(); + } +} + +int ObLogResourceCollector::revert(PartTransTask *task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(push_task_into_queue_(*task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task into queue fail", KR(ret), K(task)); + } + } else { + // NOTE: After entering the queue, the task may be recycled at any time and cannot be further referenced + } + + return ret; +} + +int ObLogResourceCollector::revert(const int record_type, ObLogBR *br) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("invalid argument", K(br)); + ret = OB_INVALID_ARGUMENT; + } else { + if (EDDL == record_type) { + PartTransTask *part_trans_task = NULL; + + if (OB_ISNULL(part_trans_task = static_cast(br->get_host()))) { + LOG_ERROR("binlog record host is invalid", K(br), K(br->get_host())); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(dec_ref_cnt_and_try_to_revert_task_(part_trans_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dec_ref_cnt_and_try_to_revert_task_ fail", KR(ret), KPC(part_trans_task)); + } + } else {} + } else { + // Recycle asynchronously in case of HEARTBEAT、BEGIN、COMMIT、DML + if (OB_FAIL(push_task_into_queue_(*br))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task into queue fail", KR(ret), K(br), "record_type", print_record_type(record_type)); + } + } else { + // NOTE: After entering the queue, the task may be recycled at any time and cannot be further referenced + br = NULL; + } + } + } + + return ret; +} + +int ObLogResourceCollector::revert_unserved_task(const bool is_rollback_row, + ObLogRowDataIndex &row_data_index) +{ + int ret = OB_SUCCESS; + ObLogBR *br = row_data_index.get_binlog_record(); + + if (OB_UNLIKELY(! row_data_index.is_valid())) { + LOG_ERROR("row_data_index is not valid", K(row_data_index)); + ret = OB_INVALID_ARGUMENT; + } else { + // Storager mode, br is NULL + if (NULL == br) { + if (! 
is_rollback_row) { + if (OB_FAIL(del_store_service_data_(row_data_index))) { + LOG_ERROR("del_store_service_data_", KR(ret), K(row_data_index)); + } + } + } else { + // Memory mode + if (OB_FAIL(dec_ref_cnt_and_try_to_recycle_log_entry_task_(*br))) { + LOG_ERROR("dec_ref_cnt_and_try_to_recycle_log_entry_task_ fail", KR(ret)); + } else if (OB_FAIL(revert_single_binlog_record_(br))) { + LOG_ERROR("revert binlog record fail", KR(ret), K(br)); + } else { + br = NULL; + } + } + } + + return ret; +} + +int ObLogResourceCollector::revert_log_entry_task(ObLogEntryTask *log_entry_task) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(revert_log_entry_task_(log_entry_task))) { + LOG_ERROR("revert_log_entry_task_ fail", KR(ret), KPC(log_entry_task)); + } else { + log_entry_task = NULL; + } + + return ret; +} + +int ObLogResourceCollector::dec_ref_cnt_and_try_to_revert_task_(PartTransTask *part_trans_task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(part_trans_task)) { + LOG_ERROR("invalid arguments", K(part_trans_task)); + ret = OB_INVALID_ARGUMENT; + } else { + // Decrement the reference count of partitionk transaction task + // The partition transaction task needs to be recycled if the reference count becomes 0 + // Cannot continue to reference partition transaction tasks after that time, since partitioned transaction tasks may be recalled at any time + const bool need_revert_part_trans_task = (part_trans_task->dec_ref_cnt() == 0); + + if (need_revert_part_trans_task) { + if (OB_FAIL(revert(part_trans_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert PartTransTask fail", KR(ret), K(part_trans_task)); + } + } else { + part_trans_task = NULL; + } + } else { + // Cannot continue to access partition transaction task when do not need to recycle it + } + } + + return ret; +} + +int ObLogResourceCollector::revert_log_entry_task_(ObLogEntryTask *log_entry_task) +{ + int ret = OB_SUCCESS; + IObLogEntryTaskPool *log_entry_task_pool = TCTX.log_entry_task_pool_; + + if (OB_ISNULL(log_entry_task)) { + LOG_ERROR("log_entry_task is NULL"); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(log_entry_task_pool)) { + LOG_ERROR("log_entry_task_pool is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + const bool is_test_mode_on = TCONF.test_mode_on != 0; + if (is_test_mode_on) { + LOG_INFO("LogEntryTask-free", "LogEntryTask", *log_entry_task, "addr", log_entry_task); + } + + log_entry_task->~ObLogEntryTask(); + log_entry_task_pool->free(log_entry_task); + } + + return ret; +} + +int ObLogResourceCollector::revert_participants_(const int64_t thread_index, + PartTransTask *participants) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(participants)) { + LOG_ERROR("invalid arguments", K(participants)); + ret = OB_INVALID_ARGUMENT; + } else { + PartTransTask *task = participants; + + while (OB_SUCCESS == ret && NULL != task) { + PartTransTask *next = task->next_task(); + task->set_next_task(NULL); + + if (OB_FAIL(recycle_part_trans_task_(thread_index, task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("recycle_part_trans_task_ fail", KR(ret), K(thread_index), KPC(task)); + } + } else { + task = next; + } + } + + task = NULL; + } + + return ret; +} + +int ObLogResourceCollector::push_task_into_queue_(ObLogResourceRecycleTask &task) +{ + int ret = OB_SUCCESS; + static uint64_t part_trans_task_push_seq = 0; + static uint64_t br_push_seq = 0; + uint64_t hash_value = 0; + + if (task.is_part_trans_task()) { + hash_value = ATOMIC_FAA(&part_trans_task_push_seq, 1); + hash_value = (hash_value % (RCThread::get_thread_num() - br_thread_num_)) + br_thread_num_; + + PartTransTask *part_trans_task = static_cast(&task); + + if (OB_ISNULL(part_trans_task)) { + LOG_ERROR("invalid argument", K(part_trans_task)); + ret = OB_ERR_UNEXPECTED; + } else { + do_stat_(*part_trans_task, true/*need_accumulate_stat*/); + } + } else if (task.is_binlog_record_task()) { + (void)ATOMIC_AAF(&br_count_, 1); + + hash_value = ATOMIC_FAA(&br_push_seq, 1); + hash_value = hash_value % br_thread_num_; + } else {} + + // push to thread queue, asynchronous recycling + while (OB_SUCC(ret) && ! RCThread::is_stoped()) { + ret = RCThread::push(&task, hash_value, DATA_OP_TIMEOUT); + + if (OB_TIMEOUT != ret) { + break; + } + } + // Note: After a task is pushed to the queue, it may be recycled quickly and the task cannot be accessed later + + if (RCThread::is_stoped()) { + ret = OB_IN_STOP_STATE; + } + + return ret; +} + +int ObLogResourceCollector::recycle_part_trans_task_(const int64_t thread_index, + PartTransTask *task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + if (task->is_ddl_trans()) { + if (OB_FAIL(revert_dll_all_binlog_records_(task))) { + // Reclaim all Binlog Records within a DDL partitioned transaction + LOG_ERROR("revert_dll_all_binlog_records_ fail", KR(ret), K(*task)); + } + } + LOG_DEBUG("[ResourceCollector] recycle part trans task", K(thread_index), K(*task)); + + do_stat_(*task, false/*need_accumulate_stat*/); + + // recycle resource + task->revert(); + task = NULL; + } + + return ret; +} + +int ObLogResourceCollector::handle(void *data, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ObLogResourceRecycleTask *recycle_task = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ResourceCollector has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(stop_flag)) { + ret = OB_IN_STOP_STATE; + } else if (OB_ISNULL(data)) { + LOG_ERROR("invalid argument", K(data)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(recycle_task = static_cast(data))) { + LOG_ERROR("recycle_task is NULL", K(recycle_task)); + ret = OB_ERR_UNEXPECTED; + } else { + ObLogResourceRecycleTask::TaskType task_type = recycle_task->get_task_type(); + + if (recycle_task->is_part_trans_task()) { + PartTransTask *task = static_cast(recycle_task); + + // DML/DDL + if (task->is_ddl_trans() || task->is_dml_trans()) { + // Guaranteed reference count of 0 + if (OB_UNLIKELY(0 != task->get_ref_cnt())) { + LOG_ERROR("can not revert part trans task, ref_cnt is not zero", K(*task)); + ret = OB_INVALID_ARGUMENT; + } else { + bool enable_create = false; + TransCtx *trans_ctx = NULL; + bool all_participant_revertable = false; + // Copy the Trans ID to avoid invalidating the Trans ID when the PartTransTask is recycled + ObTransID trans_id = task->get_trans_id(); + + if (OB_FAIL(trans_ctx_mgr_->get_trans_ctx(trans_id, trans_ctx, enable_create))) { + LOG_ERROR("get trans_ctx fail", KR(ret), K(trans_id), K(*task)); + } + // Increase the number of participants that can be recycled + else if (OB_FAIL(trans_ctx->inc_revertable_participant_count(all_participant_revertable))) { + LOG_ERROR("trans_ctx.inc_revertable_participant_count fail", KR(ret), K(*trans_ctx)); + } + // Recycle the distributed transaction if all participants are available for recycling + else if (all_participant_revertable) { + PartTransTask *participants = trans_ctx->get_participant_objs(); + + if (OB_FAIL(trans_ctx->revert_participants())) { + LOG_ERROR("trans_ctx.revert_participants fail", KR(ret), K(*trans_ctx)); + } else if (OB_FAIL(trans_ctx_mgr_->remove_trans_ctx(trans_id))) { + LOG_ERROR("remove trans_ctx fail", KR(ret), K(trans_id), K(trans_ctx)); + } + // recycle all participants + else if (NULL != participants && OB_FAIL(revert_participants_(thread_index, participants))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert_participants_ fail", KR(ret), K(thread_index), K(participants), K(trans_id)); + } + } else { + participants = NULL; + } + } else { + // do nothing + } + + if (NULL != trans_ctx) { + int err = trans_ctx_mgr_->revert_trans_ctx(trans_ctx); + if (OB_SUCCESS != err) { + LOG_ERROR("revert_trans_ctx fail", K(err)); + ret = OB_SUCCESS == ret ? 
              err : ret;
+            }
+          }
+
+          task = NULL;
+        }
+      } else {
+        // All other tasks are recycled directly
+        if (OB_FAIL(recycle_part_trans_task_(thread_index, task))) {
+          if (OB_IN_STOP_STATE != ret) {
+            LOG_ERROR("recycle_part_trans_task_ fail", KR(ret), K(thread_index), KPC(task));
+          }
+        } else {
+          task = NULL;
+        }
+      }
+    } else if (recycle_task->is_binlog_record_task()) {
+      // HEARTBEAT / BEGIN / COMMIT / INSERT / DELETE / UPDATE
+      ObLogBR *task = static_cast<ObLogBR *>(recycle_task);
+      int record_type = RecordType::EUNKNOWN;
+
+      if (OB_ISNULL(task)) {
+        LOG_ERROR("ObLogBR task is NULL");
+        ret = OB_ERR_UNEXPECTED;
+      } else if (OB_FAIL(task->get_record_type(record_type))) {
+        LOG_ERROR("ObLogBR task get_record_type fail", KR(ret));
+      } else {
+        if (HEARTBEAT == record_type || EBEGIN == record_type || ECOMMIT == record_type) {
+          br_pool_->free(task);
+        } else {
+          if (OB_FAIL(revert_dml_binlog_record_(*task))) {
+            if (OB_IN_STOP_STATE != ret) {
+              LOG_ERROR("revert_dml_binlog_record_ fail", KR(ret), KPC(task));
+            }
+          } else {}
+        }
+        (void)ATOMIC_AAF(&br_count_, -1);
+        task = NULL;
+      }
+    } else {
+      LOG_ERROR("task type not supported", K(recycle_task), K(thread_index),
+          "task_type", ObLogResourceRecycleTask::print_task_type(task_type));
+      ret = OB_NOT_SUPPORTED;
+    }
+  }
+
+  return ret;
+}
+
+int ObLogResourceCollector::revert_dml_binlog_record_(ObLogBR &br)
+{
+  int ret = OB_SUCCESS;
+  PartTransTask *part_trans_task = NULL;
+  ObLogRowDataIndex *row_data_index = NULL;
+  const bool is_serilized = br.is_serilized();
+
+  if (OB_UNLIKELY(! inited_)) {
+    LOG_ERROR("ResourceCollector has not been initialized");
+    ret = OB_NOT_INIT;
+  } else if (OB_ISNULL(row_data_index = static_cast<ObLogRowDataIndex *>(br.get_host()))) {
+    LOG_ERROR("row_data_index is NULL", K(br));
+    ret = OB_INVALID_ARGUMENT;
+  } else if (OB_ISNULL(part_trans_task = static_cast<PartTransTask *>(row_data_index->get_host()))) {
+    LOG_ERROR("part_trans_task is NULL", KPC(row_data_index));
+    ret = OB_ERR_UNEXPECTED;
+  } else if (is_serilized) {
+    if (OB_FAIL(del_store_service_data_(*row_data_index))) {
+      LOG_ERROR("del_store_service_data_ fail", KR(ret), KPC(row_data_index));
+    }
+  } else {
+    if (OB_FAIL(dec_ref_cnt_and_try_to_recycle_log_entry_task_(br))) {
+      LOG_ERROR("dec_ref_cnt_and_try_to_recycle_log_entry_task_ fail", KR(ret));
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    if (OB_FAIL(revert_single_binlog_record_(&br))) {
+      LOG_ERROR("revert_single_binlog_record_ fail", KR(ret));
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    if (OB_FAIL(dec_ref_cnt_and_try_to_revert_task_(part_trans_task))) {
+      if (OB_IN_STOP_STATE != ret) {
+        LOG_ERROR("dec_ref_cnt_and_try_to_revert_task_ fail", KR(ret), KPC(part_trans_task));
+      }
+    }
+  }
+
+  return ret;
+}
+
+int ObLogResourceCollector::del_store_service_data_(ObLogRowDataIndex &row_data_index)
+{
+  int ret = OB_SUCCESS;
+  std::string key;
+
+  if (OB_FAIL(row_data_index.get_storage_key(key))) {
+    LOG_ERROR("get_storage_key fail", KR(ret), "key", key.c_str());
+  } else {
+    const uint64_t tenant_id = row_data_index.get_tenant_id();
+    ObLogTenantGuard guard;
+    ObLogTenant *tenant = NULL;
+    void *column_family_handle = NULL;
+
+    if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) {
+      LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id));
+    } else {
+      tenant = guard.get_tenant();
+      column_family_handle = tenant->get_cf();
+    }
+
+    if (OB_SUCC(ret)) {
+      if (OB_FAIL(store_service_->del(column_family_handle, key))) {
+        LOG_ERROR("store_service_ del fail", KR(ret), K(key.c_str()), K(row_data_index));
+      } else {
+        LOG_DEBUG("store_service_ del succ", K(key.c_str()), K(row_data_index));
+      }
+    }
+  }
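+
+  // Key layout follows ObLogRowDataIndex::get_storage_key():
+  // "<participant_key>_<log_id>_<log_offset>_<row_no>", deleted from the tenant's column family handle above.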
+
+  return ret;
+}
+
+int ObLogResourceCollector::dec_ref_cnt_and_try_to_recycle_log_entry_task_(ObLogBR &br)
+{
+  int ret = OB_SUCCESS;
+  ObLogEntryTask *log_entry_task = static_cast<ObLogEntryTask *>(br.get_log_entry_task());
+
+  if (OB_ISNULL(log_entry_task)) {
+    LOG_ERROR("log_entry_task is NULL");
+    ret = OB_ERR_UNEXPECTED;
+  } else {
+    if (TCONF.test_mode_on) {
+      LOG_INFO("revert_dml_binlog_record", KPC(log_entry_task));
+    }
+    const bool need_revert_log_entry_task = (log_entry_task->dec_row_ref_cnt() == 0);
+
+    if (need_revert_log_entry_task) {
+      if (OB_FAIL(revert_log_entry_task_(log_entry_task))) {
+        LOG_ERROR("revert_log_entry_task_ fail", KR(ret), KPC(log_entry_task));
+      } else {
+        log_entry_task = NULL;
+      }
+    }
+  }
+
+  return ret;
+}
+
+int ObLogResourceCollector::revert_dll_all_binlog_records_(PartTransTask *task)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY(! inited_)) {
+    LOG_ERROR("ResourceCollector has not been initialized");
+    ret = OB_NOT_INIT;
+  } else if (OB_ISNULL(task)) {
+    LOG_ERROR("invalid argument", K(task));
+    ret = OB_INVALID_ARGUMENT;
+  } else if (OB_UNLIKELY(! task->is_ddl_trans())) {
+    LOG_ERROR("is not ddl trans, unexpected", KPC(task));
+    ret = OB_ERR_UNEXPECTED;
+  } else {
+    const StmtList &stmt_list = task->get_stmt_list();
+    DdlStmtTask *stmt_task = static_cast<DdlStmtTask *>(stmt_list.head_);
+
+    // Iterate through all statements, get all Binlog Records and reclaim them
+    // FIXME: the Binlog Record contains references to memory allocated by the PartTransTask.
+    // They should be actively freed here, but since the PartTransTask releases that memory uniformly when it is reclaimed,
+    // the memory referenced by the Binlog Record is not actively freed here
+    while (OB_SUCC(ret) && NULL != stmt_task) {
+      DdlStmtTask *next = static_cast<DdlStmtTask *>(stmt_task->get_next());
+      ObLogBR *br = stmt_task->get_binlog_record();
+      stmt_task->set_binlog_record(NULL);
+
+      if (OB_FAIL(revert_single_binlog_record_(br))) {
+        LOG_ERROR("revert_single_binlog_record_ fail", KR(ret));
+      }
+
+      stmt_task = next;
+    }
+  }
+
+  return ret;
+}
+
+int ObLogResourceCollector::revert_single_binlog_record_(ObLogBR *br)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY(! inited_)) {
+    LOG_ERROR("ResourceCollector has not been initialized");
+    ret = OB_NOT_INIT;
+  } else if (OB_ISNULL(br)) {
+    LOG_ERROR("br is NULL");
+    ret = OB_INVALID_ARGUMENT;
+  } else {
+    const bool is_serilized = br->is_serilized();
+
+    if (! is_serilized) {
+      ILogRecord *br_data = NULL;
+
+      if (OB_ISNULL(br_data = br->get_data())) {
+        LOG_ERROR("binlog record data is invalid", K(br));
+        ret = OB_INVALID_ARGUMENT;
+      } else {
+        ITableMeta *tblMeta = NULL;
+        // recycle Table Meta of binlog record
+        if (0 != br_data->getTableMeta(tblMeta)) {
+          LOG_ERROR("getTableMeta fail");
+          ret = OB_ERR_UNEXPECTED;
+        } else if (NULL != tblMeta) {
+          meta_manager_->revert_table_meta(tblMeta);
+          br_data->setTableMeta(NULL);
+        }
+
+        // recycle DB Meta of binlog record
+        if (NULL != br_data->getDBMeta()) {
+          meta_manager_->revert_db_meta(br_data->getDBMeta());
+          br_data->setDBMeta(NULL);
+        }
+      }
+    }
+
+    if (OB_SUCC(ret)) {
+      br_pool_->free(br);
+      br = NULL;
+    }
+  }
+
+  return ret;
+}
+
+int64_t ObLogResourceCollector::get_part_trans_task_count() const
+{
+  return ATOMIC_LOAD(&total_part_trans_task_count_);
+}
+
+void ObLogResourceCollector::do_stat_(PartTransTask &task,
+    const bool need_accumulate_stat)
+{
+  int64_t cnt = 1;
+
+  if (!
need_accumulate_stat) { + cnt = -1; + } + + (void)ATOMIC_AAF(&total_part_trans_task_count_, cnt); + + if (task.is_ddl_trans()) { + (void)ATOMIC_AAF(&ddl_part_trans_task_count_, cnt); + } else if (task.is_dml_trans()) { + (void)ATOMIC_AAF(&dml_part_trans_task_count_, cnt); + } else if (task.is_part_heartbeat() || task.is_global_heartbeat()) { + (void)ATOMIC_AAF(&hb_part_trans_task_count_, cnt); + } else { + (void)ATOMIC_AAF(&other_part_trans_task_count_, cnt); + } +} + +void ObLogResourceCollector::print_stat_info() const +{ + _LOG_INFO("[RESOURCE_COLLECTOR] [STAT] BR=%ld TOTAL_PART=%ld DDL=%ld DML=%ld HB=%ld OTHER=%ld", + br_count_, + total_part_trans_task_count_, + ddl_part_trans_task_count_, + dml_part_trans_task_count_, + hb_part_trans_task_count_, + other_part_trans_task_count_); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_resource_collector.h b/src/liboblog/src/ob_log_resource_collector.h new file mode 100644 index 0000000000000000000000000000000000000000..774c5a5f4f0601c1a7ec39b31a85932c9f7f2bc4 --- /dev/null +++ b/src/liboblog/src/ob_log_resource_collector.h @@ -0,0 +1,143 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_RESOURCE_COLLECTOR_H__ +#define OCEANBASE_LIBOBLOG_RESOURCE_COLLECTOR_H__ + +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread +#include "ob_log_resource_recycle_task.h" // ObLogResourceRecycleTask +#include "ob_log_utils.h" // _SEC_ + +namespace oceanbase +{ +namespace liboblog +{ + +class PartTransTask; +class ObLogEntryTask; +class ObLogBR; +class ObLogRowDataIndex; + +class IObLogResourceCollector +{ +public: + enum { MAX_THREAD_NUM = 64 }; + +public: + virtual ~IObLogResourceCollector() {} + +public: + /// Recycle PartTransTask + /// @note: Require that all Binlog Records in the PartTransTask have been recycled + virtual int revert(PartTransTask *task) = 0; + + // recycle Binlog Record + virtual int revert(const int record_type, ObLogBR *br) = 0; + + virtual int revert_unserved_task(const bool is_rollback_row, + ObLogRowDataIndex &row_data_index) = 0; + + virtual int revert_log_entry_task(ObLogEntryTask *log_entry_task) = 0; + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int64_t get_part_trans_task_count() const = 0; + virtual void print_stat_info() const = 0; +}; + +//////////////////////////////////////////////////////////// + +typedef common::ObMQThread RCThread; + +class IObLogBRPool; +class IObLogTransCtxMgr; +class IObLogMetaManager; +class IObStoreService; + +class ObLogResourceCollector : public IObLogResourceCollector, public RCThread +{ + static const int64_t DATA_OP_TIMEOUT = 10L * 1000L * 1000L; + static const int64_t PRINT_INTERVAL = 5 * _SEC_; + +public: + ObLogResourceCollector(); + virtual ~ObLogResourceCollector(); + +public: + int init(const int64_t thread_num, + const int64_t thread_num_for_br, + const int64_t queue_size, + IObLogBRPool *br_pool, + IObLogTransCtxMgr *trans_ctx_mgr, + IObLogMetaManager 
*meta_manager, + IObStoreService *store_service); + void destroy(); + +public: + int revert(PartTransTask *task); + int revert(const int record_type, ObLogBR *br); + int revert_unserved_task(const bool is_rollback_row, + ObLogRowDataIndex &row_data_index); + int revert_log_entry_task(ObLogEntryTask *log_entry_task); + +public: + int start(); + void stop(); + void mark_stop_flag(); + int handle(void *data, const int64_t thread_index, volatile bool &stop_flag); + int64_t get_part_trans_task_count() const; + void print_stat_info() const; + +private: + int push_task_into_queue_(ObLogResourceRecycleTask &task); + int revert_participants_(const int64_t thread_idx, PartTransTask *participants); + // Reclaiming resources for partitioned tasks + int recycle_part_trans_task_(const int64_t thread_idx, PartTransTask *task); + int revert_dll_all_binlog_records_(PartTransTask *task); + int revert_single_binlog_record_(ObLogBR *br); + int revert_dml_binlog_record_(ObLogBR &br); + int dec_ref_cnt_and_try_to_revert_task_(PartTransTask *part_trans_task); + int del_store_service_data_(ObLogRowDataIndex &row_data_index); + int dec_ref_cnt_and_try_to_recycle_log_entry_task_(ObLogBR &br); + int revert_log_entry_task_(ObLogEntryTask *log_entry_task); + + void do_stat_(PartTransTask &task, + const bool need_accumulate_stat); + +private: + bool inited_; + IObLogBRPool *br_pool_; + IObLogTransCtxMgr *trans_ctx_mgr_; + IObLogMetaManager *meta_manager_; + IObStoreService *store_service_; + // BinlogRecord and PartTransTask need handle separately in order to avoid maybe deadlock + int64_t br_thread_num_; + // Count the number of binlog record + int64_t br_count_; + + // Count the number of partition transaction tasks + int64_t total_part_trans_task_count_ CACHE_ALIGNED; + int64_t ddl_part_trans_task_count_ CACHE_ALIGNED; + int64_t dml_part_trans_task_count_ CACHE_ALIGNED; + int64_t hb_part_trans_task_count_ CACHE_ALIGNED; + int64_t other_part_trans_task_count_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogResourceCollector); +}; + +} // namespace liboblog +} // namespace oceanbase + +#endif diff --git a/src/liboblog/src/ob_log_resource_recycle_task.h b/src/liboblog/src/ob_log_resource_recycle_task.h new file mode 100644 index 0000000000000000000000000000000000000000..31251ede5ce0cba7f71aa2a49a47595887b45b1d --- /dev/null +++ b/src/liboblog/src/ob_log_resource_recycle_task.h @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_RESOURCE_RECYCLE_TASK_H__ +#define OCEANBASE_LIBOBLOG_RESOURCE_RECYCLE_TASK_H__ + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogResourceRecycleTask +{ +public: + enum TaskType + { + UNKNOWN_TASK = 0, + PART_TRANS_TASK = 1, + BINLOG_RECORD_TASK = 2 + }; + bool is_unknown_task() const { return UNKNOWN_TASK == task_type_; } + bool is_part_trans_task() const { return PART_TRANS_TASK == task_type_; } + bool is_binlog_record_task() const { return BINLOG_RECORD_TASK == task_type_; } + TaskType get_task_type() const { return task_type_; } + + static const char *print_task_type(TaskType task) + { + const char *str = "UNKNOWN_TASK"; + + switch (task) { + case PART_TRANS_TASK: + str = "PartTransTask"; + break; + case BINLOG_RECORD_TASK: + str = "BinlogRecordTask"; + break; + default: + str = "UNKNOWN_TASK"; + break; + } + + return str; + } + +public: + ObLogResourceRecycleTask() : task_type_(UNKNOWN_TASK) {} + ObLogResourceRecycleTask(TaskType task_type) : task_type_(task_type) {} + ~ObLogResourceRecycleTask() { task_type_ = UNKNOWN_TASK; } + +public: + TaskType task_type_; +}; + +} // namespace liboblog +} // namespace oceanbase + +#endif diff --git a/src/liboblog/src/ob_log_row_data_index.cpp b/src/liboblog/src/ob_log_row_data_index.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f1b5f92d810d6b92cf6ec8805dae8fc267a8fda9 --- /dev/null +++ b/src/liboblog/src/ob_log_row_data_index.cpp @@ -0,0 +1,234 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_row_data_index.h" +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "ob_log_instance.h" +#include "ob_log_binlog_record_pool.h" +#include "ob_log_meta_manager.h" + +namespace oceanbase +{ +namespace liboblog +{ +ObLogRowDataIndex::ObLogRowDataIndex() : + br_(NULL), + host_(NULL), + tenant_id_(OB_INVALID_TENANT_ID), + participant_key_str_(NULL), + log_id_(OB_INVALID_ID), + log_offset_(0), + row_no_(OB_INVALID_ID), + is_rollback_(false), + row_sql_no_(0), + br_commit_seq_(0), + trans_ctx_host_(NULL), + next_(NULL) +{ +} + +ObLogRowDataIndex::~ObLogRowDataIndex() +{ + reset(); +} + +void ObLogRowDataIndex::reset() +{ + br_ = NULL; + host_ = NULL; + tenant_id_= OB_INVALID_TENANT_ID; + participant_key_str_ = NULL; + log_id_ = OB_INVALID_ID; + log_offset_ = 0; + row_no_ = OB_INVALID_ID; + is_rollback_ = false; + row_sql_no_ = 0; + br_commit_seq_ = 0; + trans_ctx_host_ = NULL; + next_ = NULL; +} + +int ObLogRowDataIndex::init(const uint64_t tenant_id, + const char *participant_key, + const uint64_t log_id, + const int32_t log_offset, + const uint64_t row_no, + const bool is_rollback, + const int32_t row_sql_no) +{ + int ret = OB_SUCCESS; + + if (OB_NOT_NULL(br_)) { + LOG_ERROR("ILogRecord has been created"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(participant_key) + || OB_UNLIKELY(OB_INVALID_ID == log_id) + || OB_UNLIKELY(log_offset < 0) + || OB_UNLIKELY(OB_INVALID_ID == row_no)) { + LOG_ERROR("invalid argument", K(participant_key), K(log_id), K(log_offset), K(row_no)); + ret = OB_INVALID_ARGUMENT; + } else { + tenant_id_ = tenant_id; + participant_key_str_ = participant_key; + log_id_ = log_id; + log_offset_ = log_offset; + row_no_ = row_no; + is_rollback_ = is_rollback; + row_sql_no_ = row_sql_no; + set_next(NULL); + } + + return ret; +} + +bool ObLogRowDataIndex::is_valid() const +{ + bool bool_ret = false; + + bool_ret = (NULL != participant_key_str_) + && (OB_INVALID_ID != log_id_) + && (log_offset_ >= 0) + && (OB_INVALID_ID != row_no_); + + return bool_ret; +} + +int ObLogRowDataIndex::get_storage_key(std::string &key) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(participant_key_str_)) { + LOG_ERROR("invalid argument"); + ret = OB_INVALID_ARGUMENT; + } else { + key.append(participant_key_str_); + key.append("_"); + key.append(std::to_string(log_id_)); + key.append("_"); + key.append(std::to_string(log_offset_)); + key.append("_"); + key.append(std::to_string(row_no_)); + } + + return ret; +} + + +bool ObLogRowDataIndex::before(const ObLogRowDataIndex &row_index, const bool is_single_row) +{ + bool bool_ret = false; + + if (! is_single_row) { + bool_ret = log_before_(row_index); + } else { + bool_ret = log_before_(row_index) || (log_equal_(row_index) && (row_no_ < row_index.row_no_)); + } + + return bool_ret; +} + +bool ObLogRowDataIndex::equal(const ObLogRowDataIndex &row_index, const bool is_single_row) +{ + bool bool_ret = false; + + if (! 
is_single_row) { + bool_ret = log_equal_(row_index); + } else { + bool_ret = log_equal_(row_index) && (row_no_ == row_index.row_no_); + } + + return bool_ret; +} + +int ObLogRowDataIndex::free_br_data() +{ + int ret = OB_SUCCESS; + IObLogBRPool *br_pool = TCTX.br_pool_; + IObLogMetaManager *meta_manager = TCTX.meta_manager_; + ILogRecord *br_data = NULL; + + if (OB_ISNULL(br_)) { + LOG_ERROR("invalid argument", K(*this)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br_pool) || OB_ISNULL(meta_manager)) { + LOG_ERROR("br_pool or meta_manager is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(br_data = br_->get_data())) { + LOG_ERROR("binlog record data is invalid", K(br_)); + ret = OB_INVALID_ARGUMENT; + } else { + ITableMeta *tblMeta = NULL; + // recycle Table Meta + if (0 != br_data->getTableMeta(tblMeta)) { + LOG_ERROR("getTableMeta fail"); + ret = OB_ERR_UNEXPECTED; + } else if (NULL != tblMeta) { + meta_manager->revert_table_meta(tblMeta); + br_data->setTableMeta(NULL); + } + + // recycle DB Meta + if (NULL != br_data->getDBMeta()) { + meta_manager->revert_db_meta(br_data->getDBMeta()); + br_data->setDBMeta(NULL); + } + } + + if (OB_SUCC(ret)) { + br_pool->free(br_); + br_ = NULL; + } + + return ret; +} + +int ObLogRowDataIndex::construct_serilized_br_data(ObLogBR *&br) +{ + int ret = OB_SUCCESS; + IObLogBRPool *br_pool = TCTX.br_pool_; + + if (OB_UNLIKELY(NULL != br_)) { + LOG_ERROR("invalid argument", K(*this)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(br_pool)) { + LOG_ERROR("br_pool is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(br_pool->alloc(true/*is_serilized*/, br_, this))) { + LOG_ERROR("br_pool alloc fail", KPC(this), K(br_)); + } else { + br = br_; + } + + return ret; +} + +int64_t ObLogRowDataIndex::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + + if (NULL != buf && buf_len > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "key:%s_", participant_key_str_); + (void)common::databuff_printf(buf, buf_len, pos, "%lu_", log_id_); + (void)common::databuff_printf(buf, buf_len, pos, "%d_", log_offset_); + (void)common::databuff_printf(buf, buf_len, pos, "%lu,", row_no_); + (void)common::databuff_printf(buf, buf_len, pos, "is_rollback=%d,", is_rollback_); + (void)common::databuff_printf(buf, buf_len, pos, "row_sql_no=%d,", row_sql_no_); + (void)common::databuff_printf(buf, buf_len, pos, "br_seq=%ld", br_commit_seq_); + } + + return pos; +} + +} +} diff --git a/src/liboblog/src/ob_log_row_data_index.h b/src/liboblog/src/ob_log_row_data_index.h new file mode 100644 index 0000000000000000000000000000000000000000..3bf1423df41e80af4449988322faa664e35fdacf --- /dev/null +++ b/src/liboblog/src/ob_log_row_data_index.h @@ -0,0 +1,108 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_ROW_INDEX_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_ROW_INDEX_H_ + +#include "ob_log_binlog_record.h" + +namespace oceanbase +{ +namespace liboblog +{ +// Row data index, in-memory index of one INSERT/UPDATE/DELETE statement +class ObLogRowDataIndex +{ +public: + ObLogRowDataIndex(); + virtual ~ObLogRowDataIndex(); + void reset(); + +public: + int init(const uint64_t tenant_id, + const char *participant_key, + const uint64_t log_id, + const int32_t log_offset, + const uint64_t row_no, + const bool is_rollback, + const int32_t row_sql_no); + bool is_valid() const; + + ObLogBR *get_binlog_record() { return br_; } + const ObLogBR *get_binlog_record() const { return br_; } + void set_binlog_record(ObLogBR *br) { br_ = br; } + + uint64_t get_tenant_id() const { return tenant_id_; } + + inline void set_host(void *host) { host_ = host; } + inline void *get_host() { return host_; } + + int get_storage_key(std::string &key); + uint64_t get_log_id() const { return log_id_; } + int32_t get_log_offset() const {return log_offset_; } + uint64_t get_row_no() const { return row_no_; } + bool is_rollback() const { return is_rollback_; } + int32_t get_row_sql_no() const { return row_sql_no_; } + bool before(const ObLogRowDataIndex &row_index, const bool is_single_row); + bool equal(const ObLogRowDataIndex &row_index, const bool is_single_row); + + int64_t get_br_commit_seq() const { return br_commit_seq_; } + void *get_trans_ctx_host() { return trans_ctx_host_; } + void set_br_commit_seq(const int64_t br_commit_seq, void *trans_ctx_host) + { + br_commit_seq_ = br_commit_seq; + trans_ctx_host_ = trans_ctx_host; + } + + void set_next(ObLogRowDataIndex *next) {next_ = next;}; + ObLogRowDataIndex *get_next() {return next_;}; + + // The data needs to be freed from memory after it has been persisted + // 1. free the Unserilized ILogRecord + // 2. Free the serialised value + int free_br_data(); + + int construct_serilized_br_data(ObLogBR *&br); + + int64_t to_string(char* buf, const int64_t buf_len) const; + +private: + bool log_before_(const ObLogRowDataIndex &row_index) { return log_id_ < row_index.log_id_; } + bool log_equal_(const ObLogRowDataIndex &row_index) { return log_id_ == row_index.log_id_; } + +private: + ObLogBR *br_; + void *host_; // PartTransTask + + uint64_t tenant_id_; + + // StorageKey: participant_key+log_id+log_offset+row_no + const char *participant_key_str_; + uint64_t log_id_; + int32_t log_offset_; + // DML, DDL statement record row_no (numbered from 0 within the subdivision, not global) + // 0 for other types of records + uint64_t row_no_; + + bool is_rollback_; + int32_t row_sql_no_; + + int64_t br_commit_seq_; // Streaming commit model - seq + void *trans_ctx_host_; + + ObLogRowDataIndex *next_; +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_row_list.cpp b/src/liboblog/src/ob_log_row_list.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f2f7d7af1e8906e9d52bb157b7e88ac8ea71121 --- /dev/null +++ b/src/liboblog/src/ob_log_row_list.cpp @@ -0,0 +1,265 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_row_list.h" +#include "ob_log_instance.h" // TCTX +#include "ob_log_store_service.h" +#include "ob_log_binlog_record_pool.h" +#include "ob_log_resource_collector.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +SortedDmlRowList::SortedDmlRowList() + : row_num_(0), + head_(NULL), + tail_(NULL) +{ +} + +SortedDmlRowList::~SortedDmlRowList() +{ + reset(); +} + +void SortedDmlRowList::reset() +{ + row_num_ = 0; + head_ = NULL; + tail_ = NULL; +} + +bool SortedDmlRowList::is_valid() const +{ + return row_num_ > 0 && NULL != head_ && NULL != tail_; +} + +int SortedDmlRowList::push(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num, + const bool is_contain_rollback_row) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(row_head) || OB_ISNULL(row_tail) || OB_UNLIKELY(row_num <= 0)) { + LOG_ERROR("row_head or row_tail is NULL", K(row_head), K(row_tail), K(row_num)); + ret = OB_INVALID_ARGUMENT; + } else if (! is_contain_rollback_row) { + if (OB_FAIL(push_when_not_contain_rollback_row_(row_head, row_tail, row_num))) { + LOG_ERROR("push_when_not_contain_rollback_row_ fail", KR(ret), KPC(row_head), KPC(row_tail), K(row_num)); + } + } else { + if (OB_FAIL(push_when_contain_rollback_row_(row_head, row_tail, row_num))) { + LOG_ERROR("push_when_contain_rollback_row_ fail", KR(ret), KPC(row_head), KPC(row_tail), K(row_num)); + } + } + + + return ret; +} + +int SortedDmlRowList::push_when_not_contain_rollback_row_(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num) +{ + int ret = OB_SUCCESS; + const bool is_single_row = false; + + if (OB_ISNULL(row_head) || OB_ISNULL(row_tail) || OB_UNLIKELY(row_num <= 0)) { + LOG_ERROR("row_head or row_tail is NULL", K(row_head), K(row_tail), K(row_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(push_(row_head, row_tail, row_num, is_single_row))){ + LOG_ERROR("push_ fail", KR(ret), KPC(row_head), KPC(row_tail), K(row_num), K(is_single_row)); + } else { + // succ + } + + return ret; +} + +int SortedDmlRowList::push_when_contain_rollback_row_(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(row_head) || OB_ISNULL(row_tail) || OB_UNLIKELY(row_num <= 0)) { + LOG_ERROR("row_head or row_tail is NULL", K(row_head), K(row_tail), K(row_num)); + ret = OB_INVALID_ARGUMENT; + } else { + ObLogRowDataIndex *cur_row_index = row_head; + + while (OB_SUCC(ret) && NULL != cur_row_index) { + ObLogRowDataIndex *next_row_index = cur_row_index->get_next(); + const bool is_rollback_row = cur_row_index->is_rollback(); + + if (! 
is_rollback_row) { + // insert as a single node + cur_row_index->set_next(NULL); + const bool is_single_row = true; + + if (OB_FAIL(push_(cur_row_index, cur_row_index, 1, is_single_row))){ + LOG_ERROR("push_ fail", KR(ret), KPC(cur_row_index), K(is_single_row)); + } + } else { + const int32_t rollback_sql_no = cur_row_index->get_row_sql_no(); + + if (OB_FAIL(rollback_row_(rollback_sql_no, *cur_row_index))) { + LOG_ERROR("rollback_row_ fail", KR(ret), K(rollback_sql_no), KPC(cur_row_index)); + } + } + + if (OB_SUCC(ret)) { + cur_row_index = next_row_index; + } + } // while + } + + return ret; +} + +int SortedDmlRowList::push_(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num, + const bool is_single_row) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(row_head) || OB_ISNULL(row_tail) || OB_UNLIKELY(row_num <= 0)) { + LOG_ERROR("row_head or row_tail is NULL", K(row_head), K(row_tail), K(row_num)); + ret = OB_INVALID_ARGUMENT; + } else { + if (NULL == head_) { + head_ = row_head; + tail_ = row_tail; + row_num_ = row_num; + } else if (OB_ISNULL(tail_)) { + LOG_ERROR("tail node is NULL, but head node is not NULL", K(head_), K(tail_)); + ret = OB_ERR_UNEXPECTED; + } else { + // insert tail + if (tail_->before(*row_head, is_single_row)) { + tail_->set_next(row_head); + tail_ = row_tail; + row_num_ += row_num; + // insert head + } else if (row_tail->before(*head_, is_single_row)) { + row_tail->set_next(head_); + head_ = row_head; + row_num_ += row_num; + } else { + // Iterate through all nodes to find the first redo node that is greater than or equal to the target node + ObLogRowDataIndex *pre_ptr = NULL; + ObLogRowDataIndex *cur_ptr = head_; + + while (cur_ptr->before(*row_head, is_single_row)) { + pre_ptr = cur_ptr; + cur_ptr = cur_ptr->get_next(); + } + + // If the node value is duplicated, the error node exists + if (cur_ptr->equal(*row_head, is_single_row)) { + LOG_INFO("redo log is pushed twice", KPC(row_head), KPC(row_tail), KPC(cur_ptr), KPC(this)); + ret = OB_ENTRY_EXIST; + } else { + row_tail->set_next(cur_ptr); + pre_ptr->set_next(row_head); + row_num_ += row_num; + } + + } + } + } + + return ret; +} + +int SortedDmlRowList::rollback_row_(const int32_t rollback_sql_no, + ObLogRowDataIndex &rollback_row_data_index) +{ + int ret = OB_SUCCESS; + const int64_t total_row_num = row_num_; + int64_t save_row_num = 0; + bool found = false; + ObLogRowDataIndex *pre_row_index = NULL; + ObLogRowDataIndex *cur_row_index = head_; + IObLogResourceCollector *resource_collector = TCTX.resource_collector_; + + if (OB_ISNULL(resource_collector)) { + LOG_ERROR("resource_collector is NULL"); + ret = OB_ERR_UNEXPECTED; + } + + // 1. stmt with seq_no less than or equal to the sql_no specified by rollback savepoint is not processed, find the first sql_no greater than rollback_sql_no + while (OB_SUCC(ret) && NULL != cur_row_index && !found) { + const int32_t cur_row_sql_no = cur_row_index->get_row_sql_no(); + + if (cur_row_sql_no <= rollback_sql_no) { + pre_row_index = cur_row_index; + cur_row_index = cur_row_index->get_next(); + ++save_row_num; + } else { + found = true; + } + } // while + + // 2. 
try to rollback and free + if (OB_SUCC(ret) && found) { + if (NULL == pre_row_index) { + head_ = NULL; + tail_ = NULL; + } else { + pre_row_index->set_next(NULL); + tail_ = pre_row_index; + } + row_num_ = save_row_num; + + // Recycle resources: persistent data needs to be cleaned up + // ObLogBR does not need to be recycled here, as the data is currently returned after persistence + while (OB_SUCC(ret) && NULL != cur_row_index) { + ObLogRowDataIndex *next_row_index = cur_row_index->get_next(); + + if (OB_FAIL(resource_collector->revert_unserved_task(false/*is_rollback_row*/, *cur_row_index))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert_unserved_task fail", KR(ret), KPC(cur_row_index)); + } + } + + if (OB_SUCC(ret)) { + cur_row_index = next_row_index; + } + } // while + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(resource_collector->revert_unserved_task(true/*is_rollback_row*/, rollback_row_data_index))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("revert_unserved_task fail", KR(ret), K(rollback_row_data_index)); + } + } + } + + if (OB_SUCC(ret)) { + _LOG_INFO("[SAVEPOINT][DML] ROLLBACK_SQL_NO=%d STMT_CNT=%ld/%ld", + rollback_sql_no, total_row_num, save_row_num); + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_row_list.h b/src/liboblog/src/ob_log_row_list.h new file mode 100644 index 0000000000000000000000000000000000000000..77d7f4ddfcfc0099b843d41ef052f204651b45c8 --- /dev/null +++ b/src/liboblog/src/ob_log_row_list.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_BR_LIST_H__ +#define OCEANBASE_LIBOBLOG_BR_LIST_H__ + +#include "ob_log_row_data_index.h" // ObLogRowDataIndex + +namespace oceanbase +{ +namespace liboblog +{ +// Reassembles the partition transaction based on the row data ObLogEntryTask, the class that holds all ObLogRowDataIndex +class SortedDmlRowList +{ +public: + SortedDmlRowList(); + ~SortedDmlRowList(); + +public: + void reset(); + bool is_valid() const; + // 1. Each ObLogEntryTask contains single/multiple br data and has been concatenated + // 2. 
When it contains rollback, it needs to be handled + int push(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num, + const bool is_contain_rollback_row); + + int64_t get_row_num() const { return row_num_; } + ObLogRowDataIndex *get_head() { return head_; } + ObLogRowDataIndex *get_tail() { return tail_; } + + TO_STRING_KV(K_(row_num), + K_(head), + K_(tail)); + +private: + int push_when_not_contain_rollback_row_(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num); + int push_when_contain_rollback_row_(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num); + // is_single_row = false: insert a linklist to SortedDmlRowList + // is_single_row = true: insert a row to SortedDmlRowList + int push_(ObLogRowDataIndex *row_head, + ObLogRowDataIndex *row_tail, + const int64_t row_num, + const bool is_single_row); + // dml stmt is strictly incremented by sql_no, just find the first statement that is greater than sql_no and roll back that and subsequent statements + int rollback_row_(const int32_t rollback_sql_no, + ObLogRowDataIndex &rollback_row_data_index); + +private: + int64_t row_num_; + ObLogRowDataIndex *head_; + ObLogRowDataIndex *tail_; +}; + +} // namespace liboblog +} // namespace oceanbase + +#endif diff --git a/src/liboblog/src/ob_log_rpc.cpp b/src/liboblog/src/ob_log_rpc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dd1c57aa52a92f3af10a718aa49b143354442849 --- /dev/null +++ b/src/liboblog/src/ob_log_rpc.cpp @@ -0,0 +1,268 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_rpc.h" + +#include "lib/utility/ob_macro_utils.h" // OB_FAIL +#include "lib/oblog/ob_log_module.h" // LOG_ERROR + +#include "ob_log_config.h" // ObLogConfig +#include "observer/ob_srv_network_frame.h" +#include "share/ob_encrypt_kms.h" // ObSSLClient + + +/// The rpc proxy executes the RPC function with two error codes: +/// 1. proxy function return value ret +/// 2. the result code carried by the proxy itself: proxy.get_result_code().rcode_, which indicates the error code returned by RPC processing on the target server +/// +/// The two error codes above are related. +/// 1. Synchronous RPC +/// + on success of ret, the result code must be OB_SUCCESS +/// + ret failure, result code failure means that the RPC failed to process on the target machine and returned the packet, including process processing failure, tenant not present, etc. +/// result code success means local RPC delivery failed, or the remote server machine is unresponsive for a long time, no packet return, etc. + +/// 2. 
Asynchronous RPC +/// + result code returned by proxy is meaningless because RPC is executed asynchronously and does not wait for packets to be returned to set the result code +/// + ret failure only means that the local RPC framework sent an error, excluding the case of no packet return from the target server +/// +/// Based on the above analysis, for the caller sending the RPC, only the ret return value is of concern, and ret can completely replace the result code + +#define SEND_RPC(RPC, SVR, TIMEOUT, REQ, ARG) \ + do { \ + if (OB_UNLIKELY(! inited_)) { \ + LOG_ERROR("not init"); \ + ret = OB_NOT_INIT; \ + } else { \ + obrpc::ObLogExternalProxy proxy; \ + if (OB_FAIL(net_client_.get_proxy(proxy))) { \ + LOG_ERROR("net client get proxy fail", KR(ret)); \ + } else {\ + int64_t max_rpc_proc_time = \ + ATOMIC_LOAD(&ObLogRpc::g_rpc_process_handler_time_upper_limit); \ + proxy.set_server((SVR)); \ + if (OB_FAIL(proxy.by(tenant_id_).trace_time(true).timeout((TIMEOUT))\ + .max_process_handler_time(static_cast(max_rpc_proc_time))\ + .RPC((REQ), (ARG)))) { \ + LOG_ERROR("rpc fail: " #RPC, "svr", (SVR), "rpc_ret", ret, \ + "result_code", proxy.get_result_code().rcode_, "req", (REQ)); \ + } \ + } \ + } \ + } while(0) + +using namespace oceanbase::common; +using namespace oceanbase::obrpc; + +namespace oceanbase +{ +namespace liboblog +{ + +int64_t ObLogRpc::g_rpc_process_handler_time_upper_limit = + ObLogConfig::default_rpc_process_handler_time_upper_limit_msec * _MSEC_; + +ObLogRpc::ObLogRpc() : + inited_(false), + tenant_id_(OB_INVALID_ID), + net_client_(), + last_ssl_info_hash_(UINT64_MAX), + ssl_key_expired_time_(0) +{} + +ObLogRpc::~ObLogRpc() +{ + destroy(); +} + +int ObLogRpc::req_start_log_id_by_tstamp(const common::ObAddr &svr, + const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + SEND_RPC(req_start_log_id_by_ts_with_breakpoint, svr, timeout, req, res); + LOG_INFO("rpc: request start log id by tstamp", KR(ret), K(svr), K(timeout), K(req), K(res)); + return ret; +} + +int ObLogRpc::req_leader_heartbeat(const common::ObAddr &svr, + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + SEND_RPC(leader_heartbeat, svr, timeout, req, res); + LOG_DEBUG("rpc: request leader heartbeat", KR(ret), K(svr), K(timeout), K(req), K(res)); + return ret; +} + +int ObLogRpc::open_stream(const common::ObAddr &svr, + const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &resp, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + SEND_RPC(open_stream, svr, timeout, req, resp); + LOG_DEBUG("rpc: open stream", KR(ret), K(svr), K(timeout), K(req), K(resp)); + return ret; +} + +int ObLogRpc::async_stream_fetch_log(const common::ObAddr &svr, + const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogExternalProxy::AsyncCB &cb, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + SEND_RPC(async_stream_fetch_log, svr, timeout, req, &cb); + LOG_DEBUG("rpc: async fetch stream log", KR(ret), K(svr), K(timeout), K(req)); + return ret; +} + +int ObLogRpc::init(const uint64_t tenant_id, const int64_t io_thread_num) +{ + int ret = OB_SUCCESS; + rpc::frame::ObNetOptions opt; + opt.rpc_io_cnt_ = static_cast(io_thread_num); + opt.mysql_io_cnt_ = 0; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(io_thread_num <= 0)) { + LOG_ERROR("invalid argument", 
K(io_thread_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(net_client_.init(opt))) { + LOG_ERROR("init net client fail", KR(ret), K(io_thread_num)); + } else if (OB_FAIL(reload_ssl_config())) { + LOG_ERROR("reload_ssl_config succ", KR(ret)); + } else { + tenant_id_ = tenant_id; + inited_ = true; + LOG_INFO("init rpc succ", K(tenant_id), K(io_thread_num)); + } + return ret; +} + +void ObLogRpc::destroy() +{ + inited_ = false; + net_client_.destroy(); + tenant_id_ = OB_INVALID_ID; +} + +int ObLogRpc::reload_ssl_config() +{ + int ret = OB_SUCCESS; + const bool enable_ssl_client_authentication = (1 == TCONF.ssl_client_authentication); + const char *ssl_ext_kms_info_conf = TCONF.ssl_external_kms_info.str(); + const bool is_local_file_mode = (0 == strcmp("file", ssl_ext_kms_info_conf)); + const char *ssl_external_kms_info = NULL; + char external_info_val[OB_MAX_CONFIG_VALUE_LEN]; + external_info_val[0] = '\0'; + + if (enable_ssl_client_authentication) { + if (is_local_file_mode) { + ssl_external_kms_info = ssl_ext_kms_info_conf; + } else { + if (OB_FAIL(common::hex_to_cstr(ssl_ext_kms_info_conf, strlen(ssl_ext_kms_info_conf), + external_info_val, OB_MAX_CONFIG_VALUE_LEN))) { + LOG_ERROR("fail to hex to cstr", KR(ret)); + } else { + ssl_external_kms_info = external_info_val; + } + } + + if (OB_SUCC(ret)) { + ObString ssl_config(ssl_external_kms_info); + + bool file_exist = false; + const uint64_t new_hash_value = is_local_file_mode + ? observer::ObSrvNetworkFrame::get_ssl_file_hash(OB_CLIENT_SSL_CA_FILE, OB_CLIENT_SSL_CERT_FILE, OB_CLIENT_SSL_KEY_FILE, + file_exist) + : ssl_config.hash(); + + if (ssl_config.empty() && ! file_exist) { + LOG_ERROR("ssl file not available", K(new_hash_value)); + ret = OB_INVALID_CONFIG; + } else if (last_ssl_info_hash_ == new_hash_value) { + LOG_INFO("no need reload_ssl_config", K(new_hash_value)); + } else { + bool use_bkmi = false; + bool use_sm = false; + const char *ca_cert = NULL; + const char *public_cert = NULL; + const char *private_key = NULL; + int64_t ssl_key_expired_time = 0; + + if (is_local_file_mode) { + if (EASY_OK != easy_ssl_ob_config_check(OB_CLIENT_SSL_CA_FILE, OB_CLIENT_SSL_CERT_FILE, + OB_CLIENT_SSL_KEY_FILE, true/* is_from_file */, false/* is_babassl */)) { + LOG_ERROR("Local file mode: key and cert not match", KR(ret)); + ret = OB_INVALID_CONFIG; + } else if (OB_FAIL(observer::ObSrvNetworkFrame::extract_expired_time(OB_CLIENT_SSL_CERT_FILE, ssl_key_expired_time))) { + LOG_ERROR("extract_expired_time failed", KR(ret), K(use_bkmi)); + } else { + ca_cert = OB_CLIENT_SSL_CA_FILE; + public_cert = OB_CLIENT_SSL_CERT_FILE; + private_key = OB_CLIENT_SSL_KEY_FILE; + } + } else { + share::ObSSLClient client; + + if (OB_FAIL(client.init(ssl_config.ptr(), ssl_config.length()))) { + OB_LOG(WARN, "kms client init", K(ret), K(ssl_config)); + } else if (OB_FAIL(client.check_param_valid())) { + OB_LOG(WARN, "kms client param is not valid", K(ret)); + } else { + use_bkmi = client.is_bkmi_mode(); + use_sm = client.is_sm_scene(); + ca_cert = client.get_root_ca().ptr(); + public_cert = client.public_cert_.content_.ptr(); + private_key = client.private_key_.content_.ptr(); + ssl_key_expired_time = client.private_key_.key_expired_time_; + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(net_client_.load_ssl_config(ca_cert, public_cert, private_key))) { + LOG_ERROR("ObNetClient load_ssl_config failed", KR(ret), K(use_bkmi), K(use_sm)); + } else { + last_ssl_info_hash_ = new_hash_value; + ssl_key_expired_time_ = ssl_key_expired_time; + LOG_INFO("finish 
reload_ssl_config", K(use_bkmi), K(use_sm), K(new_hash_value), K(ssl_key_expired_time_)); + } + } + } + } + } else { + last_ssl_info_hash_ = UINT64_MAX; + ssl_key_expired_time_ = 0; + + LOG_INFO("reload_ssl_config: SSL is closed"); + } + + return ret; +} + +void ObLogRpc::configure(const ObLogConfig &cfg) +{ + int64_t rpc_process_handler_time_upper_limit_msec = cfg.rpc_process_handler_time_upper_limit_msec; + + ATOMIC_STORE(&g_rpc_process_handler_time_upper_limit, + rpc_process_handler_time_upper_limit_msec * _MSEC_); + LOG_INFO("[CONFIG]", K(rpc_process_handler_time_upper_limit_msec)); +} + +} +} diff --git a/src/liboblog/src/ob_log_rpc.h b/src/liboblog/src/ob_log_rpc.h new file mode 100644 index 0000000000000000000000000000000000000000..d2979afa8aec635236894a7c2e0c3e0756466e39 --- /dev/null +++ b/src/liboblog/src/ob_log_rpc.h @@ -0,0 +1,120 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_RPC_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_RPC_H_ + +#include "lib/net/ob_addr.h" // ObAddr +#include "rpc/obrpc/ob_net_client.h" // ObNetClient +#include "clog/ob_log_external_rpc.h" // obrpc +#include "rpc/obrpc/ob_rpc_packet.h" // OB_LOG_OPEN_STREAM +#include "rpc/obrpc/ob_rpc_proxy.h" // ObRpcProxy + +#include "ob_log_utils.h" // _SEC_ + +namespace oceanbase +{ +namespace liboblog +{ + +// RPC interface +// +// all asynchronous rpc start with "async" +class IObLogRpc +{ +public: + virtual ~IObLogRpc() { } + + // reuest start log id by timestamp + virtual int req_start_log_id_by_tstamp(const common::ObAddr &svr, + const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res, + const int64_t timeout) = 0; + + // reuest heartbeat from partition leader + virtual int req_leader_heartbeat(const common::ObAddr &svr, + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res, + const int64_t timeout) = 0; + + // open stream with synchronous RPC + virtual int open_stream(const common::ObAddr &svr, + const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &resp, + const int64_t timeout) = 0; + + // get logs based on log stream + // Asynchronous RPC + virtual int async_stream_fetch_log(const common::ObAddr &svr, + const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogExternalProxy::AsyncCB &cb, + const int64_t timeout) = 0; +}; + +//////////////////////////////////////////// ObLogRpc ////////////////////////////////////// + +class ObLogConfig; +class ObLogRpc : public IObLogRpc +{ +public: + static int64_t g_rpc_process_handler_time_upper_limit; + const char *const OB_CLIENT_SSL_CA_FILE = "wallet/ca.pem"; + const char *const OB_CLIENT_SSL_CERT_FILE = "wallet/client-cert.pem"; + const char *const OB_CLIENT_SSL_KEY_FILE = "wallet/client-key.pem"; + +public: + ObLogRpc(); + virtual ~ObLogRpc(); + + static void configure(const ObLogConfig &cfg); + +public: + int req_start_log_id_by_tstamp(const common::ObAddr &svr, + const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req, + 
obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res, + const int64_t timeout); + + int req_leader_heartbeat(const common::ObAddr &svr, + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res, + const int64_t timeout); + + int open_stream(const common::ObAddr &svr, + const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &resp, + const int64_t timeout); + + int async_stream_fetch_log(const common::ObAddr &svr, + const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogExternalProxy::AsyncCB &cb, + const int64_t timeout); + +public: + int init(const uint64_t tenant_id, const int64_t io_thread_num); + void destroy(); + int reload_ssl_config(); + +private: + bool inited_; + uint64_t tenant_id_; + obrpc::ObNetClient net_client_; + uint64_t last_ssl_info_hash_; + int64_t ssl_key_expired_time_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogRpc); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_schema_cache_info.cpp b/src/liboblog/src/ob_log_schema_cache_info.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1770dc72ffff94608f38f36b77b1914442260550 --- /dev/null +++ b/src/liboblog/src/ob_log_schema_cache_info.cpp @@ -0,0 +1,768 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_schema_cache_info.h" +#include "ob_obj2str_helper.h" // ObObj2strHelper +#include "ob_log_utils.h" // filter_non_user_column +#include "ob_log_config.h" // TCONF + +#include "share/schema/ob_table_schema.h" // ObTableSchema + +#define SCHEMA_STAT_INFO(fmt, args...) LOG_INFO("[SCHEMA_CACHE_STAT] " fmt, args) +#define SCHEMA_STAT_DEBUG(fmt, args...) 
LOG_DEBUG("[SCHEMA_CACHE_STAT] " fmt, args) + +using namespace oceanbase::common; +using namespace oceanbase::share::schema; +namespace oceanbase +{ +namespace liboblog +{ +ColumnSchemaInfo::ColumnSchemaInfo() + : column_flag_(DELETE_COLUMN_FLAG), + column_idx_(0), + meta_type_(), + accuracy_(), + collation_type_(), + orig_default_value_str_(NULL), + extended_type_info_size_(0), + extended_type_info_(NULL), + is_rowkey_(false) +{ + // default column is delete +} + +ColumnSchemaInfo::~ColumnSchemaInfo() +{ + destroy(); +} + +int ColumnSchemaInfo::init(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + ObObj2strHelper &obj2str_helper, + common::ObIAllocator &allocator, + const bool is_hidden_pk_table_pk_increment_column) +{ + int ret = OB_SUCCESS; + common::ObString *orig_default_value_str = NULL; + + if (OB_UNLIKELY(column_idx < 0 || column_idx > OB_USER_ROW_MAX_COLUMNS_COUNT)) { + LOG_ERROR("invalid argument", K(column_idx)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_column_ori_default_value_(table_schema, column_table_schema, column_idx, + obj2str_helper, allocator, orig_default_value_str))) { + LOG_ERROR("get_column_ori_default_value_ fail", KR(ret), K(table_schema), K(column_table_schema), + K(column_idx)); + } else if (OB_ISNULL(orig_default_value_str)) { + LOG_ERROR("orig_default_value_str is null", K(orig_default_value_str)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(init_extended_type_info_(table_schema, column_table_schema, column_idx, allocator))) { + LOG_ERROR("init_extended_type_info_ fail", KR(ret), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "column_name", column_table_schema.get_column_name(), + K(column_idx)); + } else { + const ObObjMeta &meta_type = column_table_schema.get_meta_type(); + const ObAccuracy &accuracy = column_table_schema.get_accuracy(); + const ObCollationType &collation_type = column_table_schema.get_collation_type(); + + if (is_hidden_pk_table_pk_increment_column) { + column_flag_ = HIDDEN_PRIMARY_KEY_TABLE_PK_INCREMENT_COLUMN_FLAG; + } else { + column_flag_ = NORMAL_COLUMN_FLAG; + } + column_idx_ = static_cast(column_idx); + meta_type_.set_meta(meta_type); + accuracy_ = accuracy; + collation_type_ = collation_type; + orig_default_value_str_ = orig_default_value_str; + is_rowkey_ = column_table_schema.is_original_rowkey_column(); + } + + return ret; +} + +int ColumnSchemaInfo::init(ColumnFlag column_flag) +{ + int ret = OB_SUCCESS; + + reset(); + column_flag_ = column_flag; + + return ret; +} + +void ColumnSchemaInfo::destroy() +{ + reset(); +} + +void ColumnSchemaInfo::reset() +{ + column_flag_ = DELETE_COLUMN_FLAG; + column_idx_ = 0; + meta_type_.reset(); + accuracy_.reset(); + collation_type_ = ObCollationType::CS_TYPE_INVALID; + + if (NULL != orig_default_value_str_) { + LOG_ERROR("orig_default_value_str_ should be null", K(orig_default_value_str_)); + orig_default_value_str_ = NULL; + } + + extended_type_info_size_ = 0; + extended_type_info_ = NULL; + is_rowkey_ = false; +} + +void ColumnSchemaInfo::get_extended_type_info(common::ObArrayHelper &str_array) const +{ + str_array.reset(); + str_array.init(extended_type_info_size_, extended_type_info_, extended_type_info_size_); +} + +int ColumnSchemaInfo::get_column_ori_default_value_(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + ObObj2strHelper 
&obj2str_helper, + common::ObIAllocator &allocator, + common::ObString *&str) +{ + int ret = OB_SUCCESS; + str = NULL; + + const ObObj &orig_default_obj = column_table_schema.get_orig_default_value(); + str = static_cast(allocator.alloc(sizeof(ObString))); + + if (OB_ISNULL(str)) { + LOG_ERROR("allocate memory for ObString fail", K(sizeof(ObString))); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + new (str) ObString(); + + // Deep copy of string values is required when converting defaults to strings + // because the memory for the default value is stored in the Schema, which cannot be relied upon + if (OB_FAIL(obj2str_helper.obj2str(table_schema.get_tenant_id(), + table_schema.get_table_id(), + column_table_schema.get_column_id(), + orig_default_obj, *str, allocator, true, + column_table_schema.get_extended_type_info(), + column_table_schema.get_accuracy(), + column_table_schema.get_collation_type()))) { + LOG_ERROR("obj2str cast orig_default_value fail", KR(ret), K(orig_default_obj), K(*str), + "tenant_id", table_schema.get_tenant_id(), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "column_id", column_table_schema.get_column_id(), + "column_name", column_table_schema.get_column_name(), + K(column_idx)); + } + } + + if (OB_SUCCESS != ret && NULL != str) { + str->~ObString(); + allocator.free(str); + str = NULL; + } + + return ret; +} + +int ColumnSchemaInfo::init_extended_type_info_(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + + if (! column_table_schema.is_enum_or_set()) { + // do nothing + } else { + // Only enum or set types are cached + const common::ObIArray &src_extended_type_info = + column_table_schema.get_extended_type_info(); + const int64_t alloc_size = src_extended_type_info.count() * static_cast(sizeof(ObString)); + void *buf = NULL; + + if (src_extended_type_info.count() <= 0) { + // do nothing + } else if (OB_ISNULL(buf = allocator.alloc(alloc_size))) { + LOG_ERROR("alloc memory failed", K(alloc_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(extended_type_info_ = static_cast(buf))) { + LOG_ERROR("extended_type_info_ is null", K(extended_type_info_)); + ret = OB_ERR_UNEXPECTED; + } else { + extended_type_info_size_ = src_extended_type_info.count(); + + for (int64_t idx = 0; OB_SUCC(ret) && idx < src_extended_type_info.count(); ++idx) { + ObString &str= extended_type_info_[idx]; + + if (OB_FAIL(deep_copy_str(src_extended_type_info.at(idx), str, allocator))) { + LOG_ERROR("deep_copy_str failed", KR(ret), K(idx), K(str), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "column_id", column_table_schema.get_column_id(), + "column_name", column_table_schema.get_column_name(), + K(column_idx)); + } else { + LOG_INFO("extended_type_info_ get succ", K(idx), K(str), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "column_id", column_table_schema.get_column_id(), + "column_name", column_table_schema.get_column_name(), + K(column_idx)); + } + } // for + + + if (OB_SUCCESS != ret && NULL != buf) { + allocator.free(buf); + } + } + } + + return ret; +} + +void ColumnSchemaInfo::release_mem(common::ObIAllocator &allocator) +{ + if (NULL != orig_default_value_str_) { + allocator.free(orig_default_value_str_); + orig_default_value_str_ = NULL; + } + + if (NULL != extended_type_info_) { + 
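+    // Release the extended type info array allocated in init_extended_type_info_()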
for (int64_t idx = 0; idx < extended_type_info_size_; ++idx) { + void *ptr = static_cast<void *>(&extended_type_info_[idx]); + if (NULL != ptr) { + allocator.free(ptr); + ptr = NULL; + } + } + + allocator.free(static_cast<void *>(extended_type_info_)); + extended_type_info_ = NULL; + extended_type_info_size_ = 0; + } +} + +ObLogRowkeyInfo::ObLogRowkeyInfo() + : size_(0), + column_id_array_(NULL) +{ +} + +ObLogRowkeyInfo::~ObLogRowkeyInfo() +{ + destroy(); +} + +int ObLogRowkeyInfo::init(common::ObIAllocator &allocator, + const int64_t size, + const common::ObArray<uint64_t> &column_ids) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(size <= 0)) { + LOG_ERROR("invalid argument", K(size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(size != column_ids.size())) { + LOG_ERROR("size is not equal to column_ids size", K(size), "column_ids_size", column_ids.size()); + ret = OB_ERR_UNEXPECTED; + } else { + size_ = size; + int64_t alloc_size = size * sizeof(column_id_array_[0]); + + if (OB_ISNULL(column_id_array_ = static_cast<uint64_t *>(allocator.alloc(alloc_size)))) { + LOG_ERROR("allocate memory fail", K(column_id_array_), K(alloc_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t idx = 0, cnt = size_; OB_SUCC(ret) && idx < cnt; idx++) { + column_id_array_[idx] = column_ids[idx]; + } + } + } + + return ret; +} + +void ObLogRowkeyInfo::destroy() +{ + if (NULL != column_id_array_) { + LOG_ERROR("column_id_array_ should be null", K(column_id_array_)); + column_id_array_ = NULL; + } + size_ = 0; +} + +void ObLogRowkeyInfo::release_mem(common::ObIAllocator &allocator) +{ + if (NULL != column_id_array_) { + allocator.free(column_id_array_); + column_id_array_ = NULL; + } +} + +bool ObLogRowkeyInfo::is_valid() const +{ + bool bool_ret = false; + + bool_ret = (NULL != column_id_array_ && size_ > 0); + + return bool_ret; +} + +int ObLogRowkeyInfo::get_column_id(const int64_t index, uint64_t &column_id) const +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(index < 0 || index >= size_)) { + LOG_ERROR("invalid argument", K(index), K_(size)); + ret = OB_INVALID_ARGUMENT; + } else if (! 
is_valid()) { + LOG_ERROR("ObLogRowkeyInfo is not valid", K(index), K_(size), KP_(column_id_array)); + ret = OB_NOT_INIT; + } else { + column_id = column_id_array_[index]; + } + + return ret; +} + +int64_t ObLogRowkeyInfo::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + + if (NULL != buf && buf_len > 0) { + (void)common::databuff_printf(buf, buf_len, pos, "{size=%ld, ", size_); + + if (NULL != column_id_array_) { + (void)common::databuff_printf(buf, buf_len, pos, "column_id:["); + + for (int64_t idx=0; idx < size_ - 1; idx++) { + (void)common::databuff_printf(buf, buf_len, pos, "%ld,", column_id_array_[idx]); + } + + (void)common::databuff_printf(buf, buf_len, pos, "%ld]}", column_id_array_[size_ - 1]); + } + } + + return pos; +} + +TableSchemaInfo::TableSchemaInfo(ObIAllocator &allocator) + : is_inited_(false), + allocator_(allocator), + is_hidden_pk_table_(false), + rowkey_info_(), + user_column_id_array_(NULL), + user_column_id_array_cnt_(0), + column_schema_array_(NULL), + column_schema_array_cnt_(0) +{ +} + +TableSchemaInfo::~TableSchemaInfo() +{ + destroy(); +} + +int TableSchemaInfo::init(const share::schema::ObTableSchema *table_schema) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(is_inited_)) { + LOG_ERROR("TableSchemaInfo has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + // To avoid performance overhead here, do not rely on table schema traversal to get the number of columns that do not contain hidden columns, + // externally set user_column_id_array_cnt_ to the number of hidden columns not included + user_column_id_array_cnt_ = table_schema->get_column_count(); + column_schema_array_cnt_ = table_schema->get_max_used_column_id() - OB_APP_MIN_COLUMN_ID + 1; + const bool is_hidden_pk_table = table_schema->is_no_pk_table(); + + // No primary key table, record hidden primary key information at the end column_id=1, column_name="__pk_increment", reserved position + if (is_hidden_pk_table) { + ++user_column_id_array_cnt_; + ++column_schema_array_cnt_; + } + + // TODO fix me + // There is a bad case where frequent addition and deletion of columns will lead to serious memory amplification, but online environments generally do not delete columns, so it is safe here + + if (OB_FAIL(init_rowkey_info_(table_schema))) { + LOG_ERROR("init_rowkey_info_ fail", KR(ret), K(table_schema)); + } else if (OB_FAIL(init_user_column_id_array_(user_column_id_array_cnt_))) { + LOG_ERROR("init_user_column_id_array_ fail", KR(ret), K(user_column_id_array_cnt_)); + } else if (OB_FAIL(init_column_schema_array_(column_schema_array_cnt_))) { + LOG_ERROR("init_column_schema_array_ fail", KR(ret), K(column_schema_array_cnt_)); + } else { + is_inited_ = true; + is_hidden_pk_table_ = is_hidden_pk_table; + + LOG_INFO("table_schema_info init succ", "table_id", table_schema->get_table_id(), + "table_name", table_schema->get_table_name(), + K_(is_hidden_pk_table), + "version", table_schema->get_schema_version(), + "user_column_id_array_cnt", user_column_id_array_cnt_, + "max_used_column_id", table_schema->get_max_used_column_id(), + "column_schema_array_cnt", column_schema_array_cnt_); + } + } + + if (OB_FAIL(ret)) { + destroy(); + } + + return ret; +} + +void TableSchemaInfo::destroy() +{ + is_inited_ = false; + + is_hidden_pk_table_ = false; + rowkey_info_.release_mem(allocator_); + + destroy_user_column_id_array_(); + destroy_column_schema_array_(); +} + +int 
TableSchemaInfo::init_rowkey_info_(const share::schema::ObTableSchema *table_schema) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(table_schema)) { + LOG_ERROR("invalid argument", K(table_schema)); + ret = OB_INVALID_ARGUMENT; + } else { + const ObRowkeyInfo &rowkey_info = table_schema->get_rowkey_info(); + ObArray column_ids; + + if (OB_FAIL(rowkey_info.get_column_ids(column_ids))) { + LOG_ERROR("rowkey info get_column_ids fail", KR(ret), K(column_ids)); + } else if (OB_FAIL(rowkey_info_.init(allocator_, rowkey_info.get_size(), column_ids))) { + LOG_ERROR("rowkey info init fail", KR(ret), "size", rowkey_info.get_size(), + K(column_ids)); + } else { + // succ + } + } + + return ret; +} + +int TableSchemaInfo::init_user_column_id_array_(const int64_t cnt) +{ + int ret = OB_SUCCESS; + int64_t alloc_size = cnt * sizeof(user_column_id_array_[0]); + + if (OB_UNLIKELY(cnt <= 0)) { + LOG_ERROR("invalid argument", K(cnt)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(user_column_id_array_ = static_cast(allocator_.alloc(alloc_size)))) { + LOG_ERROR("allocate memory fail", K(user_column_id_array_), K(alloc_size), K(cnt)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t idx = 0; OB_SUCCESS == ret && idx < cnt; ++idx) { + user_column_id_array_[idx] = OB_INVALID_ID; + } + } + + return ret; +} + +void TableSchemaInfo::destroy_user_column_id_array_() +{ + if (NULL != user_column_id_array_) { + allocator_.free(user_column_id_array_); + user_column_id_array_ = NULL; + user_column_id_array_cnt_ = 0; + } +} + +int TableSchemaInfo::init_column_schema_array_(const int64_t cnt) +{ + int ret = OB_SUCCESS; + int64_t alloc_size = cnt * sizeof(column_schema_array_[0]); + + if (OB_UNLIKELY(cnt <= 0)) { + LOG_ERROR("invalid argument", K(cnt)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(column_schema_array_ = static_cast(allocator_.alloc(alloc_size)))) { + LOG_ERROR("allocate memory fail", K(column_schema_array_), K(alloc_size), K(cnt)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t idx = 0; OB_SUCCESS == ret && idx < cnt; ++idx) { + new(column_schema_array_ + idx) ColumnSchemaInfo(); + } + } + + return ret; +} + +void TableSchemaInfo::destroy_column_schema_array_() +{ + if (NULL != column_schema_array_) { + for (int64_t idx = 0, cnt = column_schema_array_cnt_; idx < cnt; ++idx) { + column_schema_array_[idx].release_mem(allocator_); + column_schema_array_[idx].~ColumnSchemaInfo(); + } + + allocator_.free(column_schema_array_); + column_schema_array_ = NULL; + column_schema_array_cnt_ = 0; + } +} + +int TableSchemaInfo::get_column_schema_info_(const uint64_t column_id, + const bool enable_output_hidden_primary_key, + ColumnSchemaInfo *&column_schema_info, + bool &is_non_user_column, + bool &is_hidden_pk_table_pk_increment_column) const +{ + int ret = OB_SUCCESS; + column_schema_info = NULL; + is_non_user_column = false; + is_hidden_pk_table_pk_increment_column = false; + + if (OB_UNLIKELY(! 
is_inited_)) { + LOG_ERROR("TableSchemaInfo has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == column_id)) { + LOG_ERROR("invalid argument", K(column_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(filter_non_user_column(is_hidden_pk_table_, enable_output_hidden_primary_key, + column_id, is_non_user_column, is_hidden_pk_table_pk_increment_column))) { + LOG_ERROR("filter_non_user_column fail", KR(ret), K(column_id), K(enable_output_hidden_primary_key), + K(is_non_user_column), K(is_hidden_pk_table_pk_increment_column)); + } else if (is_non_user_column) { + // 1. filter out non-user columns directly + // 2. if enable_output_hidden_primary_key=true, no primary key table hidden primary key support fetch, no filtering here + // No logs are printed here, external calls are printed + } else { + int64_t column_schema_array_idx = -1; + + if (is_hidden_pk_table_pk_increment_column) { + column_schema_array_idx = column_schema_array_cnt_ - 1; + } else { + column_schema_array_idx = column_id - OB_APP_MIN_COLUMN_ID; + } + + if (OB_UNLIKELY(column_schema_array_idx < 0 || column_schema_array_idx >= column_schema_array_cnt_)) { + LOG_ERROR("invalid column_schema_array_idx", K(column_id), K(column_schema_array_idx), + K(column_schema_array_cnt_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(column_schema_info = &column_schema_array_[column_schema_array_idx])) { + LOG_ERROR("column_schema_info is null", K(column_id), K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + } + + return ret; +} + +int TableSchemaInfo::init_column_schema_info(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + const bool enable_output_hidden_primary_key, + ObObj2strHelper &obj2str_helper) +{ + int ret = OB_SUCCESS; + const uint64_t column_id = column_table_schema.get_column_id(); + const char *column_name = column_table_schema.get_column_name(); + const bool is_hidden_column = column_table_schema.is_hidden(); + const bool enable_output_invisible_column = TCONF.enable_output_invisible_column; + const bool is_invisible_column = column_table_schema.is_invisible_column(); + ColumnSchemaInfo *column_schema_info = NULL; + bool is_non_user_column = false; + bool is_hidden_pk_table_pk_increment_column = false; + + if (OB_UNLIKELY(! is_inited_)) { + LOG_ERROR("TableSchemaInfo has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(column_idx < 0)) { + LOG_ERROR("invalid argument", K(column_idx)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_column_schema_info_(column_id, enable_output_hidden_primary_key, + column_schema_info, is_non_user_column, is_hidden_pk_table_pk_increment_column))) { + LOG_ERROR("get_column_schema_info_ fail", KR(ret), K(column_id), K(enable_output_hidden_primary_key), + KPC(column_schema_info), K(is_non_user_column), K(is_hidden_pk_table_pk_increment_column)); + } else if (is_non_user_column) { + SCHEMA_STAT_INFO("ignore non user column", K(is_non_user_column), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_idx), K(column_id)); + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_id), K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else if (is_hidden_column && ! 
is_hidden_pk_table_pk_increment_column) { + // Hidden columns do not record columnID, columnIdx, meta_type, accuracy information + // Note: Here the initialization of the hidden column [not the hidden primary key of a non-primary table] is completed + if (OB_FAIL(column_schema_info->init(ColumnFlag::HIDDEN_COLUMN_FLAG))) { + LOG_ERROR("column_schema_info init fail", KR(ret), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_id), K(column_name), K(is_hidden_column), + K(is_hidden_pk_table_pk_increment_column), + K(column_idx)); + } + LOG_INFO("column_schema_info init hidden column", + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_id), K(column_name), K(is_hidden_column), + K(is_hidden_pk_table_pk_increment_column), + K(column_idx)); + } else if (is_invisible_column && ! enable_output_invisible_column) { + // invisible columns do not record columnID, columnIdx, meta_type, accuracy information + if (OB_FAIL(column_schema_info->init(ColumnFlag::OBLOG_INVISIBLE_COLUMN_FLAG))) { + LOG_ERROR("column_schema_info init fail", KR(ret), K(enable_output_invisible_column), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_id), K(column_name), K(is_invisible_column), + K(column_idx)); + } else { + LOG_INFO("column_schema_info init invisible column", K(enable_output_invisible_column), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_id), K(column_name), K(is_invisible_column), + K(column_idx)); + } + } else { + // Non-hidden columns set column_id and initialize + if (OB_FAIL(set_column_id(column_idx, column_id))) { + LOG_ERROR("set_column_id fail", KR(ret), K(column_idx), K(column_id)); + } else if (OB_FAIL(column_schema_info->init(table_schema, column_table_schema, column_idx, + obj2str_helper, get_allocator(), is_hidden_pk_table_pk_increment_column))) { + LOG_ERROR("column_schema_info init fail", KR(ret), + "table_id", table_schema.get_table_id(), + "table_name", table_schema.get_table_name(), + "version", table_schema.get_schema_version(), + K(column_idx), K(is_hidden_pk_table_pk_increment_column), + "meta_type", column_table_schema.get_meta_type(), + "accuracy", column_table_schema.get_accuracy()); + } else { + // succ + } + } + + return ret; +} + +int TableSchemaInfo::get_column_schema_info(const uint64_t column_id, + const bool enable_output_hidden_primary_key, + ColumnSchemaInfo *&column_schema_info, + ColumnPropertyFlag &column_property_flag) const +{ + int ret = OB_SUCCESS; + column_schema_info = NULL; + column_property_flag.reset(); + bool is_non_user_column = false; + bool is_hidden_pk_table_pk_increment_column = false; + bool is_hidden_column = false; + bool is_delete_column = false; + bool is_invisible_column = false; + + // range of user columns: + // OB_APP_MIN_COLUMN_ID: 16 + // OB_MIN_SHADOW_COLUMN_ID: 32767 + if (OB_UNLIKELY(! 
is_inited_)) { + LOG_ERROR("TableSchemaInfo has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_ID == column_id)) { + LOG_ERROR("invalid argument", K(column_id)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_column_schema_info_(column_id, enable_output_hidden_primary_key, column_schema_info, + is_non_user_column, is_hidden_pk_table_pk_increment_column))) { + LOG_ERROR("get_column_schema_info_ fail", KR(ret), K(column_id), + K(enable_output_hidden_primary_key), KPC(column_schema_info), + K(is_non_user_column), K(is_hidden_pk_table_pk_increment_column)); + } else if (is_non_user_column) { + // do nothing + } else if (OB_ISNULL(column_schema_info)) { + LOG_ERROR("column_schema_info is null", K(column_id), K(column_schema_info)); + ret = OB_ERR_UNEXPECTED; + } else { + if (column_schema_info->is_hidden()) { + is_hidden_column = true; + } + + if (column_schema_info->is_delete()) { + is_delete_column = true; + } + + if (column_schema_info->is_invisible()) { + is_invisible_column = true; + } + } + + if (OB_SUCC(ret)) { + if (is_non_user_column || is_hidden_column || is_delete_column || is_invisible_column) { + column_schema_info = NULL; + } + column_property_flag.reset(is_non_user_column, is_hidden_column, is_delete_column, is_invisible_column); + } + + return ret; +} + +int TableSchemaInfo::get_column_id(const int64_t index, uint64_t &column_id) const +{ + int ret = OB_SUCCESS; + column_id = OB_INVALID_ID; + + if (OB_UNLIKELY(! is_inited_)) { + LOG_ERROR("TableSchemaInfo has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(index < 0 || index >= user_column_id_array_cnt_)) { + LOG_ERROR("invalid argument", K(index), K_(user_column_id_array_cnt)); + ret = OB_INVALID_ARGUMENT; + } else { + column_id = user_column_id_array_[index]; + } + + return ret; +} + +int TableSchemaInfo::set_column_id(const int64_t index, const uint64_t column_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! is_inited_)) { + LOG_ERROR("TableSchemaInfo has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(index < 0 || index >= user_column_id_array_cnt_)) { + LOG_ERROR("invalid argument", K(index), K_(user_column_id_array_cnt)); + ret = OB_INVALID_ARGUMENT; + } else { + user_column_id_array_[index] = column_id; + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_schema_cache_info.h b/src/liboblog/src/ob_log_schema_cache_info.h new file mode 100644 index 0000000000000000000000000000000000000000..ba39ea5105cd711f3778783b0104f157384d3ed5 --- /dev/null +++ b/src/liboblog/src/ob_log_schema_cache_info.h @@ -0,0 +1,345 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_SCHEMA_CACHE_INFO_H__ +#define OCEANBASE_LIBOBLOG_SCHEMA_CACHE_INFO_H__ + +#include "share/ob_errno.h" // OB_SUCCESS +#include "common/object/ob_object.h" // ObObjMeta +#include "common/ob_accuracy.h" // ObAccuracy +#include "lib/container/ob_array_helper.h" + +namespace oceanbase +{ +namespace share +{ +namespace schema +{ +class ObTableSchema; +class ObColumnSchemaV2; +} // namespace schema +} // namespace share + +namespace liboblog +{ +class ObObj2strHelper; +class ObLogSchemaGuard; + +// The primary keyless table has three hidden columns. +// column_id=OB_HIDDEN_PK_INCREMENT_COLUMN_ID[1], column_name="__pk_increment" +// column_id=OB_HIDDEN_PK_CLUSTER_COLUMN_ID[4], column_name="__pk_cluster_id" +// column_id=OB_HIDDEN_PK_PARTITION_COLUMN_ID[5], column_name="__pk_partition_id" +// Exposed externally using column_id=1 as the output primary key +enum ColumnFlag +{ + NORMAL_COLUMN_FLAG = 0, + + HIDDEN_COLUMN_FLAG = 1, + DELETE_COLUMN_FLAG =2, + HIDDEN_PRIMARY_KEY_TABLE_PK_INCREMENT_COLUMN_FLAG = 3, + // Macro definition of INVISIBLE_COLUMN_FLAG already available + OBLOG_INVISIBLE_COLUMN_FLAG +}; +const char *print_column_flag(ColumnFlag column_flag); + +class ColumnSchemaInfo +{ +public: + ColumnSchemaInfo(); + virtual ~ColumnSchemaInfo(); + +public: + // Hidden column/invisible column initialisation interface + int init(ColumnFlag column_flag); + // Common column initialization interface + // allocator is used to allocate memory for orig_default_value_str, consistent with release_mem + int init(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + ObObj2strHelper &obj2str_helper, + common::ObIAllocator &allocator, + const bool is_hidden_pk_table_pk_increment_column); + void destroy(); + void reset(); + // You need to call release_mem in advance before destructuring to free memory + // 1. release the default memory + // 2. free extended_type_info memory + void release_mem(common::ObIAllocator &allocator); + + inline bool is_hidden() const { return HIDDEN_COLUMN_FLAG == column_flag_; } + inline bool is_delete() const { return DELETE_COLUMN_FLAG == column_flag_; } + inline bool is_hidden_pk_table_pk_increment_column() const + { return HIDDEN_PRIMARY_KEY_TABLE_PK_INCREMENT_COLUMN_FLAG == column_flag_; } + inline bool is_invisible() const { return OBLOG_INVISIBLE_COLUMN_FLAG == column_flag_; } + inline bool is_rowkey() const { return is_rowkey_; } + + void set_column_idx(const int64_t column_idx) { column_idx_ = static_cast(column_idx); } + inline int64_t get_column_idx() const { return column_idx_; } + + void set_meta_type(const common::ObObjMeta &meta_type) { meta_type_.set_meta(meta_type); } + inline common::ObObjMeta get_meta_type() const { return meta_type_; } + + void set_accuracy(const common::ObAccuracy &accuracy) { accuracy_ = accuracy; } + inline const common::ObAccuracy &get_accuracy() const { return accuracy_; } + inline const common::ObCollationType &get_collation_type() const { return collation_type_; } + + void set_orig_default_value_str(common::ObString &orig_default_value_str) + { + orig_default_value_str_ = &orig_default_value_str; + } + inline const common::ObString *get_orig_default_value_str() const { return orig_default_value_str_; } + // 1. To resolve the memory space, ObArrayHelper is not used directly to store information, get_extended_type_info returns size and an array of pointers directly + // 2. 
the caller constructs a temporary array via ObArrayHelper(size, str_ptr, size) from the returned pointer + inline void get_extended_type_info(int64_t &size, common::ObString *&str_ptr) const + { + size = extended_type_info_size_; + str_ptr = extended_type_info_; + } + void get_extended_type_info(common::ObArrayHelper<common::ObString> &str_array) const; + +public: + TO_STRING_KV(K_(column_flag), + K_(column_idx), + K_(meta_type), + K_(accuracy), + K_(collation_type), + K_(orig_default_value_str), + K_(extended_type_info_size), + K_(extended_type_info), + K_(is_rowkey)); + +private: + int get_column_ori_default_value_(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + ObObj2strHelper &obj2str_helper, + common::ObIAllocator &allocator, + common::ObString *&str); + + int init_extended_type_info_(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + common::ObIAllocator &allocator); + +private: + int16_t column_flag_; + // Record column_idx (hidden columns are not counted) + int16_t column_idx_; + common::ObObjMeta meta_type_; + common::ObAccuracy accuracy_; + common::ObCollationType collation_type_; + // TODO: There are no multiple versions of the default value, consider maintaining a copy + common::ObString *orig_default_value_str_; + // used for enum and set + int64_t extended_type_info_size_; + common::ObString *extended_type_info_; + // The rowkey_info in TableSchema is not accurate, because a newly created table without a primary key promotes its partition key to the primary key. + // We need to record whether this column was a rowkey column in the table as the user defined it + bool is_rowkey_; + +private: + DISALLOW_COPY_AND_ASSIGN(ColumnSchemaInfo); +}; + +// liboblog's own rowkey info: only the essential information, i.e. the size and the column ID array +class ObLogRowkeyInfo +{ +public: + ObLogRowkeyInfo(); + ~ObLogRowkeyInfo(); + int init(common::ObIAllocator &allocator, + const int64_t size, + const common::ObArray<uint64_t> &column_ids); + void destroy(); + // release_mem must be called before destruction to free the internally allocated column_id_array_ + void release_mem(common::ObIAllocator &allocator); + +public: + bool is_valid() const; + + inline int64_t get_size() const { return size_; } + + /** + * get rowkey column id based on index + * @param[in] index column index in RowkeyInfo + * @param[out] column_id column id + * + * @return int return OB_SUCCESS if get the column, otherwise return OB_ERROR + */ + int get_column_id(const int64_t index, uint64_t &column_id) const; + +public: + int64_t to_string(char* buf, const int64_t buf_len) const; + +private: + int64_t size_; + uint64_t *column_id_array_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogRowkeyInfo); +}; + +// A table without a primary key exposes its hidden primary key externally as column_id=1, column_name="__pk_increment" +// 1. from the schema perspective it is a non-user column (column_id < 16) and a hidden column +// 2. from the user perspective it is a user column and not a hidden column, so for it: 
+// is_non_user_column_ = false; +// is_hidden_column_ = false; +struct ColumnPropertyFlag +{ + ColumnPropertyFlag() { reset(); } + ~ColumnPropertyFlag() { reset(); } + + void reset() + { + is_non_user_column_ = false; + is_hidden_column_ = false; + is_delete_column_ = false; + is_invisible_column_ = false; + } + + void reset(const bool is_non_user_column, + const bool is_hidden_column, + const bool is_delete_column, + const bool is_invisible_column) + { + is_non_user_column_ = is_non_user_column; + is_hidden_column_ = is_hidden_column; + is_delete_column_ = is_delete_column; + is_invisible_column_ = is_invisible_column; + } + + inline bool is_non_user() const { return is_non_user_column_; } + inline bool is_hidden() const { return is_hidden_column_; } + inline bool is_delete() const { return is_delete_column_; } + inline bool is_invisible() const { return is_invisible_column_; } + + bool is_non_user_column_; + bool is_hidden_column_; + bool is_delete_column_; + bool is_invisible_column_; + + TO_STRING_KV(K_(is_non_user_column), + K_(is_hidden_column), + K_(is_delete_column), + K_(is_invisible_column)); +}; + +class TableSchemaInfo +{ +public: + explicit TableSchemaInfo(common::ObIAllocator &allocator); + virtual ~TableSchemaInfo(); + +public: + int init(const share::schema::ObTableSchema *table_schema); + void destroy(); + + common::ObIAllocator &get_allocator() { return allocator_; } + + inline bool is_hidden_pk_table() const { return is_hidden_pk_table_; } + + inline const ObLogRowkeyInfo &get_rowkey_info() const { return rowkey_info_; } + + inline int64_t get_non_hidden_column_count() const { return user_column_id_array_cnt_; } + void set_non_hidden_column_count(const int64_t non_hidden_column_cnt) + { + user_column_id_array_cnt_ = non_hidden_column_cnt; + } + + inline uint64_t *get_column_id_array() { return user_column_id_array_; } + inline const uint64_t *get_column_id_array() const { return user_column_id_array_; } + + inline ColumnSchemaInfo *get_column_schema_array() { return column_schema_array_; } + inline const ColumnSchemaInfo *get_column_schema_array() const { return column_schema_array_; } + + // init column schema info + int init_column_schema_info(const share::schema::ObTableSchema &table_schema, + const share::schema::ObColumnSchemaV2 &column_table_schema, + const int64_t column_idx, + const bool enable_output_hidden_primary_key, + ObObj2strHelper &obj2str_helper); + + /// Get column_schema based on column_id + /// Returns column_schema_info=NULL when it is a non-user/deleted/hidden column + /// + /// @param column_id [in] column id + /// @param column_schema_info [out] column schema info + /// @param column_property_flag [out] Returns the column property identifier, whether it is a non-user/deleted column/hidden column + /// + /// @retval OB_SUCCESS success + /// @retval other error code fail + int get_column_schema_info(const uint64_t column_id, + const bool enable_output_hidden_primary_key, + ColumnSchemaInfo *&column_schema_info, + ColumnPropertyFlag &column_property_flag) const; + + /// get column_id based on index + /// + /// @param column_index [in] column index + /// @param column_id [out] column id + /// + /// @retval OB_SUCCESS success + /// @retval other error code fail + int get_column_id(const int64_t column_index, uint64_t &column_id) const; + + /// set column_id based on index + /// + /// @param column_index [in] column index + /// @param column_id [in] column id + /// + /// @retval OB_SUCCESS success + /// @retval other error code fail + int 
set_column_id(const int64_t column_index, const uint64_t column_id); + +public: + TO_STRING_KV(K_(rowkey_info), + K_(user_column_id_array), + K_(user_column_id_array_cnt), + K_(column_schema_array), + K_(column_schema_array_cnt)); + +private: + int init_rowkey_info_(const share::schema::ObTableSchema *table_schema); + int init_user_column_id_array_(const int64_t cnt); + void destroy_user_column_id_array_(); + int init_column_schema_array_(const int64_t cnt); + void destroy_column_schema_array_(); + // 1. Non-user columns are filtered out directly + // 2. No primary key tables hide primary keys and do not filter + int get_column_schema_info_(const uint64_t column_id, + const bool enable_output_hidden_primary_key, + ColumnSchemaInfo *&column_schema_info, + bool &is_non_user_column, + bool &is_hidden_pk_table_pk_increment_column) const; + +private: + bool is_inited_; + common::ObIAllocator &allocator_; + + bool is_hidden_pk_table_; + ObLogRowkeyInfo rowkey_info_; + + // For tables without primary keys: user_column_id_array_ and column_schema_array_ are stored last at the end of the user column for external primary key output + // column array stores the columnIdx corresponding to the columnID (does not contain hidden columns) + uint64_t *user_column_id_array_; + int64_t user_column_id_array_cnt_; + + ColumnSchemaInfo *column_schema_array_; + int64_t column_schema_array_cnt_; + +private: + DISALLOW_COPY_AND_ASSIGN(TableSchemaInfo); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif diff --git a/src/liboblog/src/ob_log_schema_getter.cpp b/src/liboblog/src/ob_log_schema_getter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8d1c31792f40880ca96ac14e4c4118f67b14e442 --- /dev/null +++ b/src/liboblog/src/ob_log_schema_getter.cpp @@ -0,0 +1,939 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_SCHEMA + +#include "ob_log_schema_getter.h" + +#include "lib/mysqlclient/ob_mysql_server_provider.h" // ObMySQLServerProvider +#include "share/config/ob_common_config.h" // ObCommonConfig +#include "share/ob_get_compat_mode.h" // ObCompatModeGetter +#include "share/inner_table/ob_inner_table_schema.h" // OB_CORE_SCHEMA_VERSION + +#include "ob_log_utils.h" // is_mysql_client_errno +#include "ob_log_common.h" // GET_SCHEMA_TIMEOUT_ON_START_UP +#include "ob_log_config.h" // TCONF + +// NOTICE: retry will exist if func return error code OB_TENANT_HAS_BEEN_DROPPED +// errorcode OB_TENANT_NOT_EXIST DOESN'T mean tenant has been dropped: tenant may be not create/deleted, besides, +// sql exectue error or request to a delay rs server may also get this code even if tenant exist(e.g. just created) +// should continue retry until get OB_TENANT_HAS_BEEN_DROPPED. 
+// should check tenant status by query_tenant_status(INSIDE SCHEMA_SERVICE) when func get OB_TENANT_NOT_EXIST +// (RETRY_ON_FAIL_WITH_TENANT_ID will check tenant status in macro) +// +// currently only function below will use this macro: +// ObLogSchemaGuard::get_database_schema database_id get from ddl_stmt(__all_ddl_operation) may not real database id +// ObLogSchemaGuard::get_available_tenant_ids args doesn't contain tenant_id +// auto_switch_mode_and_refresh_schema in ObLogSchemaGetter::init tenant_id when init may invalid to refresh all_tenant schema +// ObLogSchemaGetter::load_split_schema_version args doesn't contain tenant_id +#define RETRY_ON_FAIL(timeout, var, func, args...) \ + do { \ + if (OB_SUCC(ret)) { \ + static const int64_t SLEEP_TIME_ON_FAIL = 100L * 1000L; \ + int64_t start_time = get_timestamp(); \ + int64_t end_time = start_time + timeout; \ + int old_err = OB_SUCCESS; \ + while (OB_FAIL((var).func(args))) { \ + old_err = ret; \ + int64_t now = get_timestamp(); \ + if (end_time <= now) { \ + LOG_ERROR(#func " timeout", KR(ret), K(timeout)); \ + ret = OB_TIMEOUT; \ + break; \ + } \ + if (OB_NOT_SUPPORTED == ret || OB_TENANT_HAS_BEEN_DROPPED == ret) { \ + /* retrun if errno needn't retry */ \ + break; \ + } \ + LOG_ERROR(#func " fail, retry later", KR(ret), KR(old_err), K(timeout), \ + "retry_time", now - start_time); \ + ret = OB_SUCCESS; \ + usleep(SLEEP_TIME_ON_FAIL); \ + } \ + } \ + } while (0) + +// query tenant status before retry schema service +// function call this should make sure TENANT_ID MUST BE the FIRST one in its ARG list +#define RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, var, func, args...) \ + do { \ + if (OB_SUCC(ret)) { \ + static const int64_t SLEEP_TIME_ON_FAIL = 100L * 1000L; \ + if (OB_INVALID_TENANT_ID == tenant_id) {\ + ret = OB_INVALID_ARGUMENT; \ + LOG_ERROR(#func " fail: RETRY_ON_FAIL_WITH_TENANT_ID not support invalid tenant_id", KR(ret), K(tenant_id), K(timeout)); \ + } else { \ + int64_t start_time = get_timestamp(); \ + int64_t end_time = start_time + timeout; \ + int old_err = OB_SUCCESS; \ + LOG_DEBUG(#func " trying with tenant id", K(tenant_id)); \ + bool test_mode_force_check_tenant_status = (1 == TCONF.test_mode_on) && (1 == TCONF.test_mode_force_check_tenant_status); \ + while (OB_FAIL((var).func(args)) || test_mode_force_check_tenant_status) { \ + int64_t now = get_timestamp(); \ + if (end_time <= now) { \ + LOG_ERROR(#func " timeout", KR(ret), K(timeout)); \ + ret = OB_TIMEOUT; \ + break; \ + } \ + old_err = ret; \ + if (test_mode_force_check_tenant_status) { \ + /* force query tenant status only one time for each first time access schema service in test mode */ \ + ret = OB_TENANT_NOT_EXIST; \ + test_mode_force_check_tenant_status = false; \ + } \ + if (OB_TENANT_NOT_EXIST == ret) { \ + /* retry to ensure tenant status */ \ + TenantStatus tenant_status = TENANT_STATUS_INVALID; \ + if (OB_FAIL(SchemaServiceType::get_instance().query_tenant_status(tenant_id, tenant_status))) { \ + LOG_WARN(#func " veriry tenant_status fail", KR(ret), KR(old_err), K(tenant_id), K(timeout)); \ + } else if (TENANT_DELETED == tenant_status) { \ + ret = OB_TENANT_HAS_BEEN_DROPPED; \ + } else { \ + } \ + LOG_INFO(#func " query tenant status for schema_getter retry", KR(ret), KR(old_err), K(tenant_id), K(tenant_status)); \ + } \ + if (OB_NOT_SUPPORTED == ret || OB_TENANT_HAS_BEEN_DROPPED == ret) { \ + /* retrun if errno needn't retry */ \ + break; \ + } \ + LOG_ERROR(#func " fail, retry later", KR(ret), KR(old_err), K(tenant_id), K(timeout), 
"retry_time", now - start_time); \ + ret = OB_SUCCESS; \ + usleep(SLEEP_TIME_ON_FAIL); \ + } \ + } \ + } \ + } while (0) + +using namespace oceanbase::common; +using namespace oceanbase::common::sqlclient; +using namespace oceanbase::share::schema; + +namespace oceanbase +{ +namespace liboblog +{ + +ObLogSchemaGuard::ObLogSchemaGuard() : tenant_id_(OB_INVALID_TENANT_ID), guard_() +{} + +ObLogSchemaGuard::~ObLogSchemaGuard() +{ + tenant_id_ = OB_INVALID_TENANT_ID; +} + +int ObLogSchemaGuard::do_check_force_fallback_mode_(const bool is_lazy, const char *func) const +{ + int ret = OB_SUCCESS; + if (is_lazy) { + _LOG_ERROR("schema guard should be fallback mode, but is lazy mode, " + "'%s' not supported, is_lazy=%d, tenant_id=%lu", + func, is_lazy, tenant_id_); + ret = OB_NOT_SUPPORTED; + } else { + // Not a lazy model, meeting expectations + LOG_DEBUG("schema guard is fallback mode, check done", K(tenant_id_), K(is_lazy), K(func)); + } + return ret; +} + +// Check if it is fallback mode and report an error if it is not +int ObLogSchemaGuard::check_force_fallback_mode_(const uint64_t tenant_id, const char *func) const +{ + int ret = OB_SUCCESS; + bool is_lazy = false; + // First check if the SYS tenant is in lazy mode + if (OB_FAIL(guard_.is_lazy_mode(tenant_id, is_lazy))) { + LOG_ERROR("check is_lazy_mode fail", KR(ret), K(tenant_id)); + } else { + ret = do_check_force_fallback_mode_(is_lazy, func); + } + return ret; +} + +// Check if it is fallback mode and report an error if it is not +// Requires both SYS tenant and target tenant to be in fallback mode +int ObLogSchemaGuard::check_force_fallback_mode_(const char *func) const +{ + int ret = OB_SUCCESS; + bool is_lazy = false; + // First check if the SYS tenant is in lazy mode + if (OB_FAIL(guard_.is_lazy_mode(OB_SYS_TENANT_ID, is_lazy))) { + LOG_ERROR("check is_lazy_mode by SYS tenant id fail", KR(ret)); + } + // If the SYS tenant is not lazy, then check if the normal tenant is in lazy mode + // Continue checking only when tenant_id is valid + else if (! 
is_lazy + && OB_INVALID_TENANT_ID != tenant_id_ + && OB_SYS_TENANT_ID != tenant_id_ + && OB_FAIL(guard_.is_lazy_mode(tenant_id_, is_lazy))) { + LOG_ERROR("check is_lazy_mode fail", KR(ret), K(tenant_id_), K(is_lazy)); + } else { + ret = do_check_force_fallback_mode_(is_lazy, func); + } + return ret; +} + +int ObLogSchemaGuard::get_table_schema(const uint64_t table_id, + const ObTableSchema *&table_schema, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = extract_tenant_id(table_id); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_table_schema, table_id, table_schema); + return ret; +} + +int ObLogSchemaGuard::get_table_schema(const uint64_t table_id, + const ObSimpleTableSchemaV2 *&table_schema, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = extract_tenant_id(table_id); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_table_schema, table_id, table_schema); + return ret; +} + +int ObLogSchemaGuard::get_tablegroup_schemas_in_tenant(const uint64_t tenant_id, + common::ObArray &tablegroup_schemas, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // You must use the fallback schema guard, otherwise may get an error in following function, here is a precaution + ret = check_force_fallback_mode_("get_tablegroup_schemas_in_tenant"); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_tablegroup_schemas_in_tenant, tenant_id, tablegroup_schemas); + return ret; +} + +int ObLogSchemaGuard::get_database_schema(uint64_t database_id, + const ObDatabaseSchema *&database_schema, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + RETRY_ON_FAIL(timeout, guard_, get_database_schema, database_id, database_schema); + return ret; +} + +int ObLogSchemaGuard::get_database_schema(uint64_t database_id, + const ObSimpleDatabaseSchema *&database_schema, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // The schema guard must be in fallback mode, otherwise the following function may be called with an error, prevented here + // Database Schema uses the tenant ID of the DB to determine + ret = check_force_fallback_mode_(extract_tenant_id(database_id), "get_database_schema"); + RETRY_ON_FAIL(timeout, guard_, get_database_schema, database_id, database_schema); + return ret; +} + +int ObLogSchemaGuard::get_tenant_info(uint64_t tenant_id, + const ObTenantSchema *&tenant_info, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_tenant_info, tenant_id, tenant_info); + return ret; +} + +int ObLogSchemaGuard::get_tenant_info(uint64_t tenant_id, + const ObSimpleTenantSchema *&tenant_info, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // The schema guard must be in fallback mode, otherwise the following function may be called with an error, so here is a precaution + // Tenant Schema uses the SYS tenant ID to determine + ret = check_force_fallback_mode_(OB_SYS_TENANT_ID, "get_tenant_info"); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_tenant_info, tenant_id, tenant_info); + return ret; +} + +int ObLogSchemaGuard::get_tenant_schema_info(uint64_t tenant_id, + TenantSchemaInfo &tenant_schema_info, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool is_lazy = false; + + // First determine if it is Lazy mode + // Note: The tenant schema is determined using the SYS tenant, as it is used to get the tenant schema from the SYS tenant mgr + if (OB_FAIL(guard_.is_lazy_mode(OB_SYS_TENANT_ID, is_lazy))) { + LOG_ERROR("get 
is_lazy_mode() fail when get_tenant_schema_info", KR(ret)); + } else if (is_lazy) { + // get Full Tenant Schema in lazy mode + const ObTenantSchema *tenant_schema = NULL; + if (OB_FAIL(get_tenant_info(tenant_id, tenant_schema, timeout))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get_tenant_info fail when get tenant name", KR(ret), K(tenant_id), K(is_lazy)); + } + } else if (OB_ISNULL(tenant_schema)) { + LOG_WARN("schema error: tenant schema is NULL", K(tenant_id)); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else { + tenant_schema_info.reset(tenant_id, + tenant_schema->get_schema_version(), + tenant_schema->get_tenant_name(), + tenant_schema->is_restore()); + } + } else { + // get Simple Tenant Schema in non-lazy mode + const ObSimpleTenantSchema *simple_tenant_schema = NULL; + if (OB_FAIL(get_tenant_info(tenant_id, simple_tenant_schema, timeout))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get simple tenant schema fail when get tenant name", KR(ret), K(tenant_id), K(is_lazy)); + } + } else if (OB_ISNULL(simple_tenant_schema)) { + LOG_WARN("schema error: simple tenant schema is NULL", K(tenant_id)); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else { + tenant_schema_info.reset(tenant_id, + simple_tenant_schema->get_schema_version(), + simple_tenant_schema->get_tenant_name(), + simple_tenant_schema->is_restore()); + } + } + return ret; +} + +int ObLogSchemaGuard::get_database_schema_info(uint64_t database_id, + DBSchemaInfo &db_schema_info, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool is_lazy = false; + const uint64_t tenant_id = extract_tenant_id(database_id); + + // First determine if it is lazy mode + // The DB schema uses the corresponding tenant ID to determine if it is in lazy mode + if (OB_FAIL(guard_.is_lazy_mode(tenant_id, is_lazy))) { + LOG_ERROR("get is_lazy_mode() fail when get_database_schema_info", KR(ret), K(tenant_id), + K(database_id)); + } else if (is_lazy) { + // get Full Database Schema in lazy mode + const ObDatabaseSchema *db_schema = NULL; + if (OB_FAIL(get_database_schema(database_id, db_schema, timeout))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get_database_schema fail when get database name", KR(ret), K(database_id), K(is_lazy)); + } + } else if (OB_ISNULL(db_schema)) { + LOG_ERROR("schema error: database schema is NULL", K(tenant_id), K(database_id)); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else { + db_schema_info.reset(database_id, + db_schema->get_schema_version(), + db_schema->get_database_name()); + } + } else { + // get Simple Database Schema in non-lazy mode + const ObSimpleDatabaseSchema *simple_db_schema = NULL; + if (OB_FAIL(get_database_schema(database_id, simple_db_schema, timeout))) { + if (OB_TIMEOUT != ret && OB_TENANT_HAS_BEEN_DROPPED != ret) { + LOG_ERROR("get_database_schema fail when get database name", KR(ret), K(tenant_id), + K(database_id), K(is_lazy)); + } + } else if (OB_ISNULL(simple_db_schema)) { + LOG_WARN("schema error: simple database schema is NULL", K(database_id)); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else { + db_schema_info.reset(database_id, + simple_db_schema->get_schema_version(), + simple_db_schema->get_database_name()); + } + } + return ret; +} + +int ObLogSchemaGuard::get_available_tenant_ids(common::ObIArray<uint64_t> &tenant_ids, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // The schema guard must be in fallback mode, otherwise the following function may fail, so this is checked here as a precaution + 
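+  // Note: the RETRY_ON_FAIL macro below only executes while ret is still OB_SUCCESS, so a failed fallback-mode check short-circuits the call and its error code is returned as-is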
ret = check_force_fallback_mode_("get_available_tenant_ids"); + RETRY_ON_FAIL(timeout, guard_, get_available_tenant_ids, tenant_ids); + return ret; +} + +// Pure memory operations, no retries, no changes here for consistency of code logic +int ObLogSchemaGuard::get_table_schemas_in_tenant(const uint64_t tenant_id, + ObIArray &table_schemas, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // The schema guard must be in fallback mode, otherwise the following function may be called with an error, so here is a precaution + // Check both SYS tenants and target tenants + ret = check_force_fallback_mode_("get_table_schemas_in_tenant"); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_table_schemas_in_tenant, tenant_id, table_schemas); + return ret; +} + +int ObLogSchemaGuard::get_tablegroup_schema(uint64_t tablegroup_id, + const ObTablegroupSchema *&tablegourp_schema, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = extract_tenant_id(tablegroup_id); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_tablegroup_schema, tablegroup_id, tablegourp_schema); + return ret; +} + +int ObLogSchemaGuard::query_partition_status(const common::ObPartitionKey &pkey, + oceanbase::share::schema::PartitionStatus &part_status, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = pkey.get_tenant_id(); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, query_partition_status, pkey, part_status); + return ret; +} + +int ObLogSchemaGuard::get_schema_version(const uint64_t tenant_id, int64_t &schema_version) const +{ + int ret = OB_SUCCESS; + schema_version = OB_INVALID_TIMESTAMP; + + if (OB_FAIL(guard_.get_schema_version(tenant_id, schema_version))) { + LOG_ERROR("get_schema_version fail", KR(ret), K(tenant_id), K(schema_version)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == schema_version)) { + LOG_ERROR("schema_version is invalid", K(tenant_id), K(schema_version)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + + return ret; +} + + +int ObLogSchemaGuard::get_table_ids_in_tablegroup(const uint64_t tenant_id, + const uint64_t tablegroup_id, + common::ObIArray &table_id_array, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + // The schema guard must be in fallback mode, otherwise the following function may be called with an error, so here is a precaution + // Check both SYS tenants and normal tenants + ret = check_force_fallback_mode_("get_table_ids_in_tablegroup"); + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_table_ids_in_tablegroup, tenant_id, tablegroup_id, table_id_array); + return ret; +} + +int ObLogSchemaGuard::get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, guard_, get_tenant_compat_mode, tenant_id, compat_mode); + return ret; +} + +/////////////////////////////////////////////////////////////////////////////// + +ObLogSchemaGetter::ObLogSchemaGetter() : inited_(false), + schema_service_(ObMultiVersionSchemaService::get_instance()) +{ +} + +ObLogSchemaGetter::~ObLogSchemaGetter() +{ + destroy(); +} + +int ObLogSchemaGetter::init(common::ObMySQLProxy &mysql_proxy, + common::ObCommonConfig *config, + const int64_t max_cached_schema_version_count, + const int64_t max_history_schema_version_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("schema getter has been initialized"); + ret = OB_INIT_TWICE; + } else 
if (OB_ISNULL(config) + || OB_UNLIKELY(max_cached_schema_version_count <= 0) + || OB_UNLIKELY(max_history_schema_version_count <= 0)) { + LOG_ERROR("invalid argument", K(config), K(max_cached_schema_version_count), K(max_history_schema_version_count)); + ret = OB_INVALID_ARGUMENT; + // init Schema Service + } else if (OB_FAIL(schema_service_.init(&mysql_proxy, NULL, config, max_cached_schema_version_count, + max_history_schema_version_count, false))) { + LOG_ERROR("init schema service fail", KR(ret), K(max_cached_schema_version_count), K(max_history_schema_version_count)); + } else { + // refresh Schema + ObSchemaService::g_ignore_column_retrieve_error_ = true; + ObSchemaService::g_liboblog_mode_ = true; + ObMultiVersionSchemaService::g_skip_resolve_materialized_view_definition_ = true; + const uint64_t tenant_id = OB_INVALID_TENANT_ID; // to refresh schema of all tenants + const int64_t timeout = GET_SCHEMA_TIMEOUT_ON_START_UP; + bool is_init_succ = false; + + // tenant_id is OB_INVALID_TENANT_ID, can't use RETRY_ON_FAIL_WITH_TENANT_ID + RETRY_ON_FAIL(timeout, schema_service_, auto_switch_mode_and_refresh_schema, tenant_id); + + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // Indicates that there are some tenant deletions that do not affect refreshing the schema to the latest version + LOG_WARN("tenant has been dropped when auto_switch_mode_and_refresh_schema, ignore it", + KR(ret), K(tenant_id)); + is_init_succ = true; + ret = OB_SUCCESS; + } else if (OB_SUCCESS != ret) { + LOG_ERROR("auto_switch_mode_and_refresh_schema failed", KR(ret), K(tenant_id), K(timeout)); + } else { + is_init_succ = true; + } + + if (OB_SUCC(ret) && is_init_succ) { + inited_ = true; + LOG_INFO("init schema service succ", KR(ret), K(max_cached_schema_version_count), K(max_history_schema_version_count)); + } + } + + return ret; +} + +void ObLogSchemaGetter::destroy() +{ + inited_ = false; +} + +int ObLogSchemaGetter::get_lazy_schema_guard(const uint64_t tenant_id, + const int64_t version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard) +{ + int ret = OB_SUCCESS; + const ObMultiVersionSchemaService::RefreshSchemaMode force_lazy = ObMultiVersionSchemaService::RefreshSchemaMode::FORCE_LAZY; + const bool specify_version_mode = true; // Specify the version to get the schema schema + int64_t refreshed_version = OB_INVALID_VERSION; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("schema getter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(version <= 0)) { + LOG_ERROR("invalid argument", K(version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_schema_guard_(tenant_id, specify_version_mode, version, timeout, + schema_guard, force_lazy, refreshed_version))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_ fail", KR(ret), K(tenant_id), K(specify_version_mode), + K(version), K(force_lazy)); + } + } + + return ret; +} + +int ObLogSchemaGetter::get_fallback_schema_guard(const uint64_t tenant_id, + const int64_t version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard) +{ + int ret = OB_SUCCESS; + const ObMultiVersionSchemaService::RefreshSchemaMode force_fallback = ObMultiVersionSchemaService::RefreshSchemaMode::FORCE_FALLBACK; + const bool specify_version_mode = true; // Specify the version to get the schema schema + int64_t refreshed_version = OB_INVALID_VERSION; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("schema getter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(version <= 0)) { + LOG_ERROR("invalid argument", K(version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(get_schema_guard_(tenant_id, specify_version_mode, version, timeout, + schema_guard, force_fallback, refreshed_version))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_ fail", KR(ret), K(tenant_id), K(specify_version_mode), + K(version), K(force_fallback)); + } + } + + return ret; +} + +int ObLogSchemaGetter::check_schema_guard_suitable_for_table_(IObLogSchemaGuard &schema_guard, + const uint64_t table_id, + const int64_t expected_version, + const int64_t refreshed_version, + const int64_t timeout, + const ObSimpleTableSchemaV2 *&tb_schema, + bool &is_suitable) +{ + int ret = OB_SUCCESS; + // Determine whether the latest locally refreshed schema meets the requirements + // This is an optimization: instead of fetching the schema of a specific version, work directly on the latest refreshed version of the schema. + // + // The prerequisite is that the schema information of the Table Schema, DB Name, Tenant Name and other dependencies has not changed between the specified + // version and the latest version. So, the key to correctness here is determining whether that schema information has changed. + // + // Criteria: all of the following conditions are met + // 1. table_schema_version <= expected_version <= latest_schema_version + // 2. db_schema_version <= expected_version <= latest_schema_version + // 3. TODO: tenant-level information changes should also be taken into account, e.g. a tenant name change + // + // Essentially, apart from the schema information of tables, the database and tenant schemas are currently relied on only for their names, namely for + // whitelist matching and for setting the DBName field on the binlog record. So, changes to these two pieces of information should also be considered. + // + // 1. database name: renaming a database is currently not supported, but dropping a database into the recyclebin will change its name + // 2. tenant name: rename is currently not supported; once tenant rename is supported, the tenant name will need to be managed at the tenant level, and by then it will be necessary to determine whether the tenant name has changed. 
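The reuse test described above reduces to a pair of version comparisons. A minimal standalone sketch of that condition (the helper name and free-function form are illustrative only, not part of this patch):

    // A guard refreshed to refreshed_version can serve a request for expected_version
    // if neither the table nor its database has a schema change newer than expected_version.
    static bool is_refreshed_guard_reusable(const int64_t table_schema_version,
                                            const int64_t db_schema_version,
                                            const int64_t expected_version,
                                            const int64_t refreshed_version)
    {
      return table_schema_version <= expected_version
          && db_schema_version <= expected_version
          && expected_version <= refreshed_version;
    }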
+ + // The default is to meet the requirements + is_suitable = true; + const char *reason = ""; + const ObSimpleDatabaseSchema *simple_db_schema = NULL; + + // Get the Simple Table Schema, which is already cached locally, it just fetches the structure from the local cache + if (OB_FAIL(schema_guard.get_table_schema(table_id, tb_schema, timeout))) { + LOG_ERROR("get table schema from local schema cache fail", KR(ret), K(table_id), + K(expected_version), K(refreshed_version)); + } else if (OB_ISNULL(tb_schema)) { + is_suitable = false; + reason = "table schema is NULL in local refreshed schema, maybe dropped"; + } else if (tb_schema->get_schema_version() > expected_version) { + // The schema of the table has changed and does not meet the desired version + is_suitable = false; + reason = "table schema is changed after expected version"; + } + // Start get Database information + else if (OB_FAIL(schema_guard.get_database_schema(tb_schema->get_database_id(), + simple_db_schema, timeout))) { + LOG_ERROR("get database schema from local schema cache fail", KR(ret), + K(tb_schema->get_database_id()), K(table_id), + K(expected_version), K(refreshed_version)); + } else if (OB_ISNULL(simple_db_schema)) { + is_suitable = false; + reason = "database schema is NULL in local refreshed schema, maybe dropped"; + } else if (simple_db_schema->get_schema_version() > expected_version) { + is_suitable = false; + reason = "database schema is changed after expected version"; + } else { + // Finally all conditions are met and the latest version of schema guard is available + is_suitable = true; + LOG_DEBUG("[GET_SCHEMA] [USE_LATEST_SCHEMA_GUARD]", + K(table_id), K(expected_version), K(refreshed_version), + "delta", refreshed_version - expected_version); + } + + if (OB_SUCCESS == ret && ! is_suitable) { + LOG_DEBUG("[GET_SCHEMA] [NEED_SPECIFY_VERSION]", K(reason), K(table_id), + K(expected_version), K(refreshed_version), "delta", refreshed_version - expected_version); + } + + return ret; +} + +int ObLogSchemaGetter::get_schema_guard_and_table_schema( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard, + const ObSimpleTableSchemaV2 *&tb_schema) +{ + int ret = OB_SUCCESS; + // Get ObSimpleTableSchemaV2 must use force lazy mode + const bool force_lazy = true; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("schema getter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_schema_guard_and_simple_table_schema_(table_id, expected_version, timeout, force_lazy, + schema_guard, tb_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_and_simple_table_schema_ fail", KR(ret), K(table_id), K(expected_version), K(force_lazy), + KPC(tb_schema)); + } + } else { + // succ + } + + return ret; +} + +int ObLogSchemaGetter::get_schema_guard_and_full_table_schema( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObTableSchema *&full_tb_schema) +{ + int ret = OB_SUCCESS; + // ObTableSchema cannot be obtained by force lazy mode + const bool force_lazy = false; + const ObSimpleTableSchemaV2 *tb_schema = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("schema getter has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_schema_guard_and_simple_table_schema_(table_id, expected_version, timeout, force_lazy, + schema_guard, tb_schema))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_and_simple_table_schema_ fail", KR(ret), K(table_id), K(expected_version), K(force_lazy), + KPC(tb_schema)); + } + } else if (OB_FAIL(schema_guard.get_table_schema(table_id, full_tb_schema, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get table schema fail", KR(ret), K(table_id), K(expected_version), K(force_lazy), KPC(full_tb_schema)); + } + } else { + // succ + } + + return ret; +} + +int ObLogSchemaGetter::get_schema_guard_and_simple_table_schema_( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + const bool force_lazy, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObSimpleTableSchemaV2 *&tb_schema) +{ + int ret = OB_SUCCESS; + // default not ues force_fallback/force_lazy + const ObMultiVersionSchemaService::RefreshSchemaMode normal = ObMultiVersionSchemaService::RefreshSchemaMode::NORMAL; + const uint64_t tenant_id = extract_tenant_id(table_id); + int64_t refreshed_version = OB_INVALID_VERSION; + bool schema_guard_suitable = true; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("schema getter has not been initialized"); + ret = OB_NOT_INIT; + } + // First get the latest version of the local schema + // Default to get the latest local schema + else if (OB_FAIL(get_schema_guard_(tenant_id, false, expected_version, timeout, + schema_guard, normal, refreshed_version))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_schema_guard_ by non-specify_version_mode fail", KR(ret), K(tenant_id), + K(expected_version), K(normal)); + } + } + // check if it is the right schema guard + // will also get the table schema + else if (OB_FAIL(check_schema_guard_suitable_for_table_(schema_guard, + table_id, + expected_version, + refreshed_version, + timeout, + tb_schema, + schema_guard_suitable))) { + LOG_WARN("check_schema_guard_suitable_for_table_ fail", KR(ret), K(table_id), + K(expected_version), K(refreshed_version)); + } else if (schema_guard_suitable) { + // suitable + } else { + ObMultiVersionSchemaService::RefreshSchemaMode refresh_schema_mode = ObMultiVersionSchemaService::RefreshSchemaMode::NORMAL; + if (force_lazy) { + refresh_schema_mode = ObMultiVersionSchemaService::RefreshSchemaMode::FORCE_LAZY; + } + + // The latest local version of the schema guard does not meet expectations, get the specified version of the schema guard + if (OB_FAIL(get_schema_guard_(tenant_id, true, expected_version, timeout, schema_guard, + refresh_schema_mode, refreshed_version))) { + LOG_ERROR("get_schema_guard_ by specify_version_mode fail", KR(ret), K(tenant_id), + K(expected_version), K(refresh_schema_mode), K(force_lazy)); + } else if (OB_FAIL(schema_guard.get_table_schema(table_id, tb_schema, timeout))) { + LOG_ERROR("get_table_schema fail", KR(ret), K(table_id), K(tenant_id), K(expected_version)); + } else { + // success + } + } + return ret; +} + +// @retval OB_SUCCESS Success +// @retval OB_TENANT_HAS_BEEN_DROPPED Tenant has been dropped +// @retval OB_TIMEOUT Timeout +// @retval Other error codes Fail +int ObLogSchemaGetter::refresh_to_expected_version_(const uint64_t tenant_id, + const int64_t expected_version, + const int64_t timeout, + int64_t &latest_version) +{ + int ret = OB_SUCCESS; + int64_t start_time = get_timestamp(); + latest_version = 
OB_INVALID_VERSION; + + // Requires valid tenant ID and version + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) + || OB_UNLIKELY(OB_INVALID_VERSION == expected_version)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", K(tenant_id), K(expected_version)); + } else if (OB_FAIL(schema_service_.get_tenant_refreshed_schema_version(tenant_id, latest_version))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Circumvent the problem of liboblog reporting an error on the first refresh of the tenant's schema + ret = OB_SUCCESS; + latest_version = OB_INVALID_VERSION; + LOG_INFO("first load tenant schema, force renew schema", KR(ret), K(tenant_id)); + } else { + LOG_WARN("fail to get refreshed schema version", KR(ret), K(tenant_id)); + } + } + + if (OB_FAIL(ret)) { + // fail + } else if (OB_INVALID_VERSION == latest_version || latest_version < expected_version) { + // If the tenant version is invalid, or if the desired schema version is not reached, then a refresh of the schema is requested + LOG_INFO("begin refresh schema to expected version", K(tenant_id), K(expected_version), + K(latest_version)); + + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, schema_service_, auto_switch_mode_and_refresh_schema, tenant_id, + expected_version); + + if (OB_SUCC(ret)) { + // Get the latest schema version again + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, schema_service_, get_tenant_refreshed_schema_version, tenant_id, latest_version); + } + + int64_t cost_time = get_timestamp() - start_time; + LOG_INFO("refresh schema to expected version", KR(ret), K(tenant_id), + K(latest_version), K(expected_version), "delta", latest_version - expected_version, + "latest_version", TS_TO_STR(latest_version), + "cost_time", TVAL_TO_STR(cost_time)); + } + + return ret; +} + +// Unique interface for obtaining Schema Guard +// +// specify_version_mode indicates whether it is the specified version to get the schema mode. 
+// if true, it requires the schema of the specified version (expected_version) to be fetched +// If false, the schema is fetched directly from the local refresh and requires a version number greater than or equal to expected_version +// +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_TIMEOUT timeout +// @retval other error code fail +int ObLogSchemaGetter::get_schema_guard_(const uint64_t tenant_id, + const bool specify_version_mode, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &guard, + const ObMultiVersionSchemaService::RefreshSchemaMode refresh_schema_mode, + int64_t &refreshed_version) +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard &schema_guard = guard.get_guard(); + + // version refreshed locally + refreshed_version = OB_INVALID_VERSION; + + // First refresh the schema to ensure it is greater than or equal to expected_version + if (OB_FAIL(refresh_to_expected_version_(tenant_id, expected_version, timeout, refreshed_version))) { + if (OB_TENANT_HAS_BEEN_DROPPED != ret && OB_TIMEOUT != ret) { + LOG_ERROR("refresh_to_expected_version_ fail", KR(ret), K(tenant_id), K(expected_version)); + } + } else { + int64_t target_version = OB_INVALID_VERSION;; + + if (specify_version_mode) { + // If the requested version is newer than the latest version, the latest version is used + if (expected_version > refreshed_version) { + LOG_INFO("asked schema version is greater than latest schema version", K(tenant_id), + K(expected_version), K(refreshed_version)); + // Take the current version + target_version = refreshed_version; + } else { + // Otherwise the specified version is used + target_version = expected_version; + } + } else { + // Take the latest locally refreshed version + target_version = OB_INVALID_VERSION; + } + + // The corresponding sys tenant schema version does not need to be provided when fetching tenant level schema, the locally refreshed version is used by default + const int64_t sys_schema_version = OB_INVALID_VERSION; + // The get_tenant_schema_guard() function may also return an error, here need to retry until it times out or succeeds + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, schema_service_, get_tenant_schema_guard, tenant_id, schema_guard, + target_version, + sys_schema_version, + refresh_schema_mode); + + if (OB_FAIL(ret)) { + LOG_WARN("get_tenant_schema_guard fail", KR(ret), K(tenant_id), K(target_version), + K(sys_schema_version), K(refresh_schema_mode), K(specify_version_mode), K(refreshed_version)); + } + + if (OB_SUCC(ret)) { + // set tenant id + guard.set_tenant_id(tenant_id); + } + } + + return ret; +} + +int ObLogSchemaGetter::get_schema_version_by_timestamp(const uint64_t tenant_id, + const int64_t timestamp, + int64_t &schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + ObRefreshSchemaStatus schema_status; + schema_status.tenant_id_ = tenant_id; // use strong read + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, schema_service_, get_schema_version_by_timestamp, + schema_status, tenant_id, timestamp, schema_version); + + if (OB_SUCC(ret)) { + LOG_INFO("get_schema_version_by_timestamp", K(tenant_id), K(timestamp), K(schema_version)); + } + + return ret; +} + +int ObLogSchemaGetter::get_first_trans_end_schema_version(const uint64_t tenant_id, + int64_t &schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, schema_service_, get_first_trans_end_schema_version, tenant_id, + 
schema_version); + return ret; +} + +int ObLogSchemaGetter::load_split_schema_version(int64_t &split_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + RETRY_ON_FAIL(timeout, schema_service_, load_split_schema_version, split_schema_version); + return ret; +} + +int ObLogSchemaGetter::get_tenant_refreshed_schema_version(const uint64_t tenant_id, int64_t &version) +{ + int ret = OB_SUCCESS; + int64_t refreshed_version = 0; + const int64_t timeout = GET_SCHEMA_TIMEOUT_ON_START_UP; + + RETRY_ON_FAIL_WITH_TENANT_ID(tenant_id, timeout, schema_service_, get_tenant_refreshed_schema_version, tenant_id, refreshed_version); + if (OB_FAIL(ret)) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_INFO("get tenant refreshed schema version: tenant has been dropped", KR(ret), K(tenant_id), K(refreshed_version)); + } else { + LOG_WARN("fail to get tenant refreshed schema version", KR(ret), K(tenant_id), K(refreshed_version)); + } + } else if (refreshed_version <= share::OB_CORE_SCHEMA_VERSION) { + // When the tenant's schema is not swiped before, its version is invalid and will return less than or equal to OB_CORE_SCHEMA_VERSION + // We also consider this to be a case where the tenant does not exist + LOG_INFO("get_tenant_refreshed_schema_version: tenant schema version is not refreshed, " + "consider tenant not exist", + K(tenant_id), K(refreshed_version)); + ret = OB_TENANT_HAS_BEEN_DROPPED; + } else { + version = refreshed_version; + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_schema_getter.h b/src/liboblog/src/ob_log_schema_getter.h new file mode 100644 index 0000000000000000000000000000000000000000..afd616398058823f085e564d88aa109f67a108aa --- /dev/null +++ b/src/liboblog/src/ob_log_schema_getter.h @@ -0,0 +1,683 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_SCHEMA_GETTER_H__ +#define OCEANBASE_LIBOBLOG_SCHEMA_GETTER_H__ + +#include "lib/mysqlclient/ob_mysql_proxy.h" // ObMySQLProxy +#include "share/schema/ob_multi_version_schema_service.h" // ObMultiVersionSchemaService +#include "share/schema/ob_schema_getter_guard.h" // ObSchemaGetterGuard +#include "share/schema/ob_schema_struct.h" // TableStatus +#include "share/schema/ob_schema_mgr.h" // ObSimpleDatabaseSchema, ObSimpleTenantSchema +#include "share/ob_worker.h" // CompatMode + +namespace oceanbase +{ +namespace share +{ +namespace schema +{ +class ObTableSchema; +class ObSimpleTableSchemaV2; +class ObDatabaseSchema; +class ObTenantSchema; +class ObTablegroupSchema; +} // namespace schema +} // namespace share +namespace common +{ +class ObCommonConfig; + +namespace sqlclient +{ +class ObMySQLServerProvider; +} // namespace sqlclient +} // namespace common + +namespace liboblog +{ + +// Database Schema信息 +struct DBSchemaInfo +{ + uint64_t db_id_; + int64_t version_; + const char *name_; + + TO_STRING_KV(K_(db_id), K_(version), K_(name)); + + DBSchemaInfo() { reset(); } + + void reset() + { + db_id_ = common::OB_INVALID_ID; + version_ = common::OB_INVALID_VERSION; + name_ = NULL; + } + + void reset(const uint64_t db_id, + const int64_t version, + const char *name) + { + db_id_ = db_id; + version_ = version; + name_ = name; + } + + bool is_valid() const + { + // Only the most basic information is verified here + return common::OB_INVALID_ID != db_id_ + && common::OB_INVALID_VERSION != version_ + && NULL != name_; + } +}; + +// Schema info of tenant +struct TenantSchemaInfo +{ + uint64_t tenant_id_; + int64_t version_; + const char *name_; + // tenant whether in restore state or not at specified version + bool is_restore_; + + TO_STRING_KV(K_(tenant_id), K_(version), K_(name), K(is_restore_)); + + TenantSchemaInfo() { reset(); } + + void reset() + { + tenant_id_ = common::OB_INVALID_TENANT_ID; + version_ = common::OB_INVALID_VERSION; + name_ = NULL; + is_restore_ = false; + } + + void reset(const uint64_t tenant_id, const int64_t version, const char *name, const bool is_restore) + { + tenant_id_ = tenant_id; + version_ = version; + name_ = name; + is_restore_ = is_restore; + } + + bool is_valid() const + { + // Only the most basic information is verified here + return common::OB_INVALID_TENANT_ID != tenant_id_ + && common::OB_INVALID_VERSION != version_ + && NULL != name_; + } +}; + +///////////////////////////////////// IObLogSchemaGuard ///////////////////////////////// +class IObLogSchemaGuard +{ + friend class ObLogSchemaGetter; +public: + virtual ~IObLogSchemaGuard() {} + +public: + virtual oceanbase::share::schema::ObSchemaGetterGuard &get_guard() = 0; + virtual void set_tenant_id(const uint64_t tenant_id) = 0; + + /// Get Table Schema + /// + /// @param [in] table_id Target table ID + /// @param [out] table_schema The table schema returned + /// @param [in] timeout Timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_table_schema(const uint64_t table_id, + const oceanbase::share::schema::ObTableSchema *&table_schema, + const int64_t timeout) = 0; + + /// Get Simple Table Schema (does not contain column information, only partial meta information is stored) + /// + /// @param [in] table_id Target table ID + /// @param [out] table_schema Returned table schema + /// @param [in] timeout timeout + /// + 
/// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_table_schema(const uint64_t table_id, + const oceanbase::share::schema::ObSimpleTableSchemaV2 *&table_schema, + const int64_t timeout) = 0; + + /// Get Database Schema related information + /// This function masks the DB Schema details externally for the purpose of internal optimization based on the Schema Guard type + /// + /// @param [in] database_id Target DB ID + /// @param [out] db_schema_info The database schema information returned + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_database_schema_info(uint64_t database_id, + DBSchemaInfo &db_schema_info, + const int64_t timeout) = 0; + + /// Get Tenant Schema information + /// This function is externally masked to get Tenant Schema details for the purpose of internal optimization based on Schema Guard types + /// + /// @param [in] tenant_id Target DB ID + /// @param [out] tenant_schema_info Tenant Schema information returned + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_tenant_schema_info(uint64_t tenant_id, + TenantSchemaInfo &tenant_schema_info, + const int64_t timeout) = 0; + + /// Get all Tenant IDs that have been created successfully and have not been deleted + /// + /// @param [out] tenant_ids Returned array of tenant ids + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_available_tenant_ids(common::ObIArray &tenant_ids, + const int64_t timeout) = 0; + + /// Get all Simple Table Schema for the tenant + /// NOTE: The guard must be in fallback mode + /// + /// @param [in] tenant_id Tenant ID + /// @param [out] table_schemas Array of returned Table Schema's + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_table_schemas_in_tenant(const uint64_t tenant_id, + common::ObIArray &table_schemas, + const int64_t timeout) = 0; + + /// Get all Tablegroup Schema for a given tenant + /// NOTE: guards must get by fallback mode + /// + /// @param [in] tenant_id Tenant ID + /// @param [out] tablegroup_schemas tablegroup schema array + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_tablegroup_schemas_in_tenant(const uint64_t tenant_id, + common::ObArray &tablegroup_schemas, + const int64_t timeout) = 0; + + /// get Tablegroup Schema + /// + /// @param [in] tablegroup_id tablegroup id + /// @param [out] tablegroup_schema returned able Group Schema + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_tablegroup_schema(uint64_t tablegroup_id, + const oceanbase::share::schema::ObTablegroupSchema 
*&tablegourp_schema, + const int64_t timeout) = 0; + + /// Querying the status of a partition + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int query_partition_status(const common::ObPartitionKey &pkey, + oceanbase::share::schema::PartitionStatus &table_status, + const int64_t timeout) = 0; + + virtual int get_schema_version(const uint64_t tenant_id, int64_t &schema_version) const = 0; + + /// Get all the table ids in the tablegroup + /// NOTE: guards must get by fallback mode + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_table_ids_in_tablegroup(const uint64_t tenant_id, + const uint64_t tablegroup_id, + common::ObIArray &table_id_array, + const int64_t timeout) = 0; + + /// Get the working mode of the specified tenant: mysql or oracle + /// Note: This interface does not return OB_SCHEMA_ERROR/OB_TENANT_HAS_BEEN_DROPPED and does not need to handle such errors + /// + /// @param [in] tenant_id tenant ID + /// @param [out] compat_mode MYSQL, ORACLE + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + const int64_t timeout) = 0; + +protected: + /// get tenant mode + /// + /// @param [in] tenant_id target tenant id + /// @param [out] tenant_info returned tenant schema + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_tenant_info(uint64_t tenant_id, + const oceanbase::share::schema::ObTenantSchema *&tenant_info, + const int64_t timeout) = 0; + + /// get Simple Tenant Schema + /// + /// @param [in] tenant_id target tenant id + /// @param [out] tenant_info returned tenant schema + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_tenant_info(uint64_t tenant_id, + const oceanbase::share::schema::ObSimpleTenantSchema *&tenant_info, + const int64_t timeout) = 0; + + /// get Database Schema + /// + /// @param [in] tenant_id target tenant id + /// @param [in] database_id target db id + /// @param [out] database_schema returned database schema + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_database_schema(uint64_t database_id, + const oceanbase::share::schema::ObDatabaseSchema *&database_schema, + const int64_t timeout) = 0; + + /// get Database Schema + /// + /// @param [in] database_id target DB ID + /// @param [out] database_schema returned database schema + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_database_schema(uint64_t database_id, + const oceanbase::share::schema::ObSimpleDatabaseSchema *&database_schema, + const int64_t timeout) = 0; + + 
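// Editor's note (hedged usage sketch, not part of this patch): the retval contract documented
// on the accessors above treats OB_TENANT_HAS_BEEN_DROPPED and OB_TIMEOUT as expected,
// non-fatal outcomes. A hypothetical caller, with guard/table_id/timeout supplied elsewhere,
// would therefore look roughly like:
//
//   const oceanbase::share::schema::ObTableSchema *tbl = NULL;
//   int ret = guard.get_table_schema(table_id, tbl, timeout);
//   if (OB_TENANT_HAS_BEEN_DROPPED == ret) {
//     // tenant has been dropped: treat as a normal condition and stop retrying
//   } else if (OB_TIMEOUT == ret) {
//     // deadline exceeded: the caller decides whether to retry with a new timeout
//   } else if (OB_SUCCESS != ret) {
//     LOG_ERROR("get_table_schema fail", KR(ret), K(table_id));
//   }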
////********************************************* + //// NOTE: Note that subsequent additions to the interface will have to take into account the return value case when the tenant does not exist. + //// I: requires the schema module to return a specific error code OB_TENANT_HAS_BEEN_DROPPED when the tenant does not exist, liboblog will exit the retry loop when it encounters this error code + //// + //// II: The use of the place to determine the tenant does not exist error code OB_TENANT_HAS_BEEN_DROPPED, considered normal +}; + +///////////////////////////////////// IObLogSchemaGetter ///////////////////////////////// +class IObLogSchemaGetter +{ +public: + virtual ~IObLogSchemaGetter() {} + +public: + // allow_lazy: lazy mode: all full schemas are supported, ObSimpleTableSchemaV2 + // non-lazy mode: all supported + // force_fallback: all supported + // force_lazy: fetching ObSimpleTableSchemaV2, ObTablegroupSchema, full db_schema/tenant_schema supported + // full table_schema is not supported + /// + /// Get Schema Guard in Lazy mode, here use force_lazy mode + /// + /// The modules that currently call this interface are. + /// 1. PartMgr: refresh schema to get ObTablegroupSchema and ObSimpleTableSchemaV2; and query_partition_status + /// 2. DdlHandler: DDLs that do not require PartMgr processing, need to refresh schema to get tenant_name and db_name + /// 3. TenantMgr: add tenant, refresh schema to get tenant_schema_info + /// + /// @param [in] tenant_id Tenant ID + /// @param [in] version target version + /// @param [in] timeout timeout timeout + /// @param [out] schema_guard The Schema Guard returned + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_lazy_schema_guard(const uint64_t tenant_id, + const int64_t version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard) = 0; + + /// Get Schema Guard in fallback mode + /// + /// @param [in] tenant_id Tenant ID + /// @param [in] version target version + /// @param [in] timeout timeout timeout + /// @param [out] schema_guard The Schema Guard returned + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_fallback_schema_guard(const uint64_t tenant_id, + const int64_t version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard) = 0; + + /// Get Simple Table Schema while getting Schema Guard + /// NOTE: + /// 1. Special optimizations can be made inside the function + /// 2. The returned Schema Guard may or may not be in Lazy mode + /// + /// Implementation flow: + /// 1. Get the latest version of the local schema by default, here it is the full schema already refreshed, no need for force_lazy + /// 2. Check if the local latest meets the condition, if it does then it can be used + /// 3. Otherwise force force_lazy mode to fetch schema + /// + /// Usage Scenarios: + /// 1. Formatter formatting data + /// 2. 
ParMgr refreshes schema + /// + /// @param [in] tenant_id Tenant ID + /// @param [in] expected_version Target version + /// @param [in] timeout timeout timeout + /// @param [out] schema_guard Returned Schema Guard + /// @param [out] tb_schema Returned Simple Table Schema + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_schema_guard_and_table_schema( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObSimpleTableSchemaV2 *&tb_schema) = 0; + + /// Get the Table Schema while getting the Schema Guard + /// + /// @param [in] tenant_id Tenant ID + /// @param [in] expected_version Target version + /// @param [in] timeout timeout timeout + /// @param [out] schema_guard Returned Schema Guard + /// @param [out] full_tb_schema Full Table Schema returned + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_schema_guard_and_full_table_schema( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObTableSchema *&full_tb_schema) = 0; + + /// Get the corresponding Schema version, based on the timestamp + /// + /// @param [in] tenant_id Tenant ID + /// @param [in] timestamp Timestamp + /// @param [out] schema_version The Schema version to return + /// @param [in] timeout timeout + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_schema_version_by_timestamp(const uint64_t tenant_id, + const int64_t timestamp, + int64_t &schema_version, + const int64_t timeout) = 0; + + virtual bool is_inited() const = 0; + + /// Get the schema version at the end of the tenant's first DDL transaction, i.e. the first available schema version + /// Users dynamically add tenants + /// + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int get_first_trans_end_schema_version(const uint64_t tenant_id, + int64_t &schema_version, + const int64_t timeout) = 0; + + /// get split schema version + /// + /// @retval OB_SUCCESS success + /// @retval OB_TIMEOUT timeout + /// @retval other error code fail + virtual int load_split_schema_version(int64_t &split_schema_version, const int64_t timeout) = 0; + + // Print statistics + virtual void print_stat_info() = 0; + + // Periodic memory recycling + virtual void try_recycle_memory() = 0; + + // Get the latest schema version that the tenant has been flushed to + // 1. For version 1.4, the schema is not split, so even though the tenant_id is specified, the latest schema version of the cluster is still returned + // 2. 
For version 2.21 and above, the schema service is guaranteed to return the tenant level schema version + // + /// @retval OB_SUCCESS success + /// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + /// @retval other error code fail + virtual int get_tenant_refreshed_schema_version(const uint64_t tenant_id, int64_t &version) = 0; + + //// NOTE: Note that all subsequent additions to the interface should consider the return value case when the tenant does not exist. + //// I: requires the schema module to return a specific error code OB_TENANT_HAS_BEEN_DROPPED when a tenant does not exist. liboblog encounters this error code and only this error code will determine that the tenant has been deleted and exit the retry loop + //// + //// II: Where used to determine that the tenant does not have the error code OB_TENANT_HAS_BEEN_DROPPED, it is considered normal +}; + + +///////////////////////////////////// ObLogSchemaGuard ///////////////////////////////// +class ObLogSchemaGuard : public IObLogSchemaGuard +{ +public: + ObLogSchemaGuard(); + virtual ~ObLogSchemaGuard(); + +public: + share::schema::ObSchemaGetterGuard &get_guard() { return guard_; } + void set_tenant_id(const uint64_t tenant_id) { tenant_id_ = tenant_id; } + +public: + int get_table_schema(const uint64_t table_id, + const share::schema::ObTableSchema *&table_schema, + const int64_t timeout); + int get_table_schema(const uint64_t table_id, + const share::schema::ObSimpleTableSchemaV2 *&table_schema, + const int64_t timeout); + int get_tablegroup_schemas_in_tenant(const uint64_t tenant_id, + common::ObArray &tablegroup_schemas, + const int64_t timeout); + int get_database_schema_info(uint64_t database_id, + DBSchemaInfo &db_schema_info, + const int64_t timeout); + int get_tenant_schema_info(uint64_t tenant_id, + TenantSchemaInfo &tenant_schema_info, + const int64_t timeout); + int get_tenant_name(uint64_t tenant_id, + const char *&tenant_name, + const int64_t timeout); + int get_available_tenant_ids(common::ObIArray &tenant_ids, + const int64_t timeout); + int get_table_schemas_in_tenant(const uint64_t tenant_id, + common::ObIArray &table_schemas, + const int64_t timeout); + int get_tablegroup_schema(uint64_t tablegroup_id, + const share::schema::ObTablegroupSchema *&tablegourp_schema, + const int64_t timeout); + virtual int query_partition_status(const common::ObPartitionKey &pkey, + oceanbase::share::schema::PartitionStatus &part_status, + const int64_t timeout); + int get_schema_version(const uint64_t tenant_id, int64_t &schema_version) const; + int get_table_ids_in_tablegroup(const uint64_t tenant_id, + const uint64_t tablegroup_id, + common::ObIArray &table_id_array, + const int64_t timeout); + int get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + const int64_t timeout); + +protected: + int get_tenant_info(uint64_t tenant_id, + const share::schema::ObTenantSchema *&tenant_info, + const int64_t timeout); + int get_tenant_info(uint64_t tenant_id, + const oceanbase::share::schema::ObSimpleTenantSchema *&tenant_info, + const int64_t timeout); + int get_database_schema(uint64_t database_id, + const share::schema::ObDatabaseSchema *&database_schema, + const int64_t timeout); + int get_database_schema(uint64_t database_id, + const oceanbase::share::schema::ObSimpleDatabaseSchema *&database_schema, + const int64_t timeout); + +private: + int check_force_fallback_mode_(const char *func) const; + int check_force_fallback_mode_(const uint64_t tenant_id, const char *func) const; + int 
do_check_force_fallback_mode_(const bool is_lazy, const char *func) const; + int get_is_lazy_mode_(bool &is_lazy) const; + +private: + uint64_t tenant_id_; + share::schema::ObSchemaGetterGuard guard_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSchemaGuard); +}; + +///////////////////////////////////// ObLogSchemaGetter ///////////////////////////////// + +typedef oceanbase::share::schema::ObMultiVersionSchemaService SchemaServiceType; + +class ObLogSchemaGetter : public IObLogSchemaGetter +{ +public: + static const int64_t RECYCLE_MEMORY_INTERVAL = 60 * 1000L * 1000L; + +public: + ObLogSchemaGetter(); + virtual ~ObLogSchemaGetter(); + +public: + int get_lazy_schema_guard(const uint64_t tenant_id, + const int64_t version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard); + int get_fallback_schema_guard(const uint64_t tenant_id, + const int64_t version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard); + int get_schema_guard_and_table_schema( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObSimpleTableSchemaV2 *&tb_schema); + int get_schema_guard_and_full_table_schema( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObTableSchema *&full_tb_schema); + int get_schema_version_by_timestamp(const uint64_t tenant_id, + const int64_t timestamp, + int64_t &schema_version, + const int64_t timeout); + bool is_inited() const { return inited_; } + int get_first_trans_end_schema_version(const uint64_t tenant_id, + int64_t &schema_version, + const int64_t timeout); + int load_split_schema_version(int64_t &split_schema_version, const int64_t timeout); + void print_stat_info() + { + schema_service_.dump_schema_statistics(); + } + void try_recycle_memory() + { + (void)schema_service_.try_eliminate_schema_mgr(); + } + int get_tenant_refreshed_schema_version(const uint64_t tenant_id, int64_t &version); + +public: + int init(common::ObMySQLProxy &mysql_proxy, + common::ObCommonConfig *config, + const int64_t max_cached_schema_version_count, + const int64_t max_history_schema_version_count); + void destroy(); + +private: + // 1. ObSimpleTableSchemaV2 must use force lazy mode + // 2. 
ObTableSchema cannot be fetched in force lazy mode + int get_schema_guard_and_simple_table_schema_( + const uint64_t table_id, + const int64_t expected_version, + const int64_t timeout, + const bool force_lazy, + IObLogSchemaGuard &schema_guard, + const oceanbase::share::schema::ObSimpleTableSchemaV2 *&tb_schema); + int get_schema_guard_(const uint64_t tenant_id, + const bool specify_version_mode, + const int64_t expected_version, + const int64_t timeout, + IObLogSchemaGuard &guard, + const share::schema::ObMultiVersionSchemaService::RefreshSchemaMode refresh_schema_mode, + int64_t &refreshed_version); + int refresh_to_expected_version_(const uint64_t tenant_id, + const int64_t expected_version, + const int64_t timeout, + int64_t &latest_version); + int check_schema_guard_suitable_for_table_(IObLogSchemaGuard &schema_guard, + const uint64_t table_id, + const int64_t expected_version, + const int64_t refreshed_version, + const int64_t timeout, + const oceanbase::share::schema::ObSimpleTableSchemaV2 *&tb_schema, + bool &is_suitable); + +private: + bool inited_; + SchemaServiceType &schema_service_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSchemaGetter); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_SCHEMA_GETTER_H__ */ diff --git a/src/liboblog/src/ob_log_sequencer1.cpp b/src/liboblog/src/ob_log_sequencer1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4ca92b705e2c6d2910f6b8ba3f171d3576834010 --- /dev/null +++ b/src/liboblog/src/ob_log_sequencer1.cpp @@ -0,0 +1,1105 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_SEQUENCER + +#include "ob_log_sequencer1.h" + +#include "lib/string/ob_string.h" // ObString +#include "lib/atomic/ob_atomic.h" +#include "lib/thread/ob_thread_name.h" +#include "storage/transaction/ob_trans_define.h" // ObTransID +#include "ob_log_instance.h" // IObLogErrHandler, TCTX +#include "ob_log_tenant.h" // ObLogTenantGuard, ObLogTenant +#include "ob_log_config.h" // TCONF +#include "ob_log_trans_ctx_mgr.h" // IObLogTransCtxMgr +#include "ob_log_trans_stat_mgr.h" // IObLogTransStatMgr +#include "ob_log_committer.h" // IObLogCommitter +#include "ob_log_data_processor.h" // IObLogDataProcessor +#include "ob_log_row_data_index.h" // ObLogRowDataIndex + +#define _STAT(level, tag_str, args...) _OBLOG_SEQUENCER_LOG(level, "[STAT] [SEQ] " tag_str, ##args) +#define STAT(level, tag_str, args...) OBLOG_SEQUENCER_LOG(level, "[STAT] [SEQ] " tag_str, ##args) +#define _ISTAT(tag_str, args...) _STAT(INFO, tag_str, ##args) +#define ISTAT(tag_str, args...) STAT(INFO, tag_str, ##args) +#define _DSTAT(tag_str, args...) _STAT(DEBUG, tag_str, ##args) +#define DSTAT(tag_str, args...) STAT(DEBUG, tag_str, ##args) + +#define REVERT_TRANS_CTX(trans_ctx) \ + do { \ + if (NULL != trans_ctx) { \ + int err = trans_ctx_mgr_->revert_trans_ctx(trans_ctx); \ + if (OB_SUCCESS != err) { \ + LOG_ERROR("revert_trans_ctx fail", K(err), K(trans_ctx)); \ + ret = OB_SUCCESS == ret ? 
err : ret; \ + } \ + trans_ctx = NULL; \ + } \ + } while (0) + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace liboblog +{ +bool ObLogSequencer::g_print_participant_not_serve_info = ObLogConfig::default_print_participant_not_serve_info; + +ObLogSequencer::ObLogSequencer() + : inited_(false), + round_value_(0), + heartbeat_round_value_(0), + trans_ctx_mgr_(NULL), + trans_stat_mgr_(NULL), + trans_committer_(NULL), + data_processor_(NULL), + err_handler_(NULL), + global_checkpoint_(OB_INVALID_TIMESTAMP), + last_global_checkpoint_(OB_INVALID_TIMESTAMP), + global_seq_(0), + br_committer_queue_seq_(0), + trans_queue_(), + trans_queue_lock_(), + total_part_trans_task_count_(0), + ddl_part_trans_task_count_(0), + dml_part_trans_task_count_(0), + hb_part_trans_task_count_(0), + queue_part_trans_task_count_(0) +{ +} + +ObLogSequencer::~ObLogSequencer() +{ + destroy(); +} + +void ObLogSequencer::configure(const ObLogConfig &config) +{ + bool print_participant_not_serve_info = config.print_participant_not_serve_info; + + ATOMIC_STORE(&g_print_participant_not_serve_info, print_participant_not_serve_info); + + LOG_INFO("[CONFIG]", K(print_participant_not_serve_info)); +} + +int ObLogSequencer::init(const int64_t thread_num, + const int64_t queue_size, + IObLogTransCtxMgr &trans_ctx_mgr, + IObLogTransStatMgr &trans_stat_mgr, + IObLogCommitter &trans_committer, + IObLogDataProcessor &data_processor, + IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogSequencer has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(queue_size <= 0)) { + LOG_ERROR("invalid arguments", K(thread_num), K(queue_size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(SequencerThread::init(thread_num, queue_size))) { + LOG_ERROR("init sequencer queue thread fail", KR(ret), K(thread_num), K(queue_size)); + } else { + round_value_ = 0; + heartbeat_round_value_ = 0; + trans_ctx_mgr_ = &trans_ctx_mgr; + trans_committer_ = &trans_committer; + trans_stat_mgr_ = &trans_stat_mgr; + data_processor_ = &data_processor; + err_handler_ = &err_handler; + global_checkpoint_ = OB_INVALID_TIMESTAMP; + last_global_checkpoint_ = OB_INVALID_TIMESTAMP; + global_seq_ = 0; + br_committer_queue_seq_ = 0; + total_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_part_trans_task_count_ = 0; + hb_part_trans_task_count_ = 0; + queue_part_trans_task_count_ = 0; + LOG_INFO("init sequencer succ", K(thread_num), K(queue_size)); + inited_ = true; + } + + return ret; +} + +void ObLogSequencer::destroy() +{ + SequencerThread::destroy(); + + lib::ThreadPool::wait(); + lib::ThreadPool::destroy(); + + inited_ = false; + round_value_ = 0; + heartbeat_round_value_ = 0; + trans_ctx_mgr_ = NULL; + trans_stat_mgr_ = NULL; + trans_committer_ = NULL; + data_processor_ = NULL; + err_handler_ = NULL; + global_checkpoint_ = OB_INVALID_TIMESTAMP; + last_global_checkpoint_ = OB_INVALID_TIMESTAMP; + global_seq_ = 0; + total_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_part_trans_task_count_ = 0; + hb_part_trans_task_count_ = 0; + queue_part_trans_task_count_ = 0; +} + +int ObLogSequencer::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogSequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(lib::ThreadPool::start())) { + LOG_ERROR("ThreadPool start fail" , KR(ret)); + } else if (OB_FAIL(SequencerThread::start())) { + LOG_ERROR("start sequencer thread fail", KR(ret), "thread_num", get_thread_num()); + } else { + LOG_INFO("start sequencer threads succ", "thread_num", get_thread_num()); + } + + return ret; +} + +void ObLogSequencer::stop() +{ + if (inited_) { + lib::ThreadPool::stop(); + SequencerThread::stop(); + LOG_INFO("stop threads succ", "thread_num", get_thread_num()); + } +} + +int ObLogSequencer::push(PartTransTask *part_trans_task, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogSequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(part_trans_task)) { + LOG_ERROR("invalid arguments", K(part_trans_task)); + ret = OB_INVALID_ARGUMENT; + } else { + const bool is_global_heartbeat = part_trans_task->is_global_heartbeat(); + uint64_t hash_value = 0; + + if (is_global_heartbeat) { + hash_value = ATOMIC_FAA(&heartbeat_round_value_, 1); + } else { + hash_value = ATOMIC_FAA(&round_value_, 1); + } + void *push_task = static_cast(part_trans_task); + RETRY_FUNC(stop_flag, *(static_cast(this)), push, push_task, hash_value, DATA_OP_TIMEOUT); + + if (OB_SUCC(ret)) { + (void)ATOMIC_AAF(&queue_part_trans_task_count_, 1); + do_stat_for_part_trans_task_count_(*part_trans_task, 1); + } + + if (OB_FAIL(ret)) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task into sequencer fail", KR(ret), K(push_task), K(hash_value)); + } + } + } + + return ret; +} + +void ObLogSequencer::get_task_count(SeqStatInfo &stat_info) +{ + stat_info.total_part_trans_task_count_ = ATOMIC_LOAD(&total_part_trans_task_count_); + stat_info.ddl_part_trans_task_count_ = ATOMIC_LOAD(&ddl_part_trans_task_count_); + stat_info.dml_part_trans_task_count_ = ATOMIC_LOAD(&dml_part_trans_task_count_); + stat_info.hb_part_trans_task_count_ = ATOMIC_LOAD(&hb_part_trans_task_count_); + stat_info.queue_part_trans_task_count_ = ATOMIC_LOAD(&queue_part_trans_task_count_); +} + +// A thread is responsible for continually rotating the sequence of transactions that need sequence +void ObLogSequencer::run1() +{ + const int64_t SLEEP_US = 1000; + lib::set_thread_name("ObLogSequencerTrans"); + int ret = OB_SUCCESS; + + while (OB_SUCC(ret) && ! lib::ThreadPool::has_set_stop()) { + // Global checkpoint not updated or initial value, do nothing + if (ATOMIC_LOAD(&global_checkpoint_) == ATOMIC_LOAD(&last_global_checkpoint_)) { + lib::this_routine::usleep(SLEEP_US); + } else { + ObByteLockGuard guard(trans_queue_lock_); + + while (OB_SUCC(ret) && ! 
trans_queue_.empty()) { + TrxSortElem top_trx_sort_elem = trans_queue_.top(); + const int64_t global_trans_version = top_trx_sort_elem.get_global_trans_version(); + + if (global_trans_version <= ATOMIC_LOAD(&global_checkpoint_)) { + if (OB_FAIL(handle_to_be_sequenced_trans_(top_trx_sort_elem, lib::ThreadPool::has_set_stop()))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_to_be_sequenced_trans_ fail", KR(ret), K(top_trx_sort_elem)); + } + } else { + trans_queue_.pop(); + } + } else { + break; + } + } // empty + } + lib::this_routine::usleep(SLEEP_US); + + if (REACH_TIME_INTERVAL(PRINT_SEQ_INFO_INTERVAL)) { + ISTAT("[OUTPUT]", K(global_checkpoint_), K(last_global_checkpoint_), K(global_seq_), "size", trans_queue_.size()); + } + } + + // exit on fail + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "sequencer thread exits, err=%d", ret); + ObLogSequencer::stop(); + } +} + +int ObLogSequencer::handle_to_be_sequenced_trans_(TrxSortElem &trx_sort_elem, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + TransCtx *trans_ctx = trx_sort_elem.get_trans_ctx_host(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogSequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(trans_ctx)) { + LOG_ERROR("trans_ctx is NULL", K(trx_sort_elem)); + ret = OB_ERR_UNEXPECTED; + // sequence + } else if (OB_FAIL(trans_ctx->sequence(ATOMIC_FAA(&global_seq_, 1)))) { + LOG_ERROR("trans_ctx sequence fail", KR(ret), K(trx_sort_elem)); + } else { + const int64_t participant_count = trans_ctx->get_ready_participant_count(); + PartTransTask *participant_list = trans_ctx->get_participant_objs(); + const bool is_dml_trans = participant_list->is_dml_trans(); + uint64_t tenant_id = OB_INVALID_TENANT_ID; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + + if (OB_FAIL(trans_ctx->get_tenant_id(tenant_id))) { + LOG_ERROR("trans_ctx get_tenant_id fail", KR(ret), K(tenant_id)); + } else if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + // There is no need to deal with the tenant not existing here, it must exist, and there is a bug if it doesn’t exist + LOG_ERROR("get_tenant fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("tenant is NULL, unexpected error", KR(ret), K(guard)); + } else { + if (OB_FAIL(trans_ctx->wait_data_ready(WAIT_TIMEOUT, stop_flag))) { + if (OB_IN_STOP_STATE != ret && OB_TIMEOUT != ret) { + LOG_ERROR("trans_ctx wait_data_ready fail", KR(ret)); + } + } + + // TODO non-block + if (is_dml_trans) { + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(! 
trans_ctx->is_data_ready())) { + LOG_ERROR("trans_ctx is not data ready", KPC(trans_ctx)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(handle_dml_trans_(tenant_id, *trans_ctx, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_dml_trans_ fail", KR(ret), K(tenant_id), KPC(trans_ctx)); + } + } else { + // succ + } + } // while + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(push_task_into_committer_(participant_list, participant_count, stop_flag, tenant))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_task_into_committer_ fail", KR(ret), K(tenant_id), K(participant_list), + K(participant_count), K(tenant)); + } + } else { + // No further operation is possible after that and may be recalled at any time + } + } + } + } + + return ret; +} + +int ObLogSequencer::handle(void *data, const int64_t thread_index, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + PartTransTask *part_trans_task = static_cast<PartTransTask *>(data); + (void)ATOMIC_AAF(&queue_part_trans_task_count_, -1); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogSequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(part_trans_task)) { + LOG_ERROR("invalid arguments", KPC(part_trans_task)); + ret = OB_INVALID_ARGUMENT; + } else { + if (! part_trans_task->is_global_heartbeat()) { + LOG_DEBUG("ObLogSequencer handle", KPC(part_trans_task), K(thread_index)); + } + + if (part_trans_task->is_global_heartbeat()) { + if (OB_FAIL(handle_global_hb_part_trans_task_(*part_trans_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_global_hb_part_trans_task_ fail", KR(ret), K(thread_index), KPC(part_trans_task)); + } + } + } else if (part_trans_task->is_dml_trans() || part_trans_task->is_ddl_trans()) { + if (OB_FAIL(handle_part_trans_task_(*part_trans_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_part_trans_task_ fail", KR(ret), K(thread_index), KPC(part_trans_task)); + } + } + } else { + LOG_ERROR("not supported task", KPC(part_trans_task)); + ret = OB_NOT_SUPPORTED; + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + // exit on fail + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "Sequencer thread exits, thread_index=%ld, err=%d", + thread_index, ret); + stop_flag = true; + } + + return ret; +} + +int ObLogSequencer::handle_global_hb_part_trans_task_(PartTransTask &part_trans_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("sequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! 
part_trans_task.is_global_heartbeat())) { + LOG_ERROR("part_trans_task is not ddl_trans, unexpected", K(part_trans_task)); + ret = OB_ERR_UNEXPECTED; + } else { + if (0 == part_trans_task.dec_ref_cnt()) { + const int64_t global_checkpoint = part_trans_task.get_timestamp(); + const int64_t cur_global_checkpoint = ATOMIC_LOAD(&global_checkpoint_); + + // If global checkpoint rollback, unexpected + if (global_checkpoint < cur_global_checkpoint) { + LOG_ERROR("global_checkpoint is less than cur_global_checkpoint, unexpected", K(global_checkpoint), + K(cur_global_checkpoint), K(last_global_checkpoint_), K(part_trans_task)); + ret = OB_ERR_UNEXPECTED; + } else { + // record last checkpoint + last_global_checkpoint_ = cur_global_checkpoint; + // udpate current checkpoint + ATOMIC_STORE(&global_checkpoint_, global_checkpoint); + + LOG_DEBUG("handle_global_hb_part_trans_task_", K(part_trans_task), K(last_global_checkpoint_), K(global_checkpoint_)); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(push_task_into_committer_(&part_trans_task, 1, stop_flag, NULL/*tenant*/))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_task_into_committer_ fail", KR(ret), K(part_trans_task)); + } + } else { + // No further operation is possible after that and may be recalled at any time + } + } + } + } + + return ret; +} + +int ObLogSequencer::handle_part_trans_task_(PartTransTask &part_trans_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("sequencer has not been initialized"); + ret = OB_NOT_INIT; + } else { + const bool is_dml_trans = part_trans_task.is_dml_trans(); + TransCtx *trans_ctx = NULL; + bool is_part_trans_served = true; // default serve + bool is_all_participants_ready = false; + + if (OB_FAIL(prepare_trans_ctx_(part_trans_task, is_part_trans_served, trans_ctx, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("prepare_trans_ctx_ fail", KR(ret), K(part_trans_task)); + } + } else { + // Attempt to add to the participant list, if the partition does not exist in the participant list, the partition transaction must not be served + if (is_part_trans_served) { + if (OB_ISNULL(trans_ctx)) { + LOG_ERROR("prepare trans ctx fail", K(part_trans_task), K(is_part_trans_served)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(trans_ctx->add_participant(part_trans_task, + is_part_trans_served, + is_all_participants_ready))) { + LOG_ERROR("add participant fail", KR(ret), K(part_trans_task), K(*trans_ctx)); + } else { + // handle success + } + } + + // So far it has been confirmed that the partition serve or not, move on to the next step + if (OB_SUCC(ret)) { + if (! 
is_part_trans_served) { + // Handling partitioned transactions not serve + if (OB_FAIL(handle_not_served_trans_(part_trans_task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_not_served_trans_ fail", KR(ret), K(part_trans_task)); + } + } else { + // handle success + } + } else { + // Handle if partitioned transaction serve + if (is_all_participants_ready) { + // If all participants are gathered, start the next step + if (OB_FAIL(handle_participants_ready_trans_(is_dml_trans, trans_ctx, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_participants_ready_trans_ fail", KR(ret), K(is_dml_trans), K(*trans_ctx)); + } + } else { + // handle success + } + } else { + // Participants have not yet finished gathering, no processing will be done + } + } + } // OB_SUCC(ret) + } + + REVERT_TRANS_CTX(trans_ctx); + } + + + return ret; +} + + +int ObLogSequencer::prepare_trans_ctx_(PartTransTask &part_trans_task, + bool &is_part_trans_served, + TransCtx *&trans_ctx, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = part_trans_task.get_tenant_id(); + ObLogTenantGuard guard; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("sequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + // It is not possible for a tenant not to exist during the processing of data, and here a direct error is reported + LOG_ERROR("get_tenant fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(guard.get_tenant())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("tenant is NULL, unexpected", KR(ret), K(guard), K(part_trans_task)); + } else { + bool enable_create = true; + bool trans_ctx_need_discard = false; + const ObTransID &trans_id = part_trans_task.get_trans_id(); + ObLogTenant *tenant = guard.get_tenant(); + + trans_ctx = NULL; + is_part_trans_served = true; // default to serve + + // get a valid TransCtx + while (OB_SUCCESS == ret && ! 
stop_flag) { + // Get the transaction context, or create one if it doesn't exist + trans_ctx = NULL; + ret = trans_ctx_mgr_->get_trans_ctx(trans_id, trans_ctx, enable_create); + + if (OB_FAIL(ret)) { + LOG_ERROR("get_trans_ctx fail", KR(ret), K(trans_id)); + break; + } + + trans_ctx_need_discard = false; + + const bool print_participant_not_serve_info = ATOMIC_LOAD(&g_print_participant_not_serve_info); + + // prepare trans context + ret = trans_ctx->prepare(part_trans_task, + tenant->get_part_mgr(), + print_participant_not_serve_info, + stop_flag, + trans_ctx_need_discard); + + if (OB_INVALID_ERROR != ret) { + break; + } + + ret = OB_SUCCESS; + + // If the transaction context has been deprecated, change the transaction context next time + REVERT_TRANS_CTX(trans_ctx); + + PAUSE(); + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + if (OB_FAIL(ret)) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("prepare trans_ctx fail", KR(ret), K(trans_ctx), K(part_trans_task)); + } + + // Reversing the transaction context in the case of error + REVERT_TRANS_CTX(trans_ctx); + } else if (trans_ctx_need_discard) { + // If the transaction context needs to be deprecated, then the partitioned transaction is not being served and the transaction context needs to be deleted + (void)trans_ctx_mgr_->remove_trans_ctx(trans_id); + is_part_trans_served = false; + + REVERT_TRANS_CTX(trans_ctx); + } else { + // succ + } + } + return ret; +} + +int ObLogSequencer::handle_not_served_trans_(PartTransTask &part_trans_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("sequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! part_trans_task.is_dml_trans()) + && OB_UNLIKELY(! part_trans_task.is_ddl_trans())) { + LOG_ERROR("part_trans_task is not DML or DDL trans", K(part_trans_task)); + ret = OB_INVALID_ARGUMENT; + } else { + // If the partitioned transaction is no longer in service, its resources are reclaimed and passed to the Committer as a "non-serviceable transaction" type task + // + // Note that. + // 1. When reclaiming resources, it is not necessary to decrement the number of transactions on the partition, because + // it is when the decrementing of the number of transactions on the partition fails that the partition is known to be unserviced + // 2. This cannot be converted to a heartbeat type task, the heartbeat type timestamp has a special meaning and can only be generated by the fetcher + _ISTAT("[PART_NOT_SERVE] TRANS_ID=%s PART=%s LOG_ID=%ld LOG_TIMESTAMP=%ld", + to_cstring(part_trans_task.get_trans_id()), + to_cstring(part_trans_task.get_partition()), + part_trans_task.get_prepare_log_id(), + part_trans_task.get_timestamp()); + + // Conversion of transaction tasks to "unserviced partitioned transactions" + if (OB_FAIL(part_trans_task.convert_to_not_served_trans())) { + LOG_ERROR("convert_to_not_served_trans fail", KR(ret), K(part_trans_task)); + } + // push to Committer, unserviced transaction tasks do not need to provide tenant structures + else if (OB_FAIL(push_task_into_committer_(&part_trans_task, 1/*task_count*/, stop_flag, NULL))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_task_into_committer_ fail", KR(ret), K(part_trans_task)); + } + } + } + + return ret; +} + +int ObLogSequencer::push_task_into_data_processor_(ObLogRowDataIndex &row_data_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("sequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(data_processor_)) { + LOG_ERROR("data_processor_ is NULL", K(data_processor_)); + ret = OB_ERR_UNEXPECTED; + } else { + RETRY_FUNC(stop_flag, (*data_processor_), push, row_data_index, DATA_OP_TIMEOUT); + } + + return ret; +} + +int ObLogSequencer::push_task_into_committer_(PartTransTask *task, + const int64_t task_count, + volatile bool &stop_flag, + ObLogTenant *tenant) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("sequencer has not been initialized", K(tenant)); + ret = OB_NOT_INIT; + } else { + // Counting the number of partitioned tasks + do_stat_for_part_trans_task_count_(*task, -task_count); + + RETRY_FUNC(stop_flag, (*trans_committer_), push, task, task_count, DATA_OP_TIMEOUT, tenant); + } + + return ret; +} + +int ObLogSequencer::handle_participants_ready_trans_(const bool is_dml_trans, + TransCtx *trans_ctx, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(trans_ctx)) { + LOG_ERROR("invalid argument", K(trans_ctx)); + ret = OB_INVALID_ARGUMENT; + } else { + // Avoiding TransCtx recycling + uint64_t tenant_id = OB_INVALID_TENANT_ID; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + + if (OB_FAIL(trans_ctx->get_tenant_id(tenant_id))) { + LOG_ERROR("trans_ctx get_tenant_id fail", KR(ret), K(tenant_id)); + } else if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + // There is no need to deal with the tenant not existing here, it must exist, and if it doesn't there is a bug + LOG_ERROR("get_tenant fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("tenant is NULL, unexpected error", KR(ret), K(guard)); + } else { + if (OB_FAIL(recycle_resources_after_trans_ready_(*trans_ctx, *tenant))) { + LOG_ERROR("recycle_resources_after_trans_ready_ fail", KR(ret), KPC(trans_ctx), KPC(tenant), K(stop_flag)); + } + } + + if (OB_SUCC(ret)) { + TrxSortElem &trx_sort_elem = trans_ctx->get_trx_sort_elem(); + ObByteLockGuard guard(trans_queue_lock_); + trans_queue_.push(trx_sort_elem); + + _DSTAT("[TRANS_QUEUE] TRANS_ID=%s QUEUE_SIZE=%lu ID_DML=%d", + to_cstring(trx_sort_elem), + trans_queue_.size(), + is_dml_trans); + } + } + + return ret; +} + +int ObLogSequencer::handle_dml_trans_(const uint64_t tenant_id, TransCtx &trans_ctx, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
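+  // Overall flow of handle_dml_trans_ (see the body below):
+  //   1. chain the row data indexes of all participants into a single binlog record list
+  //   2. push every ObLogRowDataIndex to the data processor so persisted row data is read concurrently
+  //   3. update transaction TPS/RPS statistics for the tenant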
inited_)) { + LOG_ERROR("ObLogSequencer has not been initialized"); + ret = OB_NOT_INIT; + } else { + uint64_t total_br_count = 0; + ObLogRowDataIndex *br_head = NULL; + ObLogRowDataIndex *br_tail = NULL; + PartTransTask *participants = trans_ctx.get_participant_objs(); + // Process all participants + // String all Binlog Records of all participants into a chain + PartTransTask *part = participants; + int64_t valid_part_trans_task_count = 0; + + if (OB_FAIL(build_binlog_record_list_(trans_ctx, part, br_head, br_tail, + total_br_count, valid_part_trans_task_count, stop_flag))) { + LOG_ERROR("build_binlog_record_list_", KR(ret), K(part), K(br_head), K(br_tail), + K(total_br_count), K(valid_part_trans_task_count)); + } else { + trans_ctx.set_total_br_count(total_br_count); + trans_ctx.set_valid_part_trans_task_count(valid_part_trans_task_count); + } + + // Concurrent reading of persistent data + if (OB_SUCC(ret) && total_br_count > 0) { + ObLogRowDataIndex *br = br_head; + + while (OB_SUCC(ret) && NULL != br) { + ObLogRowDataIndex *br_next = br->get_next(); + + if (OB_FAIL(push_task_into_data_processor_(*br, stop_flag))) { + LOG_ERROR("push_task_into_data_processor_ fail", KR(ret), KPC(br)); + } else { + br = br_next; + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(do_trans_stat_(tenant_id, total_br_count))) { + LOG_ERROR("do trans stat fail", KR(ret), K(tenant_id), K(total_br_count)); + } + } + } + + return ret; +} + +int ObLogSequencer::build_binlog_record_list_(TransCtx &trans_ctx, + PartTransTask *part, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const bool enable_output_trans_order_by_sql_operation = TCONF.enable_output_trans_order_by_sql_operation != 0; + + if (enable_output_trans_order_by_sql_operation) { + if (OB_FAIL(build_binlog_record_list_order_by_sql_no_(trans_ctx, part, br_head, br_tail, valid_br_num, + valid_part_trans_task_count, stop_flag))) { + LOG_ERROR("build_binlog_record_list_order_by_sql_no_", KR(ret), KPC(part), K(br_head), K(br_tail), + K(valid_br_num), K(valid_part_trans_task_count)); + } + } else { + if (OB_FAIL(build_binlog_record_list_order_by_partition_(trans_ctx, part, br_head, br_tail, valid_br_num, + valid_part_trans_task_count, stop_flag))) { + LOG_ERROR("build_binlog_record_list_order_by_partition_", KR(ret), KPC(part), K(br_head), K(br_tail), + K(valid_br_num), K(valid_part_trans_task_count)); + } + } + + return ret; +} + +int ObLogSequencer::build_binlog_record_list_order_by_partition_(TransCtx &trans_ctx, + PartTransTask *part, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + // Binlog Record with a chain of all valid partition transactions + while (! 
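+  // For each participant in turn: bump its reference count, chain its sorted DML rows onto
+  // br_head/br_tail, add the per-participant row count to valid_br_num, and count the participant
+  // in valid_part_trans_task_count if it contributed at least one row.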
stop_flag && OB_SUCCESS == ret && NULL != part) { + PartTransTask *next = part->next_task(); + int64_t valid_part_br_num = 0; + + // Set the reference count for each participant to ensure it is the number of Binlog Records + 1 + // because the reference count is reduced by one for each Binlog Record recycled + // To ensure that the participants are valid when the Commit message is updated, need to add an extra reference count here + part->set_ref_cnt(part->get_br_num() + 1); + + if (OB_FAIL(handle_br_of_part_trans_task_(trans_ctx, part, valid_part_br_num, br_head, br_tail))) { + LOG_ERROR("handle_br_of_part_trans_task_ fail", K(part), K(br_head), K(br_tail)); + } else { + part = next; + valid_br_num += valid_part_br_num; + if (valid_part_br_num > 0) { + ++valid_part_trans_task_count; + } + } + } // while + + return ret; +} + +int ObLogSequencer::handle_br_of_part_trans_task_(TransCtx &trans_ctx, + PartTransTask *part, + int64_t &valid_br_num, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogSequencer has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(part)) { + LOG_ERROR("part is NULL"); + ret = OB_INVALID_ARGUMENT; + } else { + ObLogRowDataIndex *row_data_index = part->get_sorted_dml_row_list().get_head(); + const int64_t part_row_num = part->get_sorted_dml_row_list().get_row_num(); + valid_br_num = 0; + + // Iterate through all statements and validate the Binlog Record in each statement + // Pick out the invalid Binlog Records and recycle them + // form a chain of valid ones and set the exact index + while (OB_SUCCESS == ret && NULL != row_data_index) { + ObLogRowDataIndex *next_row_data_index = row_data_index->get_next(); + LOG_DEBUG("handle row_data_index", KPC(row_data_index), KPC(next_row_data_index)); + + if (OB_FAIL(add_dml_br_to_binlog_record_list_(trans_ctx, row_data_index, br_head, br_tail))) { + LOG_ERROR("add_dml_br_to_binlog_record_list_ fail", KR(ret), KPC(row_data_index), KPC(part), + "binlog_record:", row_data_index->get_binlog_record(), + "sql_no", row_data_index->get_row_sql_no(), + K(br_head), K(br_tail), K(valid_br_num)); + } else { + valid_br_num++; + row_data_index = next_row_data_index; + } + } + + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(valid_br_num != part_row_num)) { + LOG_ERROR("valid_br_num is not equal to part_row_num, unexpected", K(valid_br_num), K(part_row_num), KPC(part)); + ret = OB_ERR_UNEXPECTED; + } + } + } + + return ret; +} + +int ObLogSequencer::build_binlog_record_list_order_by_sql_no_(TransCtx &trans_ctx, + PartTransTask *part, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + std::priority_queue, StmtSequerenceCompFunc> heap; + // 1. Initialise the minimal heap: place the head node of the statement chain for each partitioned transaction into the array to be sorted + PartTransTask *part_trans_task = part; + + while (OB_SUCCESS == ret && NULL != part_trans_task && ! stop_flag) { + // Set the reference count, otherwise the PartTransTask structure cannot be recycled + part_trans_task->set_ref_cnt(part_trans_task->get_br_num() + 1); + + ObLogRowDataIndex *row_data_index = + static_cast(part_trans_task->get_sorted_dml_row_list().get_head()); + if (NULL != row_data_index) { + heap.push(row_data_index); + } + part_trans_task = part_trans_task->next_task(); + } + + // 2. 
Output elements from the priority queue one by one, string them into the binlog_record list, and then string the next element in + while (OB_SUCCESS == ret && !heap.empty() && ! stop_flag) { + // 2.1. get smallest row index + ObLogRowDataIndex *row_data_index = heap.top(); + heap.pop(); + if (heap.empty()) { + // If the heap is empty, then there are no other partitions and only need to iterate over this partition; otherwise need to put this partition into the heap and compare it with the other partitions + while (OB_SUCCESS == ret && ! stop_flag && NULL != row_data_index) { + ObLogRowDataIndex *next_row_data_index = row_data_index->get_next(); + // Add to binlog_record list and update statistics after success + if (OB_FAIL(add_br_to_br_list_and_statics_(trans_ctx, row_data_index, br_head, br_tail, valid_br_num, valid_part_trans_task_count))) { + LOG_ERROR("add binlog record to br_list fail", KR(ret), K(trans_ctx), KPC(row_data_index), + K(valid_br_num), K(valid_part_trans_task_count)); + } else { + row_data_index = next_row_data_index; + } + } + } else { + ObLogRowDataIndex *next_row_data_index = row_data_index->get_next(); + // 2.2 After popping the smallest element, the heap is still not empty, indicating that there is data from other partitions and the sorting needs to continue + // 2.2.1. Add to binlog_record list and update statistics after success + if (OB_FAIL(add_br_to_br_list_and_statics_(trans_ctx, row_data_index, br_head, br_tail, valid_br_num, valid_part_trans_task_count))) { + LOG_ERROR("add binlog record to br_list fail", KR(ret), K(trans_ctx), KPC(row_data_index), + K(br_head), K(br_tail), K(valid_br_num), K(valid_part_trans_task_count)); + } else { + // 2.2.1. Add the next stmt element to the array: if the next of the current statement is not empty, add the statement of the current partition + ObLogRowDataIndex *next_stmt = next_row_data_index; + if (NULL != next_stmt) { + // Put in the new element and add it to the previous heap + // If the popped element next is not empty, add next to the array and execute step 2. + heap.push(next_stmt); + } + // 2.2.3. If the element popped is empty, the statement for that partition (ObLogRowDataIndex) has been processed and step 2 is executed directly to continue processing statements for other partitions. + } + } + } + + return ret; +} + +int ObLogSequencer::add_br_to_br_list_and_statics_(TransCtx &trans_ctx, + ObLogRowDataIndex *row_data_index, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count) +{ + int ret = OB_SUCCESS; + PartTransTask *part_trans_task = NULL; + + if (OB_ISNULL(row_data_index)) { + LOG_ERROR("row_data_index is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(part_trans_task = static_cast(row_data_index->get_host()))) { + LOG_ERROR("part_trans_task is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(add_dml_br_to_binlog_record_list_(trans_ctx, row_data_index, br_head, br_tail))) { + LOG_ERROR("add_dml_br_to_binlog_record_list_ fail", KR(ret), KPC(row_data_index), KPC(part_trans_task), + "binlog_record:", row_data_index->get_binlog_record(), + "sql_no", row_data_index->get_row_sql_no(), + K(br_head), K(br_tail), K(valid_br_num), K(valid_part_trans_task_count)); + } else { + const bool is_test_mode_on = TCONF.test_mode_on != 0; + if (is_test_mode_on) { + LOG_DEBUG("log dml stmt info under test mode ", K(part_trans_task), "sql_no:", row_data_index->get_row_sql_no()); + } + + valid_br_num++; + if (! 
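+    // Note on ordering: build_binlog_record_list_order_by_sql_no_ above uses a min-heap on sql_no
+    // (StmtSequerenceCompFunc returns task1->sql_no > task2->sql_no), so rows with sql_no 1 and 3
+    // from one partition and sql_no 2 from another are emitted in the order 1, 2, 3.
+    // A participant is counted into valid_part_trans_task_count only the first time one of its
+    // rows is chained; has_valid_binlog_record() marks that it has already been counted.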
part_trans_task->has_valid_binlog_record()) { + part_trans_task->set_has_valid_binlog_record(); + valid_part_trans_task_count++; + } + } + + return ret; +} + +int ObLogSequencer::add_dml_br_to_binlog_record_list_(TransCtx &trans_ctx, + ObLogRowDataIndex *row_data_index, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(row_data_index)) { + LOG_ERROR("row_data_index is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (! row_data_index->is_valid()) { + LOG_ERROR("row_data_index is not valid, unexpected", KPC(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else { + row_data_index->set_next(NULL); + row_data_index->set_br_commit_seq(ATOMIC_FAA(&br_committer_queue_seq_, 1), &trans_ctx); + + if (NULL == br_head) { + br_head = row_data_index; + br_tail = row_data_index; + } else { + br_tail->set_next(row_data_index); + br_tail = row_data_index; + } + } + + return ret; +} + +// Consider a scenario that dec_part_trans_count after sequence +// 1. create table ... pk hash 100 +// 2. liboblog has opened a transaction on the partition +// 3. drop table ... When deleting the partition, because the PartMgr reference count is not 0, mark Offline +// 4. The above partition progress is not advancing, so the global progress is not advancing +// 5. global heartbeat does not advance, sequencer cannot advance based on safety loci and thus cannot result in sequenced transaction output +// 6. The inability to sequence does not decrement the reference count, leading to interdependencies and deadlocks +// +// Therefore, unlike previous implementations, resources are not reclaimed after sequencing, but after the distributed transaction has been assembled +int ObLogSequencer::recycle_resources_after_trans_ready_(TransCtx &trans_ctx, ObLogTenant &tenant) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("sequencer has not been initialized"); + ret = OB_NOT_INIT; + // } else if (OB_UNLIKELY(! trans_ctx.is_sequenced())) { + } else if (OB_UNLIKELY(! 
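+  // As the scenario above explains, resources are reclaimed once the participants of the
+  // distributed transaction are fully assembled rather than after sequencing, so the precondition
+  // checked here is is_participants_ready(); the original is_sequenced() check is only kept above
+  // as a commented-out reference.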
trans_ctx.is_participants_ready())) { + LOG_ERROR("trans is not sequenced", K(trans_ctx)); + ret = OB_INVALID_ARGUMENT; + } else { + PartTransTask *participant = trans_ctx.get_participant_objs(); + + // Iterate over each statement of each partitioned transaction of a distributed transaction + while (NULL != participant) { + if (participant->is_dml_trans() || participant->is_ddl_trans()) { + const ObPartitionKey &pkey = participant->get_partition(); + // Decrement the count of ongoing transactions on the partition + if (OB_FAIL(tenant.get_part_mgr().dec_part_trans_count(pkey))) { + LOG_ERROR("dec_part_trans_count fail", KR(ret), K(pkey)); + } + } + + participant = participant->next_task(); + } + } + + return ret; +} + +void ObLogSequencer::do_stat_for_part_trans_task_count_(PartTransTask &part_trans_task, + const int64_t task_count) +{ + bool is_hb_sub_stat = false; + int64_t hb_dec_task_count = 0; + + if (part_trans_task.is_ddl_trans()) { + (void)ATOMIC_AAF(&ddl_part_trans_task_count_, task_count); + } else if (part_trans_task.is_dml_trans()) { + (void)ATOMIC_AAF(&dml_part_trans_task_count_, task_count); + } else { + // heartbeat + if (task_count < 0) { + is_hb_sub_stat = true; + hb_dec_task_count = task_count * SequencerThread::get_thread_num(); + (void)ATOMIC_AAF(&hb_part_trans_task_count_, hb_dec_task_count); + } else { + (void)ATOMIC_AAF(&hb_part_trans_task_count_, task_count); + } + } + + if (is_hb_sub_stat) { + (void)ATOMIC_AAF(&total_part_trans_task_count_, hb_dec_task_count); + } else { + (void)ATOMIC_AAF(&total_part_trans_task_count_, task_count); + } +} + +int ObLogSequencer::do_trans_stat_(const uint64_t tenant_id, + const int64_t total_stmt_cnt) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(trans_stat_mgr_)) { + LOG_ERROR("trans_stat_mgr_ is null", K(trans_stat_mgr_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) || OB_UNLIKELY(total_stmt_cnt < 0)) { + LOG_ERROR("invalid argument", K(tenant_id), K(total_stmt_cnt)); + ret = OB_INVALID_ARGUMENT; + } else { + trans_stat_mgr_->do_tps_stat(); + trans_stat_mgr_->do_rps_stat_before_filter(total_stmt_cnt); + if (OB_FAIL(trans_stat_mgr_->do_tenant_tps_rps_stat(tenant_id, total_stmt_cnt))) { + LOG_ERROR("do tenant rps stat before filter", KR(ret), K(tenant_id), K(total_stmt_cnt)); + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_sequencer1.h b/src/liboblog/src/ob_log_sequencer1.h new file mode 100644 index 0000000000000000000000000000000000000000..7ebfa955595c3e67baf37f4decaec8908935acec --- /dev/null +++ b/src/liboblog/src/ob_log_sequencer1.h @@ -0,0 +1,256 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_SEQUENCER_H__ +#define OCEANBASE_LIBOBLOG_SEQUENCER_H__ + +#include // std::priority_queue +#include // std::vector +#include "lib/allocator/ob_allocator.h" // ObIAllocator +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread +#include "lib/thread/thread_pool.h" // lib::ThreadPool +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock + +#include "ob_log_trans_ctx.h" // TransCtx +#include "ob_log_part_trans_task.h" // PartTransTask + +using namespace oceanbase::logmessage; +namespace oceanbase +{ +namespace common +{ +class ObString; +} + +namespace liboblog +{ +class ObLogConfig; +///////////////////////////////////////////////////////////////////////////////////////// +// IObLogSequencer + +class IObLogSequencer +{ +public: + enum + { + MAX_SEQUENCER_NUM = 64, + GET_SCHEMA_TIMEOUT = 1 * 1000 * 1000, + }; + + struct SeqStatInfo + { + SeqStatInfo() { reset(); } + ~SeqStatInfo() { reset(); } + + void reset() + { + total_part_trans_task_count_ = 0; + ddl_part_trans_task_count_ = 0; + dml_part_trans_task_count_ = 0; + hb_part_trans_task_count_ = 0; + queue_part_trans_task_count_ = 0; + } + int64_t total_part_trans_task_count_ CACHE_ALIGNED; + int64_t ddl_part_trans_task_count_ CACHE_ALIGNED; + int64_t dml_part_trans_task_count_ CACHE_ALIGNED; + int64_t hb_part_trans_task_count_ CACHE_ALIGNED; + int64_t queue_part_trans_task_count_ CACHE_ALIGNED; + }; + +public: + virtual ~IObLogSequencer() {} + virtual void configure(const ObLogConfig &config) = 0; + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + // ObLogFetcherDispatcher single-threaded call + // ObLogDDLHandler + // 1. DDL/DML partitioning transaction task + // 2. Global heartbeat + virtual int push(PartTransTask *task, volatile bool &stop_flag) = 0; + virtual void get_task_count(SeqStatInfo &stat_info) = 0; + virtual int64_t get_thread_num() const = 0; +}; + + +///////////////////////////////////////////////////////////////////////////////////////// + +class IObLogTransCtxMgr; +class IObLogTransStatMgr; +class IObLogCommitter; +class IObLogDataProcessor; +class IObLogErrHandler; +class ObLogTenant; + +typedef common::ObMQThread SequencerThread; + +// ObClockGenerator +class ObLogSequencer : public IObLogSequencer, public SequencerThread, public lib::ThreadPool +{ +public: + ObLogSequencer(); + virtual ~ObLogSequencer(); + +public: + static bool g_print_participant_not_serve_info; + void configure(const ObLogConfig &config); + +public: + int start(); + void stop(); + void mark_stop_flag() { SequencerThread::mark_stop_flag(); } + int push(PartTransTask *task, volatile bool &stop_flag); + void get_task_count(SeqStatInfo &stat_info); + int64_t get_thread_num() const { return SequencerThread::get_thread_num(); } + int handle(void *data, const int64_t thread_index, volatile bool &stop_flag); + +public: + int init(const int64_t thread_num, + const int64_t queue_size, + IObLogTransCtxMgr &trans_ctx_mgr, + IObLogTransStatMgr &trans_stat_mgr, + IObLogCommitter &trans_committer, + IObLogDataProcessor &data_processor, + IObLogErrHandler &err_handler); + void destroy(); + +private: + static const int64_t PRINT_SEQ_INFO_INTERVAL = 10 * _SEC_; + static const int64_t DATA_OP_TIMEOUT = 1 * _SEC_; + static const int64_t WAIT_TIMEOUT = 10 * _SEC_; + typedef liboblog::TransCtxSortElement TrxSortElem; + typedef liboblog::TransCtxSortElement::TransCtxCmp TrxCmp; + typedef std::priority_queue, TrxCmp> TransQueue; + +private: + void run1() final; + int 
handle_to_be_sequenced_trans_(TrxSortElem &trx_sort_elem, + volatile bool &stop_flag); + int handle_global_hb_part_trans_task_(PartTransTask &part_trans_task, + volatile bool &stop_flag); + int handle_part_trans_task_(PartTransTask &part_trans_task, + volatile bool &stop_flag); + + // First prepare the transaction context + // If it is confirmed that the partitioned transaction is not serviced, the transaction context returned is empty + // Note: if a partitioned transaction is returned as being in service, this does not mean that the partitioned transaction is necessarily in service, + // and the final confirmation of whether the partitioned transaction is in service will have to wait until it is added to the list of participants + int prepare_trans_ctx_(PartTransTask &part_trans_task, + bool &is_part_trans_served, + TransCtx *&trans_ctx, + volatile bool &stop_flag); + int handle_not_served_trans_(PartTransTask &part_trans_task, volatile bool &stop_flag); + int handle_participants_ready_trans_(const bool is_dml_trans, + TransCtx *trans_ctx, + volatile bool &stop_flag); + // Once the participants are gathered, the entire DML transaction is processed + int handle_dml_trans_(const uint64_t tenant_id, TransCtx &trans_ctx, volatile bool &stop_flag); + int recycle_resources_after_trans_ready_(TransCtx &trans_ctx, ObLogTenant &tenant); + int push_task_into_data_processor_(ObLogRowDataIndex &row_data_index, volatile bool &stop_flag); + int push_task_into_committer_(PartTransTask *task, + const int64_t task_count, + volatile bool &stop_flag, + ObLogTenant *tenant); + void do_stat_for_part_trans_task_count_(PartTransTask &part_trans_task, + const int64_t task_count); + + // Constructs a chain of all statement binlogs of partitioned transactions in a distributed transaction for output to the user queue + int build_binlog_record_list_(TransCtx &trans_ctx, + PartTransTask *part, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count, + volatile bool &stop_flag); + // Build the binlog_record list in the order of the partitions in the PartTransTask list + int build_binlog_record_list_order_by_partition_(TransCtx &trans_ctx, + PartTransTask *part, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count, + volatile bool &stop_flag); + int handle_br_of_part_trans_task_(TransCtx &trans_ctx, + PartTransTask *part, + int64_t &valid_br_num, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail); + // Build a binlog_record chain by sequencing statements within a transaction according to sql_no + int build_binlog_record_list_order_by_sql_no_(TransCtx &trans_ctx, + PartTransTask *part, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count, + volatile bool &stop_flag); + // Add DML binlog_record to br_list and count information/print logs etc. 
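+  // Appends row_data_index to the br_head/br_tail chain via add_dml_br_to_binlog_record_list_,
+  // increments valid_br_num, and counts the owning PartTransTask into valid_part_trans_task_count
+  // the first time it contributes a row (see the implementation earlier in this diff).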
+ int add_br_to_br_list_and_statics_(TransCtx &trans_ctx, + ObLogRowDataIndex *row_data_index, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail, + uint64_t &valid_br_num, + int64_t &valid_part_trans_task_count); + // Add the DML binlog_record to the binlog_record list for output to the user queue + int add_dml_br_to_binlog_record_list_(TransCtx &trans_ctx, + ObLogRowDataIndex *row_data_index, + ObLogRowDataIndex *&br_head, + ObLogRowDataIndex *&br_tail); + + struct StmtSequerenceCompFunc + { + // Statement sort operator: current sort based on sql_no, used to find the minimum value of the heap, used only for sorting DML statements + bool operator()(const ObLogRowDataIndex *task1, const ObLogRowDataIndex *task2) + { + return task1->get_row_sql_no() > task2->get_row_sql_no(); + } + }; + + // 1. statistics on transaction tps and rps (rps before and after Formatter filtering) + // 2. count tenant rps information + int do_trans_stat_(const uint64_t tenant_id, const int64_t total_stmt_cnt); + +private: + bool inited_; + uint64_t round_value_; + uint64_t heartbeat_round_value_; + + IObLogTransCtxMgr *trans_ctx_mgr_; + IObLogTransStatMgr *trans_stat_mgr_; + IObLogCommitter *trans_committer_; + IObLogDataProcessor *data_processor_; + IObLogErrHandler *err_handler_; + + int64_t global_checkpoint_ CACHE_ALIGNED; + int64_t last_global_checkpoint_ CACHE_ALIGNED; + uint64_t global_seq_ CACHE_ALIGNED; + uint64_t br_committer_queue_seq_ CACHE_ALIGNED; + TransQueue trans_queue_; + common::ObByteLock trans_queue_lock_; + + // Counting the number of partitioned tasks owned by Sequencer + int64_t total_part_trans_task_count_ CACHE_ALIGNED; + int64_t ddl_part_trans_task_count_ CACHE_ALIGNED; + int64_t dml_part_trans_task_count_ CACHE_ALIGNED; + int64_t hb_part_trans_task_count_ CACHE_ALIGNED; + // Counting the number of partitioned tasks owned by the Sequencer queue + int64_t queue_part_trans_task_count_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSequencer); +}; + + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_SEQUENCER_H__ */ diff --git a/src/liboblog/src/ob_log_server_priority.cpp b/src/liboblog/src/ob_log_server_priority.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2bf9e7b4446cc283bd4b802a95cbb27c4c0f88d0 --- /dev/null +++ b/src/liboblog/src/ob_log_server_priority.cpp @@ -0,0 +1,100 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "ob_log_server_priority.h" + +namespace oceanbase +{ +namespace liboblog +{ +using namespace oceanbase::common; + +const char *print_region_priority(RegionPriority type) +{ + const char *str = "UNKNOWN"; + switch (type) { + case REGION_PRIORITY_UNKNOWN: + str = "UNKNOWN"; + break; + case REGION_PRIORITY_LOW: + str = "LOW"; + break; + case REGION_PRIORITY_HIGH: + str = "HIGH"; + break; + default: + str = "INVALID"; + break; + } + return str; +} + +const char *print_replica_priority(ReplicaPriority type) +{ + const char *str = "UNKNOWN"; + switch (type) { + case REPLICA_PRIORITY_UNKNOWN: + str = "UNKNOWN"; + break; + case REPLICA_PRIORITY_OTHER: + str = "OTHER_REPLICA"; + break; + case REPLICA_PRIORITY_FULL: + str = "FULL"; + break; + case REPLICA_PRIORITY_READONLY: + str = "READ_ONLY"; + break; + case REPLICA_PRIORITY_LOGONLY: + str = "LOG_ONLY"; + break; + default: + str = "INVALID"; + break; + } + return str; +} + +int get_replica_priority(const common::ObReplicaType type, + ReplicaPriority &priority) +{ + int ret = OB_SUCCESS; + priority = REPLICA_PRIORITY_UNKNOWN; + + if (!ObReplicaTypeCheck::is_replica_type_valid(type)) { + ret = OB_INVALID_ARGUMENT; + } else { + switch(type) { + case REPLICA_TYPE_FULL: { + priority = REPLICA_PRIORITY_FULL; + break; + } + case REPLICA_TYPE_READONLY: { + priority = REPLICA_PRIORITY_READONLY; + break; + } + case REPLICA_TYPE_LOGONLY: { + priority = REPLICA_PRIORITY_LOGONLY; + break; + } + default: { + priority = REPLICA_PRIORITY_OTHER; + break; + } + }; + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_server_priority.h b/src/liboblog/src/ob_log_server_priority.h new file mode 100644 index 0000000000000000000000000000000000000000..f4eaff5ef4f613292fdceaaa3b489ea902c28d6f --- /dev/null +++ b/src/liboblog/src/ob_log_server_priority.h @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_SERVER_PRIORITY_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_SERVER_PRIORITY_H_ + +#include "share/ob_define.h" +#include "share/ob_errno.h" + +namespace oceanbase +{ +namespace liboblog +{ +// region priority +// The smaller the value, the higher the priority +enum RegionPriority +{ + REGION_PRIORITY_UNKNOWN = -1, + REGION_PRIORITY_HIGH = 0, + REGION_PRIORITY_LOW = 1, + REGION_PRIORITY_MAX +}; +const char *print_region_priority(RegionPriority type); + +// Replica type priority +// The smaller the value, the higher the priority +// +// The priority is from highest to lowest as follows: L > R > F > OTHER +// where F is a fully functional copy, R is a read-only copy, L is a logged copy +// OTHER is the other type of replica, with the lowest default level +enum ReplicaPriority +{ + REPLICA_PRIORITY_UNKNOWN = -1, + REPLICA_PRIORITY_LOGONLY = 0, + REPLICA_PRIORITY_READONLY = 1, + REPLICA_PRIORITY_FULL = 2, + REPLICA_PRIORITY_OTHER = 3, + REPLICA_PRIORITY_MAX +}; +const char *print_replica_priority(ReplicaPriority type); + +// Get replica priority based on replica type +int get_replica_priority(const common::ObReplicaType type, + ReplicaPriority &priority); +} +} + +#endif diff --git a/src/liboblog/src/ob_log_sql_server_provider.cpp b/src/liboblog/src/ob_log_sql_server_provider.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0fcc437096eb8493161d44f8dd9bf96795e9e335 --- /dev/null +++ b/src/liboblog/src/ob_log_sql_server_provider.cpp @@ -0,0 +1,454 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_sql_server_provider.h" + +#include "share/ob_define.h" +#include "share/ob_web_service_root_addr.h" // fetch_rs_list_from_url + +using namespace oceanbase::common; +using namespace oceanbase::share; +namespace oceanbase +{ +namespace liboblog +{ + +const char *ObLogSQLServerProvider::DEFAULT_VALUE_OF_RS_CONF = "|"; + +ObLogSQLServerProvider::ObLogSQLServerProvider() : inited_(false), + is_using_rs_list_(false), + server_list_(ObModIds::OB_LOG_SERVER_PROVIDER, OB_MALLOC_NORMAL_BLOCK_SIZE), + refresh_lock_(), + rs_conf_lock_(), + svr_blacklist_() +{ + (void)memset(rs_list_, 0, sizeof(char) * MAX_CONFIG_LENGTH); + (void)memset(config_url_, 0, sizeof(char) * MAX_CONFIG_LENGTH); +} + +ObLogSQLServerProvider::~ObLogSQLServerProvider() +{ + destroy(); +} + +int ObLogSQLServerProvider::init(const char *config_url, const char *rs_list) +{ + int ret = OB_SUCCESS; + const char *sql_server_blacklist = TCONF.sql_server_blacklist.str(); + const bool is_sql_server = true; + + if (OB_UNLIKELY(inited_)) { + ret = OB_INIT_TWICE; + } else if (OB_FAIL(svr_blacklist_.init(sql_server_blacklist, is_sql_server))) { + LOG_ERROR("svr_blacklist_ init fail", KR(ret), K(sql_server_blacklist), K(is_sql_server)); + // try inited by rs list if rs_list if not empty otherwise use cluster_url, will exist if can't init by rs_list or cluster_url + } else if (OB_FAIL(check_rs_list_valid_(rs_list))) { + LOG_ERROR("failed to verify rslist", KR(ret), K(rs_list)); + } else { + if (is_using_rs_list_) { + if (OB_FAIL(init_by_rs_list_conf_(rs_list))) { + LOG_ERROR("failed to init sql server provider by rs_list, please check rs_list is valid or not!", KR(ret), K(rs_list)); + } + } else { + if (OB_FAIL(init_by_cluster_url_(config_url))) { + LOG_ERROR("failed to init sql server provider by cluster url", KR(ret), K(config_url)); + } + } + } + + if (OB_SUCC(ret)) { + inited_ = true; + } + + return ret; +} + +void ObLogSQLServerProvider::destroy() +{ + inited_ = false; + is_using_rs_list_ = false; + (void)memset(rs_list_, 0, sizeof(char) * MAX_CONFIG_LENGTH); + (void)memset(config_url_, 0, sizeof(char) * MAX_CONFIG_LENGTH); + server_list_.destroy(); + svr_blacklist_.destroy(); +} + +void ObLogSQLServerProvider::configure(const ObLogConfig &cfg) +{ + const char *sql_server_blacklist = cfg.sql_server_blacklist.str(); + // reload rs_list + const char *rs_list = cfg.rootserver_list.str(); + LOG_INFO("[CONFIG]", K(sql_server_blacklist), K(rs_list)); + + check_rs_list_valid_(rs_list); + + svr_blacklist_.refresh(sql_server_blacklist); +} + +int ObLogSQLServerProvider::prepare_refresh() +{ + return OB_SUCCESS; +} + +int ObLogSQLServerProvider::check_rs_list_valid_(const char *rs_list) +{ + int ret = OB_SUCCESS; + int64_t pos_rs_list = 0; + + if (OB_ISNULL(rs_list) || OB_UNLIKELY(MAX_CONFIG_LENGTH <= strlen(rs_list)) || OB_UNLIKELY(0 == strlen(rs_list))) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invald rs list", KR(ret), K(rs_list)); + } else if (0 == strcmp(rs_list, DEFAULT_VALUE_OF_RS_CONF)) { + is_using_rs_list_ = false; + } else { + is_using_rs_list_ = true; + // add lock to protect rs_list_ for write; + ObSmallSpinLockGuard guard(rs_conf_lock_); + if (OB_FAIL(databuff_printf(rs_list_, sizeof(rs_list_), pos_rs_list, "%s", rs_list))) { + LOG_ERROR("copy rs_list fail", KR(ret), "buf_size", sizeof(rs_list_), "rs_list length", strlen(rs_list), + K(pos_rs_list), K(rs_list), K_(is_using_rs_list)); + } + } + + return ret; +} + +int ObLogSQLServerProvider::init_by_rs_list_conf_(const char 
*rs_list) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(rs_list) || OB_UNLIKELY(0 == strcmp(rs_list, DEFAULT_VALUE_OF_RS_CONF))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument, may not init by rs_list", KR(ret), K(rs_list), K(DEFAULT_VALUE_OF_RS_CONF)); + } else if (OB_FAIL(refresh_by_rs_list_conf_(rs_list, server_list_))) { + LOG_ERROR("failed to refresh serverlist by rs_list config", KR(ret), K(rs_list)); + } else{ + LOG_INFO("init sql server provider by rs list success", K(rs_list), K_(server_list)); + } + + return ret; +} + +int ObLogSQLServerProvider::init_by_cluster_url_(const char *config_url) +{ + int ret = OB_SUCCESS; + int64_t pos_cluster_url = 0; + + if (OB_ISNULL(config_url) || OB_UNLIKELY(0 == strcmp(config_url, DEFAULT_VALUE_OF_RS_CONF))) { + LOG_ERROR("invalid arguments", K(config_url), K(DEFAULT_VALUE_OF_RS_CONF)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(config_url_, sizeof(config_url_), pos_cluster_url, "%s", config_url))) { + LOG_ERROR("copy config_url fail", KR(ret), "buf_size", sizeof(config_url), + "config_url_len", strlen(config_url), K(pos_cluster_url), K(config_url)); + } else if (OB_FAIL(refresh_until_success_(config_url, server_list_))) { + LOG_ERROR("refresh server list fail", KR(ret), K(config_url)); + } else { + LOG_INFO("init sql server provider by cluster_url success", K(config_url), K_(server_list)); + } + + return ret; +} + +int ObLogSQLServerProvider::refresh_by_rs_list_conf_(const char *rs_list, ServerList &server_list) +{ + OB_ASSERT(NULL != rs_list); + int ret = OB_SUCCESS; + + if (OB_FAIL(parse_rs_list_(rs_list, server_list))) { + LOG_WARN("failed to parse rslist", KR(ret), K(rs_list)); + } else if (0 == server_list.count()) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("found empty rs list", KR(ret), K(rs_list)); + } else if (OB_FAIL(get_svr_list_based_on_blacklist_(server_list))) { + LOG_ERROR("failed to get svr after filter by sql svr blacklis", KR(ret), K(rs_list), K(server_list)); + } else { + // success + } + + return ret; +} + +int ObLogSQLServerProvider::parse_rs_list_(const char *rs_list, ServerList &server_list) +{ + int ret = OB_SUCCESS; + static const char *rs_delimiter = ";"; + static const char *param_delemiter = ":"; + static const int64_t expected_rs_param_num = 3; // expect rs param: ip, rpc_port, sql_port + + char rs_list_copy[MAX_CONFIG_LENGTH]; + if (OB_FAIL(get_copy_of_rs_list_conf_(rs_list_copy))) { + LOG_ERROR("failed to get copy of rs list conf str", KR(ret), K_(rs_list), K(rs_list_copy)); + } else { + LOG_DEBUG("get copy of rs_list conf str", K_(rs_list), K(rs_list_copy)); + char *rs_ptr = NULL; + char *p = NULL; + + rs_ptr = strtok_r(rs_list_copy, rs_delimiter, &p); + while (OB_SUCC(ret) && rs_ptr != NULL) { + int64_t rs_param_cnt = 0; + const char *rs_param_res[3]; + + if (OB_ISNULL(rs_ptr) || (0 == strlen(rs_ptr))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rs info str", KR(ret), K(rs_list), K(rs_ptr)); + } else if (OB_FAIL(split(rs_ptr, param_delemiter, expected_rs_param_num, rs_param_res, rs_param_cnt))) { + LOG_WARN("failed to split rs_info_str", KR(ret), K(rs_ptr)); + } else if (expected_rs_param_num != rs_param_cnt) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rs param count", KR(ret), K(rs_ptr), K(rs_list), K(expected_rs_param_num)); + } else { + const char *ip = rs_param_res[0]; + int64_t rpc_port_64 = -1; + int64_t sql_port = -1; + if (OB_ISNULL(rs_param_res[1]) || OB_ISNULL(rs_param_res[2]) || (0 == rs_param_res[0]) || (0 == rs_param_res[1])) { + ret = 
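+          // Expected rootserver_list format (see ob_log_sql_server_provider.h): entries separated
+          // by ';', each entry "ip:rpc_port:sql_port", for example
+          // "10.0.0.1:2882:2881;10.0.0.2:2882:2881" (addresses and ports are illustrative only).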
OB_INVALID_ARGUMENT; + LOG_ERROR("invalid rs param", KR(ret), K(rs_list), K(rs_param_res[1]), K(rs_param_res[2])); + } else if (OB_FAIL(c_str_to_int(rs_param_res[1], rpc_port_64))) { + LOG_ERROR("fail to convert rpc_port_str to rpc_port_int64", KR(ret), K(rs_param_res[1]), K(rpc_port_64)); + } else if (OB_FAIL(c_str_to_int(rs_param_res[2], sql_port))) { + LOG_ERROR("fail to convert sql_port_str to sql_port_int64", KR(ret), K(rs_param_res[2]), K(sql_port)); + } else { + ObAddr addr; + ObRootAddr rs_addr; + addr.reset(); + rs_addr.reset(); + int32_t rpc_port_32 = static_cast(rpc_port_64); + + if (addr.set_ip_addr(ip, rpc_port_32)) { + rs_addr.server_ = addr; + rs_addr.sql_port_ = sql_port; + if (!rs_addr.is_valid()) { + LOG_WARN("invalid rs addr, will ignore this server", K(rs_list), K(ip), K(rpc_port_32), K(sql_port)); + } else if (OB_FAIL(server_list.push_back(rs_addr))) { + LOG_ERROR("failed to pushback rs server to server list", KR(ret)); + } + } else { + LOG_WARN("invalid ip address for rs list config, will ignore this server", K(rs_list), K(ip), K(rpc_port_32), K(sql_port)); + } + } + } + rs_ptr = strtok_r(NULL, rs_delimiter, &p); + } + } + + return ret; +} + +int ObLogSQLServerProvider::get_copy_of_rs_list_conf_(char *rs_list_copy) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(rs_list_) || OB_UNLIKELY(MAX_CONFIG_LENGTH <= strlen(rs_list_)) || OB_UNLIKELY(0 == strlen(rs_list_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invald rs list", KR(ret), K_(rs_list)); + } else { + ObSmallSpinLockGuard guard(rs_conf_lock_); + (void)memset(rs_list_copy, 0, sizeof(char) * MAX_CONFIG_LENGTH); + int64_t pos = 0; + if (OB_FAIL(databuff_printf(rs_list_copy, sizeof(rs_list_), pos, "%s", rs_list_))) { + LOG_WARN("failed to copy rs_list_ to rs_list_copy", KR(ret), K_(rs_list), K(rs_list_copy)); + } + } + return ret; +} + +int ObLogSQLServerProvider::refresh_until_success_(const char *url, ServerList &server_list) +{ + int ret = OB_SUCCESS; + int64_t check_count = TCONF.test_mode_on ? 
TCONF.test_mode_block_sqlserver_count : -1; // Test mode to mock the number of failed times of sqlServer refresh rs list + while (OB_FAIL(refresh_by_cluster_url_(url, server_list)) || check_count-- > 0) + { + LOG_WARN("refresh rs list fail, will retry until success", KR(ret), K(url)); + usleep(FETCH_RS_RETRY_INTERVAL_ON_INIT_FAIL); + } + return ret; +} + +int ObLogSQLServerProvider::refresh_by_cluster_url_(const char *url, ServerList &server_list) +{ + OB_ASSERT(NULL != url); + + int ret = OB_SUCCESS; + int64_t timeout_ms = REFRESH_SERVER_LIST_TIMEOUT_MS; + ServerList readonly_server_list; //for compatible, not used + ObClusterType cluster_type; + if (OB_FAIL(ObWebServiceRootAddr::fetch_rs_list_from_url(NULL, url, timeout_ms, server_list, readonly_server_list, + cluster_type))) { + LOG_ERROR("fetch_rs_list_from_url fail", KR(ret), K(url), K(timeout_ms)); + } else if (OB_FAIL(get_svr_list_based_on_blacklist_(server_list))) { + LOG_ERROR("failed to get svr after filter by sql svr blacklist", KR(ret),K(url)); + } + + return ret; +} + +int ObLogSQLServerProvider::get_svr_list_based_on_blacklist_(ServerList &server_list) +{ + int ret = OB_SUCCESS; + const int64_t svr_count_before_filter = server_list.count(); + ObArray remove_svrs; + + if (OB_FAIL(filter_by_svr_blacklist_(server_list, remove_svrs))) { + LOG_ERROR("filter_by_svr_blacklist_ fail", KR(ret), K(server_list), K(remove_svrs)); + } else { + const int64_t svr_count_after_filter = server_list.count(); + LOG_INFO("[SQL_SERVER_PROVIDER] refresh server list succ", "server_count", svr_count_after_filter, + "remove_server_count", svr_count_before_filter - svr_count_after_filter, K(remove_svrs)); + + for (int64_t index = 0; index < svr_count_after_filter; index++) { + ObRootAddr &addr = server_list.at(index); + + _LOG_INFO("[SQL_SERVER_PROVIDER] server[%ld/%ld]=%s role=%ld sql_port=%ld", + index, svr_count_after_filter, to_cstring(addr.server_), + static_cast(addr.role_), addr.sql_port_); + } + } + return ret; +} + +int ObLogSQLServerProvider::filter_by_svr_blacklist_(ServerList &server_list, + common::ObArray &remove_svrs) +{ + int ret = OB_SUCCESS; + bool has_done = false; + const int64_t svr_blacklist_cnt = svr_blacklist_.count(); + int64_t svr_remove_cnt = 0; + + for (int64_t svr_idx = server_list.count() - 1; OB_SUCC(ret) && ! has_done && svr_idx >= 0; --svr_idx) { + const ObAddr &svr = server_list.at(svr_idx).server_; + const int64_t svr_count = server_list.count(); + + if (1 == svr_count) { + // Retain, do not dispose + has_done = true; + } else if (svr_remove_cnt >= svr_blacklist_cnt) { + // Based on the fact that the svr blacklist has been cleared + has_done = true; + } else { + if (svr_blacklist_.is_exist(svr)) { + if (OB_FAIL(remove_svrs.push_back(svr))) { + LOG_ERROR("remove_svrs push_back fail", KR(ret), K(svr)); + } else if (OB_FAIL(server_list.remove(svr_idx))) { + LOG_ERROR("remove svr item fail", KR(ret), K(svr_idx), K(svr), K(server_list)); + } else { + ++svr_remove_cnt; + } + } else { + // do nothing + } + } + } + + return ret; +} + +int ObLogSQLServerProvider::get_server( + const int64_t cluster_id, + const int64_t svr_idx, + common::ObAddr &server) +{ + int ret = OB_SUCCESS; + UNUSED(cluster_id); + if (! 
inited_) { + ret = OB_NOT_INIT; + } else if (svr_idx < 0) { + ret = OB_INVALID_ARGUMENT; + } else { + // add read lock + SpinRLockGuard guard(refresh_lock_); + ObRootAddr addr; + + if (svr_idx >= server_list_.count()) { + // out of svr count, need retry + ret = OB_ENTRY_NOT_EXIST; + } else if (OB_FAIL(server_list_.at(svr_idx, addr))) { + LOG_ERROR("get server from server list fail", KR(ret), K(svr_idx)); + } else { + server = addr.server_; + // Set SQL port + server.set_port((int32_t)addr.sql_port_); + } + + _LOG_DEBUG("[SQL_SERVER_PROVIDER] get_server(%ld/%ld)=>%s ret=%d", + svr_idx, server_list_.count(), to_cstring(server), ret); + } + + return ret; +} + +int ObLogSQLServerProvider::get_cluster_list(common::ObIArray &cluster_list) +{ + int ret = OB_SUCCESS; + cluster_list.reset(); + if (inited_) { + if (OB_FAIL(cluster_list.push_back(OB_INVALID_ID))) { + LOG_WARN("fail to push back cluster_id", K(ret)); + } + } + return ret; +} + +int64_t ObLogSQLServerProvider::get_cluster_count() const +{ + return inited_ ? 1 : 0; +} + +int64_t ObLogSQLServerProvider::get_server_count(const int64_t cluster_id) const +{ + UNUSED(cluster_id); + SpinRLockGuard guard(refresh_lock_); + return server_list_.count(); +} + +int64_t ObLogSQLServerProvider::get_server_count() const +{ + SpinRLockGuard guard(refresh_lock_); + return inited_ ? server_list_.count() : 0; +} + +int ObLogSQLServerProvider::refresh_server_list(void) +{ + return OB_SUCCESS; +} + +int ObLogSQLServerProvider::call_refresh_server_list(void) +{ + int ret = OB_SUCCESS; + ServerList new_server_list; + + if (! inited_) { + ret = OB_NOT_INIT; + } else { + if (is_using_rs_list_) { + // should refresh by rslist + if (OB_FAIL(refresh_by_rs_list_conf_(rs_list_, new_server_list))) { + LOG_ERROR("refresh server list by rootserver_list failed", KR(ret), K_(rs_list)); + } + } else { + if (OB_FAIL(refresh_by_cluster_url_(config_url_, new_server_list))) { + LOG_ERROR("refresh server list by config url fail", KR(ret), K_(config_url)); + } + } + + if (OB_SUCC(ret)) { + // write lock to modify server_list + SpinWLockGuard guard(refresh_lock_); + server_list_ = new_server_list; + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_sql_server_provider.h b/src/liboblog/src/ob_log_sql_server_provider.h new file mode 100644 index 0000000000000000000000000000000000000000..458dad64f571abd8b0934ae92c586ef6e59b6321 --- /dev/null +++ b/src/liboblog/src/ob_log_sql_server_provider.h @@ -0,0 +1,98 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_SQL_SERVER_PROVIDER_H__ +#define OCEANBASE_LIBOBLOG_SQL_SERVER_PROVIDER_H__ + +#include "lib/mysqlclient/ob_mysql_server_provider.h" // ObMySQLServerProvider +#include "lib/container/ob_se_array.h" // ObSEArray +#include "lib/container/ob_array.h" // ObArray +#include "share/ob_root_addr_agent.h" // ObRootAddr +#include "lib/lock/ob_spin_rwlock.h" // SpinRWLock +#include "lib/lock/ob_small_spin_lock.h" // ObByteLock +#include "ob_log_svr_blacklist.h" // ObLogSvrBlacklist +#include "ob_log_config.h" // TCONF, ObLogConfig + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogSQLServerProvider : public common::sqlclient::ObMySQLServerProvider +{ + static const int64_t MAX_CONFIG_LENGTH = 1 << 10; // for config_url and rootservice_list + static const int64_t DEFAULT_ROOT_SERVER_NUM = 16; + static const int64_t REFRESH_SERVER_LIST_TIMEOUT_MS = 60 * 1000 * 1000; + static const int64_t FETCH_RS_RETRY_INTERVAL_ON_INIT_FAIL = 100 * 1000; // 100ms Retry to retrieve RS list + static const char *DEFAULT_VALUE_OF_RS_CONF; // default value of config rootserver_list and config_url + + typedef common::ObSEArray ServerList; + +public: + ObLogSQLServerProvider(); + virtual ~ObLogSQLServerProvider(); + +public: + virtual int get_cluster_list(common::ObIArray &cluster_list); + virtual int get_server(const int64_t cluster_id, const int64_t svr_idx, common::ObAddr &server); + virtual int64_t get_cluster_count() const; + virtual int64_t get_server_count(const int64_t cluster_id) const; + virtual int64_t get_server_count() const; + // Call when ObMySQLConnectionPool background timing task refresh, in the process of refreshing the connection pool of read and write locks, when the config server exception, parsing does not return, resulting in write locks can not be released, thus affecting the Formatter refresh schema to get server list, resulting in Delay + // Optimisation options. 
+ // (1) Timed task do nothing in the background, relying on ObLog active refresh calls + // (2) Concurrent correctness guarantee: rely on ObLogSQLServerProvider's refresh_lock_ + // (3) Set libcurl connection timeout + virtual int refresh_server_list(void); + virtual int prepare_refresh() override; + + // Called by ObLog active refresh + int call_refresh_server_list(void); + +public: + int init(const char *config_url, const char *rs_list); + void destroy(); + void configure(const ObLogConfig &cfg); + +private: + // str_len of rs_list should be greater than 1(default valud is ';') and less than MAX_CONFIG_LENGTH + int check_rs_list_valid_(const char *rs_list); + int init_by_rs_list_conf_(const char *rs_list); + // rs list is valid or not(multi svr split by `;`), format: ip:rpc_port:sql_port + int parse_rs_list_(const char *rs_list, ServerList &server_list); + // get copy of rs_lislt_ (conf str), caller should guaratee call this function when is_using_rs_list_ = true + int get_copy_of_rs_list_conf_(char *rs_list_copy); + int refresh_by_rs_list_conf_(const char *rs_list, ServerList &server_list); + int init_by_cluster_url_(const char *config_url); + int refresh_until_success_(const char *url, ServerList &server_list); + int refresh_by_cluster_url_(const char *url, ServerList &server_list); + // get valid rs server list after filter by server blacklist + int get_svr_list_based_on_blacklist_(ServerList &server_list); + int filter_by_svr_blacklist_(ServerList &server_list, + common::ObArray &remove_svrs); + +private: + bool inited_; + bool is_using_rs_list_; + char rs_list_[MAX_CONFIG_LENGTH]; + char config_url_[MAX_CONFIG_LENGTH]; + ServerList server_list_; + + mutable common::SpinRWLock refresh_lock_; // lock to protect refresh ServerList server_list_ + mutable common::ObByteLock rs_conf_lock_; // lock to protect refresh conf(char*) rs_list_ + ObLogSvrBlacklist svr_blacklist_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSQLServerProvider); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_SQL_SERVER_PROVIDER_H__ */ diff --git a/src/liboblog/src/ob_log_start_log_id_locator.cpp b/src/liboblog/src/ob_log_start_log_id_locator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1d25a1dea15f95d19646e838c46dd2fdb8ae1fd0 --- /dev/null +++ b/src/liboblog/src/ob_log_start_log_id_locator.cpp @@ -0,0 +1,969 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_start_log_id_locator.h" + +#include "lib/allocator/ob_mod_define.h" // ObModIds + +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_trace_id.h" // ObLogTraceIdGuard +#include "ob_log_rpc.h" // IObLogRpc + +namespace oceanbase +{ +using namespace common; + +namespace liboblog +{ + +int64_t ObLogStartLogIdLocator::g_batch_count = + ObLogConfig::default_start_log_id_locator_batch_count; +int64_t ObLogStartLogIdLocator::g_rpc_timeout = + ObLogConfig::default_start_log_id_locator_rpc_timeout_sec * _SEC_; +int64_t ObLogStartLogIdLocator::g_observer_clog_save_time = + ObLogConfig::default_observer_clog_save_time_minutes * _MIN_; +bool ObLogStartLogIdLocator::g_enable_force_start_mode = + ObLogConfig::default_enable_force_start_mode; + +ObLogStartLogIdLocator::ObLogStartLogIdLocator() : + inited_(false), + worker_cnt_(0), + locate_count_(0), + rpc_(NULL), + err_handler_(NULL), + worker_data_(NULL), + allocator_(ObModIds::OB_LOG_START_LOG_ID_LOCATOR) +{ +} + +ObLogStartLogIdLocator::~ObLogStartLogIdLocator() +{ + destroy(); +} + +int ObLogStartLogIdLocator::init( + const int64_t worker_cnt, + const int64_t locate_count, + IObLogRpc &rpc, + IObLogErrHandler &err_handle) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY((worker_cnt_ = worker_cnt) <= 0) + || OB_UNLIKELY(locate_count <= 0) + || OB_UNLIKELY(MAX_THREAD_NUM < worker_cnt)) { + LOG_ERROR("invalid worker cnt", K(worker_cnt), "max_thread_num", MAX_THREAD_NUM, K(locate_count)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(LocateWorker::init(worker_cnt, ObModIds::OB_LOG_START_LOG_ID_LOCATOR))) { + LOG_ERROR("init locate worker fail", KR(ret), K(worker_cnt)); + } else { + int64_t alloc_size = worker_cnt * sizeof(WorkerData); + void *buf = ob_malloc(alloc_size, ObModIds::OB_LOG_START_LOG_ID_LOCATOR); + + if (OB_ISNULL(worker_data_ = static_cast(buf))) { + LOG_ERROR("allocate memory fail", K(worker_data_), K(alloc_size), K(worker_cnt)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t idx = 0, cnt = worker_cnt; OB_SUCCESS == ret && idx < cnt; ++idx) { + new (worker_data_ + idx) WorkerData(); + WorkerData &data = worker_data_[idx]; + + if (OB_FAIL(data.init())) { + LOG_ERROR("init worker data fail", KR(ret)); + } + } + } + + if (OB_SUCCESS == ret) { + locate_count_ = locate_count; + rpc_ = &rpc; + err_handler_ = &err_handle; + + inited_ = true; + LOG_INFO("init start log id locator succ", "thread_num", LocateWorker::get_thread_num()); + } + } + + if (OB_SUCCESS != ret) { + destroy(); + } + return ret; +} + +void ObLogStartLogIdLocator::destroy() +{ + stop(); + + inited_ = false; + + LocateWorker::destroy(); + + if (NULL != worker_data_) { + for (int64_t idx = 0, cnt = worker_cnt_; idx < cnt; ++idx) { + // free SvrReq memory here + free_all_svr_req_(worker_data_[idx]); + worker_data_[idx].~WorkerData(); + } + + ob_free(worker_data_); + worker_data_ = NULL; + } + + worker_cnt_ = 0; + locate_count_ = 0; + rpc_ = NULL; + err_handler_ = NULL; + worker_data_ = NULL; + allocator_.clear(); + + LOG_INFO("destroy start log id locator succ"); +} + +int ObLogStartLogIdLocator::async_start_log_id_req(StartLogIdLocateReq *req) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(req)) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
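+  // Request state machine: a request arrives in IDLE state, is switched to REQ here before being
+  // dispatched, and is finally set to DONE by dispatch_worker_() once it has either located a
+  // start log id or exhausted all candidate servers.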
req->is_state_idle())) { + LOG_ERROR("invalid request, state is not IDLE", KPC(req)); + ret = OB_INVALID_ARGUMENT; + } else { + req->set_state_req(); + + if (OB_FAIL(dispatch_worker_(req))) { + LOG_ERROR("dispatch worker fail", KR(ret), KPC(req)); + } + } + return ret; +} + +// 1. if the request is finished, set its status to DONE +// 2. otherwise assign it to the next server +int ObLogStartLogIdLocator::dispatch_worker_(StartLogIdLocateReq *req) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(req)) { + LOG_ERROR("invalid request", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! req->is_state_req())) { + LOG_ERROR("invalid request, state is not REQ", KPC(req), K(req->get_state())); + ret = OB_INVALID_ARGUMENT; + } else { + ObAddr svr; + StartLogIdLocateReq::SvrItem *item = NULL; + + // Mark DONE if the request is completed, or if all servers are requested. + if (req->is_request_ended(locate_count_)) { + // If the request ends, set to DONE + // NOTE: After setting to DONE, no further access is possible + LOG_DEBUG("start log id locate request ended", KPC(req)); + req->set_state_done(); + } else if (OB_FAIL(req->next_svr_item(item)) || OB_ISNULL(item)) { + LOG_ERROR("get next server item fail", KR(ret), KPC(req), K(item)); + ret = (OB_SUCCESS == ret ? OB_ERR_UNEXPECTED : ret); + } else { + const ObAddr &svr = item->svr_; + + // Hashing by server to the corresponding worker thread + // The purpose is to ensure that requests from the same server are aggregated + uint64_t hash_val = svr.hash(); + + LOG_DEBUG("dispatch start log id locate request", + "worker_idx", hash_val % worker_cnt_, + K(svr), + KPC(req)); + + if (OB_FAIL(LocateWorker::push(req, hash_val))) { + LOG_ERROR("push req to worker fail", KR(ret), KPC(req), K(hash_val), K(svr)); + } else { + // done + } + } + } + return ret; +} + +int ObLogStartLogIdLocator::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(LocateWorker::start())) { + LOG_ERROR("start locate worker fail", KR(ret)); + } else { + LOG_INFO("start log id locator succ", K_(worker_cnt)); + } + return ret; +} + +void ObLogStartLogIdLocator::stop() +{ + if (OB_LIKELY(inited_)) { + LocateWorker::stop(); + LOG_INFO("stop log id locator succ"); + } +} + +// start log id locator worker thread +void ObLogStartLogIdLocator::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= worker_cnt_)) { + LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(worker_cnt_)); + ret = OB_ERR_UNEXPECTED; + } else { + WorkerData &data = worker_data_[thread_index]; + + while (! 
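+    // Worker main loop: batch-pop the requests hashed to this thread (do_retrieve_), aggregate
+    // them per server, issue the aggregated RPC requests (do_request_), then wait on the condition
+    // variable before the next round.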
stop_flag_ && OB_SUCCESS == ret) {
+ if (OB_FAIL(do_retrieve_(thread_index, data))) {
+ LOG_ERROR("retrieve request fail", KR(ret), K(thread_index));
+ } else if (OB_FAIL(do_request_(data))) {
+ if (OB_IN_STOP_STATE != ret) {
+ LOG_ERROR("do request fail", KR(ret));
+ }
+ } else {
+ cond_timedwait(thread_index, DATA_OP_TIMEOUT);
+ }
+ }
+
+ if (stop_flag_) {
+ ret = OB_IN_STOP_STATE;
+ }
+ }
+
+ if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) {
+ LOG_ERROR("start log id locator worker exit on fail", KR(ret), K(thread_index));
+ if (OB_NOT_NULL(err_handler_)) {
+ err_handler_->handle_error(ret,
+ "start log id locator worker exit on fail, ret=%d, thread_index=%ld",
+ ret, thread_index);
+ }
+ }
+}
+
+void ObLogStartLogIdLocator::configure(const ObLogConfig &config)
+{
+ int64_t start_log_id_locator_rpc_timeout_sec = config.start_log_id_locator_rpc_timeout_sec;
+ int64_t start_log_id_locator_batch_count = config.start_log_id_locator_batch_count;
+ int64_t observer_clog_save_time_minutes = config.observer_clog_save_time_minutes;
+ bool enable_force_start_mode = config.enable_force_start_mode;
+
+ ATOMIC_STORE(&g_rpc_timeout, start_log_id_locator_rpc_timeout_sec * _SEC_);
+ LOG_INFO("[CONFIG]", K(start_log_id_locator_rpc_timeout_sec));
+
+ ATOMIC_STORE(&g_batch_count, start_log_id_locator_batch_count);
+ LOG_INFO("[CONFIG]", K(start_log_id_locator_batch_count));
+
+ ATOMIC_STORE(&g_observer_clog_save_time, observer_clog_save_time_minutes * _MIN_);
+ LOG_INFO("[CONFIG]", K(observer_clog_save_time_minutes));
+
+ ATOMIC_STORE(&g_enable_force_start_mode, enable_force_start_mode);
+ LOG_INFO("[CONFIG]", K(enable_force_start_mode));
+}
+
+// Pop out data from Queue, then use Map, aggregating requests by server
+int ObLogStartLogIdLocator::do_retrieve_(const int64_t thread_index, WorkerData &worker_data)
+{
+ int ret = OB_SUCCESS;
+ int64_t batch_count = ATOMIC_LOAD(&g_batch_count);
+
+ for (int64_t cnt = 0; OB_SUCCESS == ret && (cnt < batch_count); ++cnt) {
+ StartLogIdLocateReq *request = NULL;
+ StartLogIdLocateReq::SvrItem *item = NULL;
+ SvrReq *svr_req = NULL;
+ void *data = NULL;
+
+ if (OB_FAIL(LocateWorker::pop(thread_index, data))) {
+ if (OB_EAGAIN != ret) {
+ LOG_ERROR("pop data from queue fail", KR(ret), K(thread_index), K(request));
+ }
+ } else if (OB_ISNULL(request = static_cast<StartLogIdLocateReq *>(data))) {
+ LOG_ERROR("request is NULL", K(request), K(thread_index));
+ ret = OB_ERR_UNEXPECTED;
+ } else if (OB_FAIL(request->cur_svr_item(item)) || OB_ISNULL(item)) {
+ LOG_ERROR("get current server item fail", KR(ret), KPC(request), K(item));
+ ret = (OB_SUCCESS == ret ? OB_ERR_UNEXPECTED : ret);
+ } else if (OB_FAIL(get_svr_req_(worker_data, item->svr_, svr_req)) || OB_ISNULL(svr_req)) {
+ LOG_ERROR("get svr req fail", KR(ret), K(item->svr_), K(svr_req));
+ ret = OB_SUCCESS == ret ?
OB_ERR_UNEXPECTED : ret; + } else if (OB_FAIL(svr_req->push(request))) { + LOG_ERROR("push request into request list fail", KR(ret), KPC(svr_req), K(request)); + } else { + // succ + } + } + + if (OB_SUCCESS == ret) { + // Meeting the volume requirements for batch processing + } else if (OB_EAGAIN == ret) { + // empty queue + ret = OB_SUCCESS; + } else { + LOG_ERROR("pop and aggregate request fail", KR(ret), K(thread_index)); + } + return ret; +} + +int ObLogStartLogIdLocator::get_svr_req_(WorkerData &data, + const common::ObAddr &svr, + SvrReq *&svr_req) +{ + int ret = OB_SUCCESS; + SvrReqList &svr_req_list = data.svr_req_list_; + SvrReqMap &svr_req_map = data.svr_req_map_; + + svr_req = NULL; + + // Fetching the corresponding record from the Map is preferred + if (OB_FAIL(svr_req_map.get(svr, svr_req))) { + // If the record does not exist, insert the corresponding record in the Array first, and then insert the object in the Array into the Map + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + + // Assign a new request + if (OB_ISNULL(svr_req = alloc_svr_req_(svr))) { + LOG_ERROR("allocate svr request fail", K(svr)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(svr_req_list.push_back(svr_req))) { + LOG_ERROR("push svr req into array fail", KR(ret), K(svr_req), K(svr_req_list)); + } else if (OB_FAIL(svr_req_map.insert(svr, svr_req))) { + LOG_ERROR("insert svr req into map fail", KR(ret), K(svr), KPC(svr_req)); + } + } else { + LOG_ERROR("get svr req from map fail", KR(ret), K(svr)); + } + } else { + // succ + } + return ret; +} + +ObLogStartLogIdLocator::SvrReq *ObLogStartLogIdLocator::alloc_svr_req_(const common::ObAddr &svr) +{ + SvrReq *svr_req = NULL; + void *buf = allocator_.alloc(sizeof(SvrReq)); + if (OB_NOT_NULL(buf)) { + svr_req = new(buf) SvrReq(svr); + } + return svr_req; +} + +void ObLogStartLogIdLocator::free_svr_req_(SvrReq *req) +{ + if (NULL != req) { + req->~SvrReq(); + allocator_.free(req); + req = NULL; + } +} + +void ObLogStartLogIdLocator::free_all_svr_req_(WorkerData &data) +{ + for (int64_t index = 0; index < data.svr_req_list_.count(); index++) { + SvrReq *svr_req = data.svr_req_list_.at(index); + + if (OB_NOT_NULL(svr_req)) { + free_svr_req_(svr_req); + svr_req = NULL; + } + } + + data.reset(); +} + +int ObLogStartLogIdLocator::do_request_(WorkerData &data) +{ + int ret = OB_SUCCESS; + SvrReqList &svr_req_list = data.svr_req_list_; + + if (OB_ISNULL(rpc_)) { + LOG_ERROR("invalid rpc handle", K(rpc_)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int64_t idx = 0, cnt = svr_req_list.count(); + ! stop_flag_ && OB_SUCCESS == ret && (idx < cnt); ++idx) { + if (OB_ISNULL(svr_req_list.at(idx))) { + LOG_ERROR("svr request is NULL", K(idx), K(cnt), K(svr_req_list)); + ret = OB_ERR_UNEXPECTED; + } else { + SvrReq &svr_req = *(svr_req_list.at(idx)); + + // Requesting a single server + // 1. The number of partitions on a single server may be greater than the maximum number of partitions for a single RPC and needs to be split into multiple requests + // 2. Each partition request is removed from the request list as soon as it completes, so each request is split into multiple requests, each starting with the first element + // 3. Partition request completion condition: regardless of success, as long as no breakpoint message is returned, the request is considered completed + while (! 
stop_flag_ && OB_SUCCESS == ret && svr_req.locate_req_list_.count() > 0) {
+ // Maximum number of items in a single request
+ int64_t item_cnt_limit = RpcReq::ITEM_CNT_LMT;
+ int64_t req_cnt = std::min(svr_req.locate_req_list_.count(), item_cnt_limit);
+
+ // A single request does not guarantee that all partition requests will be successful, and partition requests that return a breakpoint message need to be retried immediately
+ // Note: A separate loop must be used here to ensure that the partition in the retry request is the same "breakpoint partition",
+ // if the requested partition is not the same "breakpoint partition" but a new partition is added, the server will have
+ // to scan the file from the "head" again and the breakpoint information will be invalid.
+ while (! stop_flag_ && OB_SUCCESS == ret && req_cnt > 0) {
+ // Set different trace ids for different requests
+ ObLogTraceIdGuard trace_guard;
+
+ RpcReq rpc_req;
+ int64_t succ_req_cnt = 0;
+
+ // Build request parameters
+ if (OB_FAIL(build_request_params_(rpc_req, svr_req, req_cnt))) {
+ LOG_ERROR("build request params fail", KR(ret), K(rpc_req), K(req_cnt), K(svr_req));
+ }
+ // Executing RPC requests
+ else if (OB_FAIL(do_rpc_and_dispatch_(*(rpc_), rpc_req, svr_req, succ_req_cnt))) {
+ LOG_ERROR("do rpc and dispatch fail", KR(ret), K(rpc_req), K(svr_req), K(succ_req_cnt));
+ } else {
+ // One request completed
+ req_cnt -= succ_req_cnt;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (stop_flag_) {
+ ret = OB_IN_STOP_STATE;
+ }
+
+ return ret;
+}
+
+int ObLogStartLogIdLocator::build_request_params_(RpcReq &req,
+ const SvrReq &svr_req,
+ const int64_t req_cnt)
+{
+ int ret = OB_SUCCESS;
+ int64_t total_cnt = svr_req.locate_req_list_.count();
+
+ req.reset();
+
+ for (int64_t index = 0; OB_SUCCESS == ret && index < req_cnt && index < total_cnt; ++index) {
+ StartLogIdLocateReq *request = svr_req.locate_req_list_.at(index);
+ StartLogIdLocateReq::SvrItem *svr_item = NULL;
+
+ if (OB_ISNULL(request)) {
+ LOG_ERROR("invalid request", K(index), "req_list", svr_req.locate_req_list_);
+ ret = OB_ERR_UNEXPECTED;
+ }
+ // Get the current SvrItem corresponding to the partition request
+ else if (OB_FAIL(request->cur_svr_item(svr_item))) {
+ LOG_ERROR("get current server item fail", KR(ret), KPC(request));
+ }
+ // Verify the validity of the SvrItem, to avoid setting the breakpoint information incorrectly, the server is required to match
+ else if (OB_ISNULL(svr_item) || OB_UNLIKELY(svr_item->svr_ != svr_req.svr_)) {
+ LOG_ERROR("invalid svr_item which does not match SvrReq", KPC(svr_item), K(svr_req),
+ KPC(request));
+ ret = OB_ERR_UNEXPECTED;
+ } else {
+ RpcReq::Param param;
+ // Maintaining breakpoint information
+ param.reset(request->pkey_, request->start_tstamp_, svr_item->breakinfo_);
+
+ // Requires append operation to be successful
+ if (OB_FAIL(req.append_param(param))) {
+ LOG_ERROR("append param fail", KR(ret), K(req_cnt), K(index), K(req), K(param));
+ }
+ }
+ }
+ return ret;
+}
+
+int ObLogStartLogIdLocator::do_rpc_and_dispatch_(
+ IObLogRpc &rpc,
+ RpcReq &rpc_req,
+ SvrReq &svr_req,
+ int64_t &succ_req_cnt)
+{
+ int ret = OB_SUCCESS;
+ int rpc_err = OB_SUCCESS;
+ int svr_err = OB_SUCCESS;
+ RpcRes rpc_res;
+ const int64_t request_cnt = rpc_req.get_params().count();
+ int64_t rpc_timeout = ATOMIC_LOAD(&g_rpc_timeout);
+ TraceIdType *trace_id = ObCurTraceId::get_trace_id();
+
+ succ_req_cnt = 0;
+
+ rpc_err = rpc.req_start_log_id_by_tstamp(svr_req.svr_, rpc_req, rpc_res, rpc_timeout);
+
+ // send rpc fail
+ if (OB_SUCCESS != rpc_err) {
+ LOG_ERROR("rpc request start 
log id by tstamp fail, rpc error", + K(rpc_err), "svr", svr_req.svr_, K(rpc_req), K(rpc_res)); + } + // observer handle fail + else if (OB_SUCCESS != (svr_err = rpc_res.get_err())) { + LOG_ERROR("rpc request start log id by tstamp fail, server error", + K(svr_err), "svr", svr_req.svr_, K(rpc_req), K(rpc_res)); + } + // Both the RPC and server return success, requiring the number of results returned to match the number of requests + else { + const int64_t result_cnt = rpc_res.get_results().count(); + + if (request_cnt != result_cnt) { + LOG_ERROR("result count does not equal to request count", + K(request_cnt), K(result_cnt), K(rpc_req), K(rpc_res)); + ret = OB_ERR_UNEXPECTED; + } + } + + if (OB_SUCCESS == ret) { + // Scanning of arrays in reverse order to support deletion of completed partition requests + for (int64_t idx = request_cnt - 1; OB_SUCCESS == ret && idx >= 0; idx--) { + bool has_break_info = false; + int partition_err = OB_SUCCESS; + const obrpc::BreakInfo *bkinfo = NULL; + obrpc::BreakInfo default_breakinfo; + uint64_t start_log_id = OB_INVALID_ID; + int64_t start_log_tstamp = OB_INVALID_TIMESTAMP; + StartLogIdLocateReq *request = svr_req.locate_req_list_.at(idx); + + if (OB_ISNULL(request)) { + LOG_ERROR("invalid request in server request list", K(request), K(idx), + K(request_cnt), "req_list", svr_req.locate_req_list_); + ret = OB_ERR_UNEXPECTED; + } + // Set an invalid start log id if the RPC or server returns a failure + else if (OB_SUCCESS != rpc_err || OB_SUCCESS != svr_err) { + default_breakinfo.reset(); + + partition_err = OB_SUCCESS; + bkinfo = &default_breakinfo; + start_log_id = OB_INVALID_ID; + start_log_tstamp = OB_INVALID_TIMESTAMP; + } + // If the result is valid, get the corresponding result and set the result + else { + const RpcRes::Result &result = rpc_res.get_results().at(idx); + + has_break_info = (OB_EXT_HANDLE_UNFINISH == result.err_); + + partition_err = result.err_; + bkinfo = &result.break_info_; + start_log_id = result.start_log_id_; + start_log_tstamp = result.start_log_ts_; + } + + if (OB_SUCCESS == ret) { + // set result + if (OB_FAIL(request->set_result(svr_req.svr_, rpc_err, svr_err, + partition_err, *bkinfo, start_log_id, start_log_tstamp, trace_id))) { + LOG_ERROR("request set result fail", KR(ret), "svr", svr_req.svr_, + K(rpc_err), K(svr_err), K(partition_err), + KPC(bkinfo), K(start_log_id), K(start_log_tstamp), KPC(request), K(trace_id)); + } + // For requests without breakpoint information, dispatch directly and remove from the request array + else if (! 
has_break_info) { + if (OB_FAIL(dispatch_worker_(request))) { + LOG_ERROR("dispatch worker fail", KR(ret), KPC(request)); + } else if (OB_FAIL(svr_req.locate_req_list_.remove(idx))) { + LOG_ERROR("remove from request list fail", KR(ret), K(idx), + "req_list", svr_req.locate_req_list_); + } else { + succ_req_cnt++; + request = NULL; + } + } else { + // Do not process requests with break info + LOG_DEBUG("start log id locate done with break info", KPC(request)); + } + } + } + } + + return ret; +} + + +//////////////////////////// ObLogStartLogIdLocator::WorkerData //////////////////////////// + +int ObLogStartLogIdLocator::WorkerData::init() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(svr_req_map_.init(ObModIds::OB_LOG_START_LOG_ID_LOCATOR))) { + LOG_ERROR("init request map fail", KR(ret)); + } else { + svr_req_list_.set_label(ObModIds::OB_LOG_START_LOG_ID_LOCATOR); + svr_req_list_.reset(); + } + return ret; +} + +void ObLogStartLogIdLocator::WorkerData::destroy() +{ + svr_req_list_.reset(); + (void)svr_req_map_.destroy(); +} + + +//////////////////////////// StartLogIdLocateReq //////////////////////////// + +void StartLogIdLocateReq::reset() +{ + set_state(IDLE); + pkey_.reset(); + start_tstamp_ = OB_INVALID_TIMESTAMP; + svr_list_.reset(); + svr_list_consumed_ = 0; + result_svr_list_idx_ = -1; + cur_max_start_log_id_ = OB_INVALID_ID; + cur_max_start_log_tstamp_ = OB_INVALID_TIMESTAMP; + succ_locate_count_ = 0; +} + +void StartLogIdLocateReq::reset(const common::ObPartitionKey &pkey, const int64_t start_tstamp) +{ + reset(); + pkey_ = pkey; + start_tstamp_ = start_tstamp; +} + +int StartLogIdLocateReq::next_svr_item(SvrItem *&svr_item) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(svr_list_consumed_ < 0)) { + LOG_ERROR("invalid parameter", K(svr_list_consumed_)); + ret = OB_INDEX_OUT_OF_RANGE; + } else if (svr_list_consumed_ >= svr_list_.count()) { + ret = OB_ITER_END; + } else { + svr_item = &(svr_list_.at(svr_list_consumed_)); + svr_list_consumed_++; + } + + return ret; +} + +int StartLogIdLocateReq::cur_svr_item(SvrItem *&svr_item) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(svr_list_consumed_ <= 0 || svr_list_consumed_ > svr_list_.count())) { + LOG_ERROR("index out of range", K(svr_list_consumed_), K(svr_list_.count())); + ret = OB_INDEX_OUT_OF_RANGE; + } else { + svr_item = &(svr_list_.at(svr_list_consumed_ - 1)); + } + return ret; +} + +void StartLogIdLocateReq::check_locate_result_(const int64_t start_log_tstamp, + const uint64_t start_log_id, + const common::ObAddr &svr, + bool &is_consistent) const +{ + // default to be consistent + is_consistent = true; + + // There may be a bug in the OB where the logs located are large, add checks here and report an error if multiple locations return logs larger than the start-up timestamp + if (start_tstamp_ > 0 + && start_log_tstamp >= start_tstamp_ + && cur_max_start_log_tstamp_ >= start_tstamp_ + && start_log_tstamp != cur_max_start_log_tstamp_) { + LOG_ERROR("start log id locate results from different servers are not consistent, " + "may be OceanBase server BUG, need check manually", + K_(pkey), K(svr), K(start_log_id), K(start_log_tstamp), K(cur_max_start_log_tstamp_), + K(start_tstamp_), K(cur_max_start_log_id_), K(svr_list_consumed_), + K(succ_locate_count_), K(svr_list_)); + // get un-consistent result + is_consistent = false; + } else { + is_consistent = true; + } +} + +int StartLogIdLocateReq::set_result(const common::ObAddr &svr, + const int rpc_err, + const int svr_err, + const int part_err, + const obrpc::BreakInfo &breakinfo, + const 
uint64_t start_log_id, + const int64_t start_log_tstamp, + const TraceIdType *trace_id) +{ + int ret = OB_SUCCESS; + SvrItem *item = NULL; + + if (OB_FAIL(cur_svr_item(item)) || OB_ISNULL(item)) { + LOG_ERROR("get current server item fail", KR(ret), KPC(item)); + ret = (OB_SUCCESS == ret ? OB_ERR_UNEXPECTED : ret); + } else if (OB_UNLIKELY(svr != item->svr_)) { + LOG_ERROR("server does not match, result is invalid", K(svr), KPC(item)); + ret = OB_INVALID_ARGUMENT; + } else { + int new_part_err = part_err; + + // If it returns less than a lower bound error and the lower bound is 1, then it has a start log id of 1 + if (OB_ERR_OUT_OF_LOWER_BOUND == part_err && 1 == start_log_id) { + new_part_err = OB_SUCCESS; + } + + item->set_result(rpc_err, svr_err, new_part_err, breakinfo, start_log_id, start_log_tstamp, trace_id); + + // If the result is valid, set a valid server item index + if (OB_SUCCESS == rpc_err && OB_SUCCESS == svr_err && OB_SUCCESS == new_part_err) { + // No need to update results by default + bool need_update_result = false; + + // Increase the number of successfully located servers + ++succ_locate_count_; + + // First successful locating start log id, results need to be recorded + if (OB_INVALID_ID == cur_max_start_log_id_) { + need_update_result = true; + } else { + bool is_consistent = true; + + // Verify the second and subsequent results for incorrect results + check_locate_result_(start_log_tstamp, start_log_id, svr, is_consistent); + + if (OB_UNLIKELY(! is_consistent)) { + // Scenarios with inconsistent processing results + if (TCONF.skip_start_log_id_locator_result_consistent_check) { + // Skip inconsistent data checks + // + // For multiple logs with a timestamp greater than the starting timestamp, take the smallest value to ensure the result is safe + if (start_log_tstamp < cur_max_start_log_tstamp_ && start_log_tstamp >= start_tstamp_) { + // Overwrite the previous positioning result with the current result + need_update_result = true; + } + } else { + ret = OB_INVALID_DATA; + } + } else { + // The result is normal, updated maximum log information + if (start_log_id > cur_max_start_log_id_) { + need_update_result = true; + } + } + } + + // Update final location log result: Use current server location result as final location result + if (OB_SUCCESS == ret && need_update_result) { + cur_max_start_log_id_ = start_log_id; + cur_max_start_log_tstamp_ = start_log_tstamp; + result_svr_list_idx_ = (svr_list_consumed_ - 1); + } + } + + LOG_INFO("start log id locate request of one server is done", + KR(ret), + K_(pkey), "svr", item->svr_, K(start_log_id), K(start_log_tstamp), K_(start_tstamp), + "delta", start_log_tstamp - start_tstamp_, + "rpc_err", ob_error_name(rpc_err), + "svr_err", ob_error_name(svr_err), + "part_err", ob_error_name(new_part_err), + K(cur_max_start_log_id_), K(cur_max_start_log_tstamp_), + K(result_svr_list_idx_), K(succ_locate_count_), + K_(svr_list_consumed), + "svr_cnt", svr_list_.count()); + } + + return ret; +} + +bool StartLogIdLocateReq::is_request_ended(const int64_t locate_count) const +{ + bool bool_ret = false; + + // Ending conditions. + // 1. all servers are exhausted + // 2. 
or the required number of servers has been located
+ bool_ret = (svr_list_.count() <= svr_list_consumed_)
+ || (succ_locate_count_ == locate_count);
+
+ return bool_ret;
+}
+
+bool StartLogIdLocateReq::get_result(uint64_t &start_log_id, common::ObAddr &svr)
+{
+ start_log_id = common::OB_INVALID_ID;
+ bool succeed = false;
+
+ if (result_svr_list_idx_ >= 0 && result_svr_list_idx_ < svr_list_.count()) {
+ succeed = true;
+ SvrItem &item = svr_list_.at(result_svr_list_idx_);
+ start_log_id = item.start_log_id_;
+ svr = item.svr_;
+ } else if (svr_list_.count() == svr_list_consumed_) {
+ // Handle the case where all servers return "out of lower bound"
+ // FIXME: Since version 2.0, a log is no longer guaranteed to be written for every major version, so it is possible that all logs of a cold partition are recycled,
+ // with only the latest log information recorded in saved storage info. This leads to a serious problem: if all logs of a cold partition have been recycled
+ // and a new log is written afterwards, any log locate request earlier than that log will fail. To solve this kind of problem, the following measures complement each other.
+ // 1. The observer maintains accurate local log service information so that locating does not fail. This feature will not be available until the ilog refactoring.
+ // 2. Until 1 is completed, combine this with the last-version log information recorded in saved storage info to ensure that the log of the last version can be located.
+ // 3. But 2 does not solve the problem completely: once the partition performs a minor freeze, problems remain. For this reason, liboblog needs a further protection scheme.
+ // When all servers return "out of lower bound" and the start timestamp is not too old (say within 2 hours of the current time), we assume that the observer
+ // has not recycled the logs during this period, so if a server returns "out of lower bound" at this point, the logs must still be served. Therefore, we can safely start from the lower-bound log.
+ // 4. But in practice it is not guaranteed that all servers will return "out of lower bound"; some servers may simply not serve this log. To keep such cases operable,
+ // we add a configuration item that decides whether to forcibly treat the start log id locate as successful
+ // and start from the minimum log id, which makes operation and maintenance easier.
+ uint64_t min_start_log_id = OB_INVALID_ID;
+ ObAddr min_start_log_svr;
+ int64_t lower_bound_svr_cnt = 0;
+
+ bool enable_force_start_mode =
+ ATOMIC_LOAD(&ObLogStartLogIdLocator::g_enable_force_start_mode);
+
+ // Iterate over all servers to check whether every one of them returned "out of lower bound".
+ // If a server's request or its RPC failed, it is not counted; we assume that server has a problem.
+ // Since options 3 and 4 provide further protection, we do not require every server to be online and to return "out of lower bound" when some server requests fail.
+ for (int64_t index = 0; index < svr_list_.count(); index++) { + SvrItem &item = svr_list_.at(index); + if (item.rpc_executed_ + && OB_SUCCESS == item.rpc_err_ + && OB_SUCCESS == item.svr_err_ + && OB_ERR_OUT_OF_LOWER_BOUND == item.partition_err_) { + if (OB_INVALID_ID == min_start_log_id || min_start_log_id > item.start_log_id_) { + min_start_log_id = item.start_log_id_; + min_start_log_svr = item.svr_; + } + lower_bound_svr_cnt++; + } + } + + + int64_t start_tstamp_delay_time = get_timestamp() - start_tstamp_; + int64_t observer_clog_save_time = + ATOMIC_LOAD(&ObLogStartLogIdLocator::g_observer_clog_save_time); + + if (lower_bound_svr_cnt == svr_list_.count() && OB_INVALID_ID != min_start_log_id) { + if (start_tstamp_delay_time > observer_clog_save_time) { + LOG_ERROR("start tstamp is too old, can not force-startup " + "when all server is out of lower bound", + K(start_tstamp_delay_time), K(observer_clog_save_time), K(start_tstamp_), + K(pkey_), "svr_cnt", svr_list_.count(), K_(svr_list_consumed), + K_(result_svr_list_idx), K_(svr_list)); + } else { + LOG_WARN("****** FIXME ******* all server is out of lower bound. " + "we will force-startup. this might lose data.", + K_(pkey), K_(start_tstamp), K(min_start_log_id), K(min_start_log_svr), + K(observer_clog_save_time), "svr_cnt", svr_list_.count(), K_(svr_list_consumed), + K_(result_svr_list_idx), K_(svr_list)); + + start_log_id = min_start_log_id; + svr = min_start_log_svr; + succeed = true; + } + } + + // Forced start mode is only required when option 3 is unsuccessful + if (! succeed) { + // Forced mode take in effect + if (enable_force_start_mode) { + // At least one server returns OB_ERR_OUT_OF_LOWER_BOUND + if (lower_bound_svr_cnt > 0) { + if (start_tstamp_delay_time > observer_clog_save_time) { + LOG_ERROR("start tstamp is too old, can not force-startup " + "when handle enable force start mode", + K(enable_force_start_mode), K(lower_bound_svr_cnt), + K(start_tstamp_delay_time), K(observer_clog_save_time), K(start_tstamp_), + K(pkey_), "svr_cnt", svr_list_.count(), K_(svr_list_consumed), + K_(result_svr_list_idx), K_(svr_list)); + } else { + LOG_ERROR("at least one server is out of lower bound. " + "we will force-startup. this might lose data.", + K(enable_force_start_mode), K(lower_bound_svr_cnt), + K_(pkey), K_(start_tstamp), K(min_start_log_id), K(min_start_log_svr), + K(observer_clog_save_time), "svr_cnt", svr_list_.count(), K_(svr_list_consumed), + K_(result_svr_list_idx), K_(svr_list)); + + start_log_id = min_start_log_id; + svr = min_start_log_svr; + succeed = true; + } + } else { + LOG_ERROR("no one server is out of lower bound. we can not force-startup.", + K(enable_force_start_mode), K(lower_bound_svr_cnt), + K_(pkey), K_(start_tstamp), K(min_start_log_id), K(min_start_log_svr), + K(observer_clog_save_time), "svr_cnt", svr_list_.count(), K_(svr_list_consumed), + K_(result_svr_list_idx), K_(svr_list)); + } + } + } + } + + if (! 
succeed) { + LOG_ERROR("request start log id from all server fail", K_(pkey), + K_(start_tstamp), "svr_cnt", svr_list_.count(), K_(svr_list_consumed), + K_(result_svr_list_idx), K_(svr_list)); + } + + return succeed; +} + +void StartLogIdLocateReq::SvrItem::reset() +{ + svr_.reset(); + rpc_executed_ = false; + rpc_err_ = OB_SUCCESS; + svr_err_ = OB_SUCCESS; + partition_err_ = OB_SUCCESS; + breakinfo_.reset(); + start_log_id_ = OB_INVALID_ID; + start_log_tstamp_ = OB_INVALID_TIMESTAMP; + trace_id_.reset(); +} + +void StartLogIdLocateReq::SvrItem::reset(const common::ObAddr &svr) +{ + reset(); + svr_ = svr; +} + +// Set result. +void StartLogIdLocateReq::SvrItem::set_result(const int rpc_err, + const int svr_err, + const int partition_err, + const obrpc::BreakInfo &breakinfo, + const uint64_t start_log_id, + const int64_t start_log_tstamp, + const TraceIdType *trace_id) +{ + rpc_executed_ = true; + rpc_err_ = rpc_err; + svr_err_ = svr_err; + partition_err_ = partition_err; + breakinfo_ = breakinfo; + start_log_id_ = start_log_id; + start_log_tstamp_ = start_log_tstamp; + + if (OB_NOT_NULL(trace_id)) { + trace_id_ = *trace_id; + } else { + trace_id_.reset(); + } +} + +} +} diff --git a/src/liboblog/src/ob_log_start_log_id_locator.h b/src/liboblog/src/ob_log_start_log_id_locator.h new file mode 100644 index 0000000000000000000000000000000000000000..4e07d6c6a9627c1eb953e4698be1c01e9176977a --- /dev/null +++ b/src/liboblog/src/ob_log_start_log_id_locator.h @@ -0,0 +1,313 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_START_LOG_ID_LOCATOR_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_START_LOG_ID_LOCATOR_H_ + +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/container/ob_array.h" // ObArray +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/allocator/ob_safe_arena.h" // ObSafeArena +#include "lib/profile/ob_trace_id.h" // ObCurTraceId +#include "common/ob_partition_key.h" // ObPartitionKey +#include "clog/ob_log_external_rpc.h" // BreakInfo + +#include "ob_map_queue_thread.h" // ObMapQueueThread +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_utils.h" // _SEC_ + +namespace oceanbase +{ +namespace liboblog +{ + +struct StartLogIdLocateReq; +class IObLogStartLogIdLocator +{ +public: + virtual ~IObLogStartLogIdLocator() {} + +public: + virtual int async_start_log_id_req(StartLogIdLocateReq *req) = 0; + + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; +}; + +//////////////////////////// ObLogStartLogIdLocator //////////////////////////// + +typedef common::ObMapQueueThread LocateWorker; + +class IObLogRpc; +class IObLogErrHandler; +class ObLogStartLogIdLocator : public IObLogStartLogIdLocator, public LocateWorker +{ + static const int64_t DATA_OP_TIMEOUT = 100 * _MSEC_; + typedef obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint RpcReq; + typedef obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint RpcRes; + typedef common::ObSafeArena AllocatorType; + + // Class member variables +public: + static int64_t g_batch_count; + static int64_t g_rpc_timeout; + static int64_t g_observer_clog_save_time; // Maximum log retention time of observer + static bool g_enable_force_start_mode; // Forced start mode, at least one server returns OB_ERR_OUT_OF_LOWER_BOUND + +public: + ObLogStartLogIdLocator(); + virtual ~ObLogStartLogIdLocator(); + + int init( + const int64_t worker_cnt, + const int64_t locate_count, + IObLogRpc &rpc, + IObLogErrHandler &err_handle); + void destroy(); + +public: + int async_start_log_id_req(StartLogIdLocateReq *req); + int start(); + void stop(); + void mark_stop_flag() { LocateWorker::mark_stop_flag(); } + +public: + // Implementation of LocateWorker's thread handling functions + void run(const int64_t thread_index); + + // Class member function +public: + static void configure(const ObLogConfig &config); + +// private member function +private: + struct WorkerData; + struct SvrReq; + int dispatch_worker_(StartLogIdLocateReq *req); + int do_retrieve_(const int64_t thread_index, WorkerData &data); + int get_svr_req_(WorkerData &data, + const common::ObAddr &svr, + SvrReq *&svr_list); + SvrReq *alloc_svr_req_(const common::ObAddr &svr); + void free_svr_req_(SvrReq *req); + void free_all_svr_req_(WorkerData &data); + int do_request_(WorkerData &data); + int build_request_params_(RpcReq &req, + const SvrReq &svr_req, + const int64_t req_cnt); + int init_worker_data_(WorkerData &data); + void destroy_worker_data_(WorkerData &data); + int do_rpc_and_dispatch_( + IObLogRpc &rpc, + RpcReq &rpc_req, + SvrReq &svr_req, + int64_t &succ_req_cnt); + +// private structs +private: + // Requests from a single server + struct SvrReq + { + typedef common::ObArray ReqList; + + common::ObAddr svr_; + ReqList locate_req_list_; + + explicit SvrReq(const common::ObAddr &svr) : svr_(svr), locate_req_list_() + {} + + TO_STRING_KV(K_(svr), "req_cnt", locate_req_list_.count(), K_(locate_req_list)); + + 
// won't reset of server information + void reset() + { + locate_req_list_.reset(); + } + + void reset(const common::ObAddr &svr) + { + svr_ = svr; + locate_req_list_.reset(); + } + + int push(StartLogIdLocateReq *req) + { + return locate_req_list_.push_back(req); + } + + private: + DISALLOW_COPY_AND_ASSIGN(SvrReq); + }; + + typedef common::ObArray SvrReqList; + typedef common::ObLinearHashMap SvrReqMap; + + // Data local to each Worker + struct WorkerData + { + SvrReqList svr_req_list_; + SvrReqMap svr_req_map_; + + WorkerData() : svr_req_list_(), svr_req_map_() + {} + + ~WorkerData() { destroy(); } + + int init(); + void destroy(); + void reset() + { + svr_req_list_.reset(); + svr_req_map_.reset(); + } + }; + +// member variables +private: + bool inited_; + int64_t worker_cnt_; + int64_t locate_count_; + IObLogRpc *rpc_; + IObLogErrHandler *err_handler_; + WorkerData *worker_data_; // The data belonging to each worker + + // Module Arena dispenser with multi-threaded support + AllocatorType allocator_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogStartLogIdLocator); +}; + +typedef common::ObCurTraceId::TraceId TraceIdType; + +//////////////////////////// StartLogIdLocateReq //////////////////////////// +/// +/// StartLogIdLocateReq +//// +//// Request Status. +/// - IDLE: Idle state +/// - REQ: requesting state, result is not readable, external need to ensure request memory validity +/// - DONE: request processing completed, result can be read and reset +struct StartLogIdLocateReq +{ + enum State { IDLE = 0, REQ, DONE }; + struct SvrItem + { + common::ObAddr svr_; + bool rpc_executed_; + int rpc_err_; + int svr_err_; + int partition_err_; + obrpc::BreakInfo breakinfo_; + uint64_t start_log_id_; + uint64_t start_log_tstamp_; + TraceIdType trace_id_; + + TO_STRING_KV( + K_(svr), + K_(rpc_executed), + K_(rpc_err), + K_(svr_err), + K_(partition_err), + K_(breakinfo), + K_(start_log_id), + K_(start_log_tstamp), + K_(trace_id)); + + void reset(); + void reset(const common::ObAddr &svr); + + void set_result(const int rpc_err, + const int svr_err, + const int partition_err, + const obrpc::BreakInfo &breakinfo, + const uint64_t start_log_id, + const int64_t start_log_tstamp, + const TraceIdType *trace_id); + }; + static const int64_t DEFAULT_SERVER_NUM = 16; + typedef common::ObSEArray SvrList; + + // state + State state_; + + // request parameters + common::ObPartitionKey pkey_; + int64_t start_tstamp_; + + // server list + SvrList svr_list_; + int64_t svr_list_consumed_; + + // The server index where the result of a valid request is located, always pointing to the maximum start_log_id-server + // Invalid value: -1 + int64_t result_svr_list_idx_; + // Record the maximum start_log_id and the corresponding timestamp for the current partition location + uint64_t cur_max_start_log_id_; + int64_t cur_max_start_log_tstamp_; + // Record the number of start_log_id's that have been successfully located at the server + int64_t succ_locate_count_; + + TO_STRING_KV( + K_(state), + K_(pkey), + K_(start_tstamp), + "svr_cnt", svr_list_.count(), + K_(svr_list_consumed), + K_(result_svr_list_idx), + K_(cur_max_start_log_id), + K_(cur_max_start_log_tstamp), + K_(succ_locate_count), + K_(svr_list)); + + void reset(); + void reset(const common::ObPartitionKey &pkey, const int64_t start_tstamp); + void set_state(const State state) { ATOMIC_STORE(&state_, state); } + State get_state() const { return (ATOMIC_LOAD(&state_)); } + + void set_state_idle() { ATOMIC_STORE(&state_, IDLE); } + void set_state_req() { 
ATOMIC_STORE(&state_, REQ); } + void set_state_done() { ATOMIC_STORE(&state_, DONE); } + bool is_state_idle() const { return (ATOMIC_LOAD(&state_)) == IDLE; } + bool is_state_req() const { return (ATOMIC_LOAD(&state_)) == REQ; } + bool is_state_done() const { return (ATOMIC_LOAD(&state_)) == DONE; } + + int next_svr_item(SvrItem *&svr_item); + int cur_svr_item(SvrItem *&svr_item); + + bool is_request_ended(const int64_t locate_count) const; + bool get_result(uint64_t &start_log_id, common::ObAddr &svr); + + int set_result(const common::ObAddr &svr, + const int rpc_err, + const int svr_err, + const int partition_err, + const obrpc::BreakInfo &breakinfo, + const uint64_t start_log_id, + const int64_t start_log_tstamp, + const TraceIdType *trace_id); + +private: + void check_locate_result_(const int64_t start_log_tstamp, + const uint64_t start_log_id, + const common::ObAddr &svr, + bool &is_consistent) const; + +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_start_schema_matcher.cpp b/src/liboblog/src/ob_log_start_schema_matcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5d9fa269e09c2f004c1ce385363a3da04e5c1f76 --- /dev/null +++ b/src/liboblog/src/ob_log_start_schema_matcher.cpp @@ -0,0 +1,198 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "ob_log_start_schema_matcher.h" +#include "ob_log_utils.h" // ob_log_malloc +#include "ob_log_instance.h" +#include "lib/string/ob_string.h" + +namespace oceanbase +{ +namespace liboblog +{ +ObLogStartSchemaMatcher::ObLogStartSchemaMatcher() : + buf_(NULL), + buf_size_(0), + tenant_schema_array_() +{ } + +ObLogStartSchemaMatcher::~ObLogStartSchemaMatcher() +{ + (void)destroy(); +} + +int ObLogStartSchemaMatcher::init(const char *schema_version_str) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(set_pattern_(schema_version_str))) { + OBLOG_LOG(ERROR, "init fail", KR(ret), K(schema_version_str)); + } else { + // succ + } + return ret; +} + +int ObLogStartSchemaMatcher::destroy() +{ + int ret = OB_SUCCESS; + + if (NULL != buf_) { + ob_log_free(buf_); + buf_ = NULL; + } + buf_size_ = 0; + + tenant_schema_array_.reset(); + return ret; +} + +int ObLogStartSchemaMatcher::set_pattern_(const char *schema_version_str) +{ + int ret = OB_SUCCESS; + char **buffer = &buf_; + int64_t *buffer_size = &buf_size_; + bool build_pattern = true; + + if (OB_ISNULL(schema_version_str) || 0 == strlen(schema_version_str) + || 0 == strcmp(schema_version_str, DEFAULT_START_SCHEMA_VERSION_STR)) { + ret = OB_SUCCESS; + build_pattern = false; + OBLOG_LOG(INFO, "schema_version_str is NULL or default", KR(ret), K(schema_version_str), + K(DEFAULT_START_SCHEMA_VERSION_STR)); + } else { + int tmp_ret = 0; + *buffer_size = strlen(schema_version_str) + 1; + // Alloc + if (OB_ISNULL(*buffer = reinterpret_cast(ob_log_malloc(*buffer_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + OBLOG_LOG(ERROR, "err alloc pattern string buffer", KR(ret), K(buffer_size)); + } else if (*buffer_size <= (tmp_ret = snprintf(*buffer, *buffer_size, "%s", schema_version_str)) + || (tmp_ret < 0)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err snprintf", KR(ret), K(tmp_ret), K(buffer_size), K(schema_version_str)); + } else { + OBLOG_LOG(DEBUG, "pattern string", KR(ret), K(schema_version_str), K(buf_), K(buf_size_)); + } + } + + // Split String + if (OB_SUCC(ret) && build_pattern) { + if (OB_FAIL(build_tenant_schema_version_())) { + OBLOG_LOG(ERROR, "build tenant schema version failed", KR(ret), K(buf_), K(buf_size_)); + } else { + //succ + OBLOG_LOG(INFO, "data_start_schema_version set_pattern succ", KR(ret), + K(tenant_schema_array_), K(schema_version_str)); + } + } + + if (OB_UNLIKELY(OB_SUCCESS != ret) && NULL != *buffer) { + ob_log_free(*buffer); + *buffer = NULL; + *buffer_size = 0; + } + + return ret; +} + +int ObLogStartSchemaMatcher::build_tenant_schema_version_() +{ + int ret = OB_SUCCESS; + const char pattern_delimiter = '|'; + char **buffer = &buf_; + bool done = false; + + TenantSchemaArray &tenant_schema_array = tenant_schema_array_; + + if (OB_ISNULL(buf_) || OB_UNLIKELY(buf_size_ <= 0)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "invalid buffer", KR(ret), K(buf_), K(buf_size_)); + } else { + ObString remain(strlen(*buffer), *buffer); + ObString cur_pattern; + TenantSchema tenant_schema; + + while (OB_SUCCESS == ret && !done) { + tenant_schema.reset(); + cur_pattern = remain.split_on(pattern_delimiter); + if (cur_pattern.empty()) { + cur_pattern = remain; + done = true; + } + + if (OB_SUCC(ret)) { + ObString &str = cur_pattern; + *(str.ptr() + str.length()) = '\0'; + str.set_length(1 + str.length()); + } + + uint64_t tenant_id = 0; + int64_t schema_version = -1; + if (OB_SUCC(ret)) { + if (2 != sscanf(cur_pattern.ptr(), "%lu:%ld", &tenant_id, &schema_version)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "sscanf 
failed, data_start_schema_version pattern is invalid", + KR(ret), K(cur_pattern), K(tenant_id), K(schema_version)); + } else if (OB_UNLIKELY(0 == tenant_id) || OB_UNLIKELY(schema_version <= 0)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "data_start_schema_version input is invalid, set pattern fail", + KR(ret), K(cur_pattern), K(tenant_id), K(schema_version)); + } else { + tenant_schema.tenant_id_ = tenant_id; + tenant_schema.schema_version_ = schema_version; + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(tenant_schema_array.push_back(tenant_schema))) { + OBLOG_LOG(ERROR, "tenant schema array push back failed", KR(ret), K(tenant_schema)); + } + } + } // while + } + + return ret; +} + +int ObLogStartSchemaMatcher::match_data_start_schema_version(const uint64_t tenant_id, + bool &match, + int64_t &schema_version) +{ + int ret = OB_SUCCESS; + match = false; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "tenant_id is invalid", KR(ret), K(tenant_id)); + } else { + for (int64_t i = 0; OB_SUCCESS == ret && !match && i < tenant_schema_array_.count(); i++) { + TenantSchema &tenant_schema = tenant_schema_array_.at(i); + if (tenant_schema.tenant_id_ == tenant_id) { + match = true; + schema_version = tenant_schema.schema_version_; + } + } + } + + if (OB_SUCC(ret) && match) { + OBLOG_LOG(INFO, "[START_SCHEMA_MATCH] set_data_start_schema_version succ", + K(tenant_id), KR(ret), K(match), K(schema_version)); + } + return ret; +} + +void ObLogStartSchemaMatcher::TenantSchema::reset() +{ + tenant_id_ = 0; + schema_version_ = 0; +} +} +} diff --git a/src/liboblog/src/ob_log_start_schema_matcher.h b/src/liboblog/src/ob_log_start_schema_matcher.h new file mode 100644 index 0000000000000000000000000000000000000000..59639cedc6b2835cc30c0675fc3aa09935684d49 --- /dev/null +++ b/src/liboblog/src/ob_log_start_schema_matcher.h @@ -0,0 +1,84 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_START_SCHEMA_MATCHER_H__ +#define OCEANBASE_LIBOBLOG_START_SCHEMA_MATCHER_H__ + +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/container/ob_array.h" // ObArray + +namespace oceanbase +{ +namespace liboblog +{ +class IObLogStartSchemaMatcher +{ +public: + virtual ~IObLogStartSchemaMatcher() {} + +public: + // match function + virtual int match_data_start_schema_version(const uint64_t tenant_id, + bool &match, + int64_t &schema_version) = 0; +}; + + +/* + * Impl. 
+ * + */ +class ObLogStartSchemaMatcher : public IObLogStartSchemaMatcher +{ + const char* DEFAULT_START_SCHEMA_VERSION_STR = "|"; +public: + ObLogStartSchemaMatcher(); + virtual ~ObLogStartSchemaMatcher(); + +public: + int init(const char *schema_version_str); + int destroy(); + + // Matches a tenant and returns the schema_version set according to the profile + int match_data_start_schema_version(const uint64_t tenant_id, bool &match, int64_t &schema_version); + +private: + // Initialising the configuration according to the configuration file + int set_pattern_(const char *schema_version_str); + + int build_tenant_schema_version_(); + +private: + struct TenantSchema + { + uint64_t tenant_id_; + int64_t schema_version_; + + void reset(); + + TO_STRING_KV(K(tenant_id_), K(schema_version_)); + }; + typedef common::ObArray TenantSchemaArray; + +private: + char *buf_; + int64_t buf_size_; + + TenantSchemaArray tenant_schema_array_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogStartSchemaMatcher); +}; + +} // liboblog +} // oceanbase +#endif /* OCEANBASE_LIBOBLOG_START_SCHEMA_MATCHER_H__ */ diff --git a/src/liboblog/src/ob_log_storager.cpp b/src/liboblog/src/ob_log_storager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..57bca2a7b103528285650f5488b04a02671eddac --- /dev/null +++ b/src/liboblog/src/ob_log_storager.cpp @@ -0,0 +1,635 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "lib/string/ob_string.h" // ObString +#include // LogMsgInit, LogMsgDestroy +#include // ITableMeta +#include "ob_log_storager.h" +#include "ob_log_row_data_index.h" // ObLogRowDataIndex +#include "ob_log_instance.h" +#include "ob_log_store_service.h" + +using namespace oceanbase::common; +using namespace oceanbase::logmessage; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogStorager::ObLogStorager() : + inited_(false), + round_value_(0), + store_buf_array_(), + rps_stat_(), + last_stat_time_(0), + log_entry_task_count_(0), + store_service_stat_(), + store_service_(NULL), + err_handler_(NULL) +{ +} + +ObLogStorager::~ObLogStorager() +{ + destroy(); +} + +int ObLogStorager::init(const int64_t thread_num, + const int64_t queue_size, + IObStoreService &store_service, + IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogStorager has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(queue_size <= 0)) { + LOG_ERROR("invalid arguments", K(thread_num), K(queue_size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(StoragerThread::init(thread_num, queue_size))) { + LOG_ERROR("init storager queue thread fail", K(ret), K(thread_num), K(queue_size)); + } else if (OB_FAIL(init_store_buf_array_(thread_num))) { + LOG_ERROR("init_store_buf_array_ fail", KR(ret), K(thread_num)); + } else { + round_value_ = 0; + rps_stat_.reset(); + last_stat_time_ = get_timestamp(); + store_service_ = &store_service; + log_entry_task_count_ = 0; + err_handler_ = &err_handler; + inited_ = true; + } + + return ret; +} + +void ObLogStorager::destroy() +{ + if (inited_) { + const int64_t thread_num = StoragerThread::get_thread_num(); + StoragerThread::destroy(); + + LogMsgDestroy(); + + inited_ = false; + round_value_ = 0; + destroy_store_buf_array_(thread_num); + rps_stat_.reset(); + last_stat_time_ = 0; + log_entry_task_count_ = 0; + store_service_stat_.reset(); + store_service_ = NULL; + err_handler_ = NULL; + } +} + +int ObLogStorager::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogStorager has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(StoragerThread::start())) { + LOG_ERROR("start storager thread fail", K(ret), "thread_num", get_thread_num()); + } else { + LOG_INFO("start storager threads succ", "thread_num", get_thread_num()); + } + + return ret; +} + +void ObLogStorager::stop() +{ + if (inited_) { + StoragerThread::stop(); + LOG_INFO("stop storager threads succ", "thread_num", get_thread_num()); + } +} + +int ObLogStorager::push(ObLogEntryTask &task, const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogStorager has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! 
task.is_valid())) { + LOG_ERROR("invalid arguments", K(task)); + ret = OB_INVALID_ARGUMENT; + } else { + uint64_t hash_value = ATOMIC_FAA(&round_value_, 1); + void *push_task = static_cast(&task); + if (OB_FAIL(StoragerThread::push(push_task, hash_value, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("push task into storager fail", K(ret), K(push_task), K(hash_value)); + } + } else { + // succ + ATOMIC_INC(&log_entry_task_count_); + } + } + + return ret; +} + +int ObLogStorager::thread_begin() +{ + int ret = OB_SUCCESS; + int tmp_ret = 0; + + // First call, initialization required + if (0 != (tmp_ret = LogMsgLocalInit())) { + LOG_ERROR("LogMsgLocalInit fail", K(tmp_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("ObLogStorager LogMsgLocalInit succ"); + } + + return ret; +} + +void ObLogStorager::thread_end() +{ + // LogMsgLocalDestroy return void + LogMsgLocalDestroy(); + LOG_INFO("ObLogStorager LogMsgLocalDestroy succ"); +} + +int ObLogStorager::handle(void *data, const int64_t thread_index, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ObLogEntryTask *task = static_cast(data); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogStorager has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task) || OB_UNLIKELY(! task->is_valid())) { + LOG_ERROR("invalid arguments", KPC(task)); + ret = OB_INVALID_ARGUMENT; + } else { + if (OB_FAIL(handle_log_entry_task_(*task, thread_index, stop_flag))) { + LOG_ERROR("handle_log_entry_task_ fail", KR(ret), KPC(task), K(thread_index)); + } else if (OB_FAIL(handle_task_callback_(*task, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_task_callback_ fail", KR(ret), KPC(task)); + } + } else { + task = NULL; + ATOMIC_DEC(&log_entry_task_count_); + } + } + + if (OB_SUCC(ret)) { + if (0 == thread_index) { + if (REACH_TIME_INTERVAL(PRINT_TASK_COUNT_INTERVAL)) { + print_task_count_(); + } + // TODO Improvements to monitoring items + // rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTime); // enable profiling + + print_rps_(); + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + // exit on fail + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "storager thread exits, thread_index=%ld, err=%d", + thread_index, ret); + stop_flag = true; + } + + return ret; +} + +int ObLogStorager::handle_log_entry_task_(ObLogEntryTask &log_entry_task, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = log_entry_task.get_tenant_id(); + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + DmlRedoLogMetaNode *dml_meta_node = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogStorager has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(TCTX.get_tenant_guard(tenant_id, guard))) { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(dml_meta_node = log_entry_task.get_meta_node())) { + LOG_ERROR("dml_meta_node is NULL"); + ret = OB_ERR_UNEXPECTED; + } else { + tenant = guard.get_tenant(); + void *column_family_handle = tenant->get_cf(); + ObLogRowDataIndex *row_data_index = dml_meta_node->get_row_head(); + LOG_DEBUG("handle_log_entry_task_", K(thread_index), KPC(row_data_index), K(log_entry_task)); + + while (OB_SUCC(ret) && ! 
stop_flag && NULL != row_data_index) { + const bool is_rollback = row_data_index->is_rollback(); + + if (is_rollback) { + LOG_DEBUG("is_rollback stmt, do nothing", KPC(row_data_index)); + // serialize + } else if (OB_FAIL(to_string_binlog_record_(log_entry_task, *row_data_index, thread_index))) { + LOG_ERROR("ILogRecord to_string fail", K(ret), K(log_entry_task)); + } else if (OB_FAIL(write_store_service_(*row_data_index, column_family_handle, thread_index))) { + LOG_ERROR("write_store_service_ fail", KR(ret), K(log_entry_task)); + } else { + // succ + } + + // rollback row and not-rollback row all free_br_data + if (OB_SUCC(ret)) { + if (OB_FAIL(row_data_index->free_br_data())) { + LOG_ERROR("row_data_index free_br_data fail", KR(ret), K(log_entry_task)); + } + } + + if (OB_SUCC(ret)) { + row_data_index = row_data_index->get_next(); + } + } + } + + + return ret; +} + +int ObLogStorager::to_string_binlog_record_(ObLogEntryTask &log_entry_task, + ObLogRowDataIndex &row_data_index, + const int64_t thread_index) +{ + int ret = OB_SUCCESS; + ObLogBR *task = row_data_index.get_binlog_record(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogStorager has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", KPC(task), K(row_data_index), K(log_entry_task)); + ret = OB_INVALID_ARGUMENT; + } else { + ILogRecord *binlog_record = task->get_data(); + + if (OB_ISNULL(binlog_record)) { + LOG_ERROR("invalid argument", K(binlog_record), K(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else { + const int record_type = binlog_record->recordType(); + const char *buf = NULL; // Pointer to the first address of the BinlogRecord serialized byte stream + size_t buf_len = 0; // Length of BinlogRecord serialised byte stream + buf = binlog_record->toString(&buf_len); + StoreBuf &store_buf = store_buf_array_[thread_index]; + void *ptr = NULL; + char *alloc_buf = NULL; + + if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0)) { + LOG_ERROR("ILogRecord toString fail", KPC(task), K(record_type), K(binlog_record), K(buf), K(buf_len)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(store_buf.alloc(buf_len + 1, ptr))) { + LOG_ERROR("store_buf alloc fail", KR(ret), K(buf_len)); + } else if (OB_ISNULL(alloc_buf = static_cast(ptr))) { + LOG_ERROR("alloc_buf is NULL"); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(alloc_buf, buf, buf_len); + alloc_buf[buf_len] = '\0'; + + // Statistics rps + rps_stat_.do_rps_stat(1); + store_service_stat_.do_data_stat(buf_len); + } + } + } + + return ret; +} + +int ObLogStorager::write_store_service_(ObLogRowDataIndex &row_data_index, + void *column_family_handle, + const int64_t thread_index) +{ + int ret = OB_SUCCESS; + std::string key; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogStorager has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(row_data_index.get_storage_key(key))) { + LOG_ERROR("get_storage_key fail", KR(ret)); + } else { + StoreBuf &store_buf = store_buf_array_[thread_index]; + const char *br_string = store_buf.get_buf(); + const size_t br_string_len = store_buf.get_buf_len(); + // print_serilized_br_value_(row_data_index, key, br_string, br_string_len); + + if (OB_FAIL(store_service_->put(column_family_handle, key, ObSlice(br_string, br_string_len)))) { + LOG_ERROR("store_service_ put fail", KR(ret), K(key.c_str()), K(br_string_len)); + } else { + LOG_DEBUG("store_service_ put succ", K(key.c_str()), K(br_string_len), K(br_string)); + store_buf.free(); + } + } + + return ret; +} + +int ObLogStorager::handle_task_callback_(ObLogEntryTask &log_entry_task, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + PartTransTask *part_trans_task = static_cast(log_entry_task.get_host()); + bool is_unserved_part_trans_task_can_be_recycled = false; + + if (OB_ISNULL(part_trans_task)) { + LOG_ERROR("part_trans_task is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(part_trans_task->handle_log_entry_task_callback(ObLogEntryTask::STORAGER_CB, + log_entry_task, + is_unserved_part_trans_task_can_be_recycled))) { + LOG_ERROR("handle_log_entry_task_callback fail", KR(ret), K(log_entry_task), KPC(part_trans_task), K(stop_flag)); + } else if (is_unserved_part_trans_task_can_be_recycled) { + LOG_DEBUG("handle_log_entry_task_callback: part_trans_task is revert", K(part_trans_task)); + part_trans_task->revert(); + } else {} + + return ret; +} + +ObLogStorager::StoreBuf::StoreBuf() : + data_buf_(), + use_data_buf_(true), + big_buf_(NULL), + buf_len_(0) +{ +} + +ObLogStorager::StoreBuf::~StoreBuf() +{ + destroy(); +} + +int ObLogStorager::StoreBuf::init(const int64_t size) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(size <= 0)) { + LOG_ERROR("invalid argument", K(size)); + ret = OB_INVALID_ARGUMENT; + } else { + void *ptr = ob_malloc(size); + + if (OB_ISNULL(ptr)) { + LOG_ERROR("ptr is NULL"); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + data_buf_.set_data(static_cast(ptr), size); + use_data_buf_ = true; + buf_len_ = 0; + } + } + + return ret; +} + +void ObLogStorager::StoreBuf::destroy() +{ + char *data = data_buf_.get_data(); + if (NULL != data) { + ob_free(data); + data_buf_.reset(); + } + + if (NULL != big_buf_) { + ob_free(big_buf_); + big_buf_ = NULL; + } + + use_data_buf_ = true; + buf_len_ = 0; +} + +int ObLogStorager::StoreBuf::alloc(const int64_t sz, void *&ptr) +{ + int ret = OB_SUCCESS; + ptr = data_buf_.alloc(sz); + buf_len_ = sz; + + if (NULL != ptr) { + use_data_buf_ = true; + } else { + if (OB_ISNULL(big_buf_ = static_cast(ob_malloc(sz)))) { + LOG_ERROR("alloc big_buf_ fail"); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + use_data_buf_ = false; + ptr = big_buf_; + } + } + + return ret; +} + +void ObLogStorager::StoreBuf::free() +{ + if (use_data_buf_) { + data_buf_.free(); + } else { + if (NULL != big_buf_) { + ob_free(big_buf_); + big_buf_ = NULL; + } + } +} + +const char *ObLogStorager::StoreBuf::get_buf() const +{ + const char *ret_buf = NULL; + + if (use_data_buf_) { + ret_buf = data_buf_.get_data(); + } else { + ret_buf = big_buf_; + } + + return ret_buf; +} + +int ObLogStorager::init_store_buf_array_(const int64_t thread_num) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(thread_num <= 0)) { + LOG_ERROR("invalid argument", K(thread_num)); + ret = OB_INVALID_ARGUMENT; + } else { + for 
(int64_t idx = 0; OB_SUCC(ret) && idx < thread_num; ++idx) { + if (OB_FAIL(store_buf_array_[idx].init(MAX_STORAG_DATA_BUF_SIZE + 1))) { + LOG_ERROR("store_buf init fail", KR(ret), K(idx)); + } + } // for + } + + return ret; +} + +void ObLogStorager::destroy_store_buf_array_(const int64_t thread_num) +{ + for (int64_t idx = 0; idx < thread_num; ++idx) { + store_buf_array_[idx].destroy(); + } // for +} + +void ObLogStorager::print_task_count_() +{ + int ret = OB_SUCCESS; + int64_t total_thread_num = get_thread_num(); + + for (int64_t idx = 0; OB_SUCC(ret) && idx < total_thread_num; ++idx) { + int64_t task_count = 0; + if (OB_FAIL(get_task_num(idx, task_count))) { + LOG_ERROR("get_task_num fail", K(ret)); + } else { + _LOG_INFO("[STAT] [STORAGER] [%ld/%ld] COUNT=%ld", idx, total_thread_num, task_count); + } + } +} + +void ObLogStorager::print_rps_() +{ + if (REACH_TIME_INTERVAL(PRINT_RPS_STAT_INTERVAL)) { + int64_t current_timestamp = get_timestamp(); + int64_t local_last_stat_time = last_stat_time_; + int64_t delta_time = current_timestamp - local_last_stat_time; + // Update last statistic + last_stat_time_ = current_timestamp; + + // ObIncBackUpUploader + double storager_rps = rps_stat_.calc_rps(delta_time); + double write_rate = store_service_stat_.calc_rate(delta_time); + double write_total_size = store_service_stat_.get_total_data_size(); + _LOG_INFO("[STORAGE] [STAT] RPS=%.3lf WRITE_RATE=%.5fM/s WRITE_TOTAL_SIZE=%.5fG", + storager_rps, write_rate, write_total_size); + + // rocksdb::SetPerfLevel(rocksdb::PerfLevel::kDisable); + // _LOG_INFO("[STORAGE] [STAT] perf=%s", rocksdb::get_perf_context()->ToString().c_str()); + // rocksdb::get_perf_context()->Reset(); + // rocksdb::get_iostats_context()->Reset(); + // rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTime); // enable profiling + } +} + +int ObLogStorager::print_unserilized_br_value_(ObLogBR &task, + std::string &key) +{ + int ret = OB_SUCCESS; + ILogRecord *binlog_record = task.get_data(); + ObArray new_values; + bool is_table_meta_null = false; + ITableMeta *table_meta = NULL; + + if (0 != binlog_record->getTableMeta(table_meta)) { + LOG_ERROR("getTableMeta fail"); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t col_count = 0; + + if (NULL == table_meta) { + is_table_meta_null = true; + } else { + col_count = table_meta->getColCount(); + } + + if (OB_ISNULL(binlog_record)) { + LOG_ERROR("invalid argument", K(binlog_record)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(get_mem_br_value(binlog_record, new_values))) { + LOG_ERROR("get_mem_br_value fail", KR(ret)); + } else { + LOG_INFO("store_service_ before serilized", "key", key.c_str(), K(new_values), K(task), K(is_table_meta_null), + K(col_count)); + } + } + + return ret; +} + +int ObLogStorager::print_serilized_br_value_(ObLogRowDataIndex &row_data_index, + std::string &key, + const char *br_string, + const size_t br_string_len) +{ + int ret = OB_SUCCESS; + ILogRecord *binlog_record = LogMsgFactory::createLogRecord(TCTX.drc_message_factory_binlog_record_type_, false/*creating_binlog_record*/); + ObArray new_values; + ITableMeta *table_meta = NULL; + + if (OB_ISNULL(binlog_record)) { + LOG_ERROR("invalid argument", K(binlog_record)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(binlog_record->parse(br_string, br_string_len))) { + LOG_ERROR("binlog_record parse fail", K(ret), K(binlog_record), K(row_data_index)); + } else if (OB_FAIL(get_br_value(binlog_record, new_values))) { + LOG_ERROR("get_mem_br_value fail", KR(ret)); + } else if (OB_ISNULL(table_meta = 
LogMsgFactory::createTableMeta())) { + LOG_ERROR("table_meta is NULL"); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (0 != binlog_record->getTableMeta(table_meta)) { + LOG_ERROR("getTableMeta fail"); + ret = OB_ERR_UNEXPECTED; + } else { + bool is_table_meta_null = false; + int64_t col_count = 0; + + if (NULL == table_meta) { + is_table_meta_null = true; + } else { + col_count = table_meta->getColCount(); + } + + LOG_INFO("store_service_ serilized but before put", "key", key.c_str(), K(new_values), K(row_data_index), K(is_table_meta_null), + K(col_count)); + } + + if (NULL != table_meta) { + LogMsgFactory::destroy(table_meta); + } + + return ret; +} + +int ObLogStorager::read_store_service_(const std::string &key) +{ + int ret = OB_SUCCESS; + std::string br_string_res; + + if (OB_FAIL(store_service_->get(key, br_string_res))) { + LOG_ERROR("store_service_ get fail", KR(ret), K(key.c_str()), K(br_string_res.length())); + } else { + LOG_DEBUG("store_service_ get succ", KR(ret), K(key.c_str()), K(br_string_res.length()), K(br_string_res.c_str())); + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_storager.h b/src/liboblog/src/ob_log_storager.h new file mode 100644 index 0000000000000000000000000000000000000000..bf077a0fb06738cda1a7a6a2d0df86cb388f2b15 --- /dev/null +++ b/src/liboblog/src/ob_log_storager.h @@ -0,0 +1,152 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_STORAGER_H_ +#define OCEANBASE_LIBOBLOG_STORAGER_H_ + +#include "lib/thread/ob_multi_fixed_queue_thread.h" // ObMQThread +#include "ob_log_trans_stat_mgr.h" // TransRpsStatInfo +#include "ob_log_store_service_stat.h" // StoreServiceStatInfo +#include "ob_log_part_trans_task.h" + +namespace oceanbase +{ +namespace liboblog +{ +///////////////////////////////////////////////////////////////////////////////////////// +class IObLogStorager +{ +public: + enum + { + MAX_STORAGER_NUM = 64 + }; + +public: + virtual ~IObLogStorager() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + virtual int push(ObLogEntryTask &task, int64_t timeout) = 0; + virtual void get_task_count(int64_t &log_entry_task_count) const = 0; +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +class IObStoreService; +class IObLogErrHandler; +class ObLogRowDataIndex; + +typedef common::ObMQThread StoragerThread; + +class ObLogStorager : public IObLogStorager, public StoragerThread +{ +public: + ObLogStorager(); + virtual ~ObLogStorager(); + +public: + virtual int thread_begin() override; + virtual void thread_end() override; + +public: + int start(); + void stop(); + void mark_stop_flag() { StoragerThread::mark_stop_flag(); } + int push(ObLogEntryTask &task, int64_t timeout); + void get_task_count(int64_t &log_entry_task_count) const { log_entry_task_count = ATOMIC_LOAD(&log_entry_task_count_); } + int handle(void *data, const int64_t thread_index, volatile bool &stop_flag); + +public: + int init(const int64_t thread_num, + const int64_t queue_size, + IObStoreService &store_service, + IObLogErrHandler &err_handler); + void destroy(); + +private: + static const int64_t DATA_OP_TIMEOUT = 1 * 1000 * 1000; + static const int64_t PRINT_TASK_COUNT_INTERVAL = 10 * _SEC_; + static const int64_t PRINT_RPS_STAT_INTERVAL = 10 * _SEC_; + static const int64_t MAX_STORAG_DATA_BUF_SIZE = 2L * 1024L * 1024L; // 2M + struct StoreBuf + { + StoreBuf(); + ~StoreBuf(); + + int init(const int64_t size); + void destroy(); + + int alloc(const int64_t sz, void *&ptr); + void free(); + + int64_t get_buf_len() const { return buf_len_; } + const char *get_buf() const; + + ObDataBuffer data_buf_; + bool use_data_buf_; + char *big_buf_; + int64_t buf_len_; + }; + +private: + int init_store_buf_array_(const int64_t thread_num); + void destroy_store_buf_array_(const int64_t thread_num); + int handle_log_entry_task_(ObLogEntryTask &log_entry_task, + const int64_t thread_index, + volatile bool &stop_flag); + // BinlogRecord serialisation to byte streams + int to_string_binlog_record_(ObLogEntryTask &log_entry_task, + ObLogRowDataIndex &row_data_index, + const int64_t thread_index); + int write_store_service_(ObLogRowDataIndex &row_data_index, + void *column_family_handle, + const int64_t thread_index); + int handle_task_callback_(ObLogEntryTask &log_entry_task, + volatile bool &stop_flag); + + void print_task_count_(); + void print_rps_(); + + // for test and debug + int print_unserilized_br_value_(ObLogBR &br, + std::string &key); + int print_serilized_br_value_(ObLogRowDataIndex &row_data_index, + std::string &key, + const char *buf, + const size_t buf_len); + int read_store_service_(const std::string &key); + +private: + bool inited_; + // To ensure that tasks are evenly distributed to threads + uint64_t round_value_; + // Thread local buf + StoreBuf store_buf_array_[IObLogStorager::MAX_STORAGER_NUM]; + + TransRpsStatInfo 
rps_stat_; + int64_t last_stat_time_ CACHE_ALIGNED; + int64_t log_entry_task_count_ CACHE_ALIGNED; + StoreServiceStatInfo store_service_stat_; + + IObStoreService *store_service_; + IObLogErrHandler *err_handler_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogStorager); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif diff --git a/src/liboblog/src/ob_log_store_service.h b/src/liboblog/src/ob_log_store_service.h new file mode 100644 index 0000000000000000000000000000000000000000..3f885f5f66de10fd7d2215a5248cd6369d84997e --- /dev/null +++ b/src/liboblog/src/ob_log_store_service.h @@ -0,0 +1,74 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_STORE_SERVICE_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_STORE_SERVICE_H_ + +#include +#include +#include "lib/oblog/ob_log_module.h" + +namespace oceanbase +{ +namespace liboblog +{ +struct ObSlice +{ +public: + ObSlice() : buf_(NULL), buf_len_(0) {} + ObSlice(const char *buf, const int64_t buf_len) : buf_(buf), buf_len_(buf_len) {} + ~ObSlice() { reset(); } + + void reset() + { + buf_ = NULL; + buf_len_ = 0; + } + bool is_valid() { return (NULL != buf_ && buf_len_ > 0); } + +public: + const char *buf_; + int64_t buf_len_; +}; + +class IObStoreService +{ +public: + virtual ~IObStoreService() {} + virtual int init(const std::string &path) = 0; + virtual int close() = 0; + +public: + virtual int put(const std::string &key, const ObSlice &value) = 0; + virtual int put(void *cf_handle, const std::string &key, const ObSlice &value) = 0; + + virtual int batch_write(void *cf_handle, const std::vector &keys, const std::vector &values) = 0; + + virtual int get(const std::string &key, std::string &value) = 0; + virtual int get(void *cf_handle, const std::string &key, std::string &value) = 0; + + virtual int del(const std::string &key) = 0; + virtual int del(void *cf_handle, const std::string &key) = 0; + + virtual int create_column_family(const std::string& column_family_name, + void *&cf_handle) = 0; + virtual int drop_column_family(void *cf_handle) = 0; + virtual int destory_column_family(void *cf_handle) = 0; + + virtual void get_mem_usage(const std::vector ids, + const std::vector cf_handles) = 0; +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_store_service_stat.cpp b/src/liboblog/src/ob_log_store_service_stat.cpp new file mode 100644 index 0000000000000000000000000000000000000000..936a0cb6598b3a130c91b4b86b71ad914310986a --- /dev/null +++ b/src/liboblog/src/ob_log_store_service_stat.cpp @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_store_service_stat.h" +#include "ob_log_utils.h" // _G_ +#include "lib/oblog/ob_log.h" // ObLogger +#include "lib/oblog/ob_log_module.h" // LOG_* +#include "lib/ob_errno.h" + +namespace oceanbase +{ +namespace liboblog +{ +void StoreServiceStatInfo::reset() +{ + total_data_size_ = 0; + last_total_data_size_ = 0; +} + +void StoreServiceStatInfo::do_data_stat(int64_t record_len) +{ + ATOMIC_AAF(&total_data_size_, record_len); +} + +double StoreServiceStatInfo::calc_rate(const int64_t delta_time) +{ + double rate = 0.0; + const int64_t local_totol_data_size = ATOMIC_LOAD(&total_data_size_); + const int64_t local_last_totol_data_size = ATOMIC_LOAD(&last_total_data_size_); + const int64_t delta_data_size = local_totol_data_size - local_last_totol_data_size; + double delta_data_size_formatted = (double)delta_data_size / (double)_M_; + + if (delta_time > 0) { + rate = (double)(delta_data_size_formatted) * 1000000.0 / (double)delta_time; + } + + // Update the last statistics + last_total_data_size_ = local_totol_data_size; + + return rate; +} + +double StoreServiceStatInfo::get_total_data_size() const +{ + double total_size = 0.0; + const int64_t local_totol_data_size = ATOMIC_LOAD(&total_data_size_); + total_size = (double)local_totol_data_size / (double)_G_; + + return total_size; +} + +} +} diff --git a/src/liboblog/src/ob_log_store_service_stat.h b/src/liboblog/src/ob_log_store_service_stat.h new file mode 100644 index 0000000000000000000000000000000000000000..9dff186fab92d6e351a4cfc7f6ef983c54fd6ba6 --- /dev/null +++ b/src/liboblog/src/ob_log_store_service_stat.h @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_STORE_SERVICE_STAT_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_STORE_SERVICE_STAT_H_ + +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV + +namespace oceanbase +{ +namespace liboblog +{ +struct StoreServiceStatInfo +{ + int64_t total_data_size_ CACHE_ALIGNED; + int64_t last_total_data_size_ CACHE_ALIGNED; + + StoreServiceStatInfo() { reset(); } + ~StoreServiceStatInfo() { reset(); } + + void reset(); + + void do_data_stat(int64_t record_len); + + double calc_rate(const int64_t delta_time); + + double get_total_data_size() const; + + TO_STRING_KV(K_(total_data_size), + K_(last_total_data_size)); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_stream_worker.cpp b/src/liboblog/src/ob_log_stream_worker.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7bc686437fa61c4e662e2768f760c5ed5d803096 --- /dev/null +++ b/src/liboblog/src/ob_log_stream_worker.cpp @@ -0,0 +1,565 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_stream_worker.h" + +#include "ob_log_timer.h" // ObLogFixedTimer +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_all_svr_cache.h" // IObLogAllSvrCache +#include "ob_log_part_fetch_ctx.h" // PartFetchCtx +#include "ob_log_fetcher_idle_pool.h" // IObLogFetcherIdlePool +#include "ob_log_fetcher_dead_pool.h" // IObLogFetcherDeadPool + +namespace oceanbase +{ +namespace liboblog +{ + +// Defining class global variables +int64_t ObLogStreamWorker::g_blacklist_survival_time = + ObLogConfig::default_blacklist_survival_time_sec * _SEC_; + +bool ObLogStreamWorker::g_print_stream_dispatch_info = + ObLogConfig::default_print_stream_dispatch_info; + +ObLogStreamWorker::ObLogStreamWorker() : + inited_(false), + stream_paused_(false), + fetcher_resume_time_(OB_INVALID_TIMESTAMP), + rpc_(NULL), + idle_pool_(NULL), + dead_pool_(NULL), + svr_finder_(NULL), + err_handler_(NULL), + all_svr_cache_(NULL), + heartbeater_(NULL), + progress_controller_(NULL), + timer_(), + fs_pool_(), + rpc_result_pool_(), + svr_stream_map_(), + svr_stream_pool_(), + svr_stream_alloc_lock_(), + stream_task_seq_(0) +{} + +ObLogStreamWorker::~ObLogStreamWorker() +{ + destroy(); +} + +int ObLogStreamWorker::init(const int64_t worker_thread_num, + const int64_t svr_stream_cached_count, + const int64_t fetch_stream_cached_count, + const int64_t rpc_result_cached_count, + const int64_t max_timer_task_count, + IObLogRpc &rpc, + IObLogFetcherIdlePool &idle_pool, + IObLogFetcherDeadPool &dead_pool, + IObLogSvrFinder &svr_finder, + IObLogErrHandler &err_handler, + IObLogAllSvrCache &all_svr_cache, + IObLogFetcherHeartbeatWorker &heartbeater, + PartProgressController &progress_controller) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(worker_thread_num <= 0) + || OB_UNLIKELY(worker_thread_num > IObLogStreamWorker::MAX_THREAD_NUM)) { + LOG_ERROR("invalid argument", K(worker_thread_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(timer_.init(err_handler, max_timer_task_count))) { + LOG_ERROR("init timer fail", KR(ret), K(max_timer_task_count)); + } else if (OB_FAIL(fs_pool_.init(fetch_stream_cached_count))) { + LOG_ERROR("init fetch stream pool fail", KR(ret), K(fetch_stream_cached_count)); + } else if (OB_FAIL(rpc_result_pool_.init(rpc_result_cached_count))) { + LOG_ERROR("init rpc result pool fail", KR(ret), K(rpc_result_cached_count)); + } + // Initializing the thread pool + else if (OB_FAIL(StreamWorkerThread::init(worker_thread_num, + ObModIds::OB_LOG_STREAM_WORKER_THREAD))) { + LOG_ERROR("init worker thread fail", KR(ret), K(worker_thread_num)); + } else if (OB_FAIL(svr_stream_map_.init(ObModIds::OB_LOG_SVR_STREAM_MAP))) { + LOG_ERROR("init svr stream map fail", KR(ret)); + } else if (OB_FAIL(svr_stream_pool_.init(svr_stream_cached_count, + ObModIds::OB_LOG_SVR_STREAM_POOL, + OB_SERVER_TENANT_ID, + SVR_STREAM_POOL_BLOCK_SIZE))) { + LOG_ERROR("init svr stream pool fail", KR(ret)); + } else { + rpc_ = &rpc; + idle_pool_ = &idle_pool; + dead_pool_ = &dead_pool; + svr_finder_ = &svr_finder; + err_handler_ = 
&err_handler; + all_svr_cache_ = &all_svr_cache; + heartbeater_ = &heartbeater; + progress_controller_ = &progress_controller; + + stream_task_seq_ = 0; + + stream_paused_ = false; + fetcher_resume_time_ = OB_INVALID_TIMESTAMP; + inited_ = true; + + LOG_INFO("init stream worker succ", K(worker_thread_num), K(this)); + } + + return ret; +} + +void ObLogStreamWorker::destroy() +{ + stop(); + + inited_ = false; + stream_paused_ = false; + fetcher_resume_time_ = OB_INVALID_TIMESTAMP; + StreamWorkerThread::destroy(); + + // TODO:consideration of resource release globally + free_all_svr_stream_(); + + (void)svr_stream_map_.destroy(); + svr_stream_pool_.destroy(); + timer_.destroy(); + fs_pool_.destroy(); + + rpc_ = NULL; + idle_pool_ = NULL; + dead_pool_ = NULL; + svr_finder_ = NULL; + err_handler_ = NULL; + all_svr_cache_ = NULL; + heartbeater_ = NULL; + progress_controller_ = NULL; + stream_task_seq_ = 0; + + LOG_INFO("destroy stream worker succ"); +} + +int ObLogStreamWorker::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_FAIL(timer_.start())) { + LOG_ERROR("start timer thread fail", KR(ret)); + } else if (OB_FAIL(StreamWorkerThread::start())) { + LOG_ERROR("start stream worker fail", KR(ret)); + } else { + LOG_INFO("start stream worker succ"); + } + return ret; +} + +void ObLogStreamWorker::stop() +{ + if (OB_LIKELY(inited_)) { + StreamWorkerThread::stop(); + LOG_INFO("stop stream worker succ"); + } +} + +void ObLogStreamWorker::mark_stop_flag() +{ + timer_.mark_stop_flag(); + StreamWorkerThread::mark_stop_flag(); +} + +void ObLogStreamWorker::pause() +{ + if (OB_LIKELY(inited_)) { + ATOMIC_STORE(&stream_paused_, true); + LOG_INFO("pause stream worker succ", K_(stream_paused)); + } +} + +void ObLogStreamWorker::resume(int64_t fetcher_resume_tstamp) +{ + if (OB_LIKELY(inited_)) { + ATOMIC_STORE(&fetcher_resume_time_, fetcher_resume_tstamp); + ATOMIC_STORE(&stream_paused_, false); + LOG_INFO("resume stream worker succ", K_(stream_paused)); + } +} + +int64_t ObLogStreamWorker::get_fetcher_resume_tstamp() +{ + int64_t fetcher_resume_tstamp = ATOMIC_LOAD(&fetcher_resume_time_); + return fetcher_resume_tstamp; +} + +// TODO: Add monitoring log, print dispatch reason +int ObLogStreamWorker::dispatch_fetch_task(PartFetchCtx &task, const char *dispatch_reason) +{ + int ret = OB_SUCCESS; + + // Mark out the reason for the assignment + task.dispatch_out(dispatch_reason); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(idle_pool_) || OB_ISNULL(all_svr_cache_) || OB_ISNULL(dead_pool_)) { + LOG_ERROR("invalid handlers", K(idle_pool_), K(all_svr_cache_), K(dead_pool_)); + ret = OB_INVALID_ERROR; + } + // Recycle deleted partitions and add them to DEAD POOL + else if (OB_UNLIKELY(task.is_discarded())) { + LOG_DEBUG("[STAT] [STREAM_WORKER] [RECYCLE_FETCH_TASK]", "task", &task, K(task)); + + if (OB_FAIL(dead_pool_->push(&task))) { + LOG_DEBUG("push task into dead pool fail", KR(ret), K(task)); + } + } else { + ObAddr svr; + bool found_valid_svr = false; + + LOG_DEBUG("[STAT] [STREAM_WORKER] [DISPATCH_FETCH_TASK] begin to dispatch", + "task", &task, K(task)); + + // Get the next valid server for the service log + while (OB_SUCCESS == ret && ! found_valid_svr && OB_SUCC(task.next_server(svr))) { + found_valid_svr = is_svr_avail_(*all_svr_cache_, svr); + if (! 
found_valid_svr) { + // server is not available, blacklisted + int64_t svr_service_time = 0; + int64_t survival_time = ATOMIC_LOAD(&g_blacklist_survival_time); + if (task.add_into_blacklist(svr, svr_service_time, survival_time)) { + // add server to blacklist + LOG_ERROR("not-avail server, task add into blacklist fail", KR(ret), K(task), K(svr), + "svr_service_time", TVAL_TO_STR(svr_service_time), + "survival_time", TVAL_TO_STR(survival_time)); + } else { + LOG_DEBUG("not-avail server, task add into blacklist succ", KR(ret), K(task), K(svr), + "svr_service_time", TVAL_TO_STR(svr_service_time), + "survival_time", TVAL_TO_STR(survival_time)); + } + + LOG_WARN("[STAT] [STREAM_WORKER] [DISPATCH_FETCH_TASK] ignore not-avail server", + K(svr), "pkey", task.get_pkey()); + } + } + + // The server list is iterated over + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCCESS == ret) { + // No server available, put it into idle pool + if (! found_valid_svr) { + LOG_DEBUG("[STAT] [STREAM_WORKER] [DISPATCH_FETCH_TASK] server list is used up, " + "dispatch to idle pool", "task", &task, K(task)); + + if (OB_FAIL(idle_pool_->push(&task))) { + LOG_ERROR("push into idle pool fail", KR(ret), K(task)); + } + } else { + LOG_DEBUG("[STAT] [STREAM_WORKER] [DISPATCH_FETCH_TASK] dispatch to next server", + K(svr), "task", &task, K(task)); + + // Assigning tasks to the server + if (OB_FAIL(dispatch_fetch_task_to_svr_(task, svr))) { + LOG_ERROR("dispatch fetch task to server fail", KR(ret), K(svr), K(task)); + } + } + } + } + + return ret; +} + +int ObLogStreamWorker::dispatch_stream_task(FetchStream &task, const char *from_mod) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else { + int64_t hash_val = ATOMIC_FAA(&stream_task_seq_, 1); + bool print_stream_dispatch_info = ATOMIC_LOAD(&g_print_stream_dispatch_info); + + if (print_stream_dispatch_info) { + LOG_INFO("[STAT] [STREAM_WORKER] [DISPATCH_STREAM_TASK]", + "fetch_stream", &task, K(from_mod), K(hash_val), K(task)); + } else { + LOG_DEBUG("[STAT] [STREAM_WORKER] [DISPATCH_STREAM_TASK]", + "fetch_stream", &task, K(from_mod), K(hash_val), K(task)); + } + + // Rotating the task of fetching log streams to work threads + if (OB_FAIL(StreamWorkerThread::push(&task, hash_val))) { + LOG_ERROR("push stream task into thread queue fail", KR(ret)); + } + } + return ret; +} + +int ObLogStreamWorker::hibernate_stream_task(FetchStream &task, const char *from_mod) +{ + int ret = OB_SUCCESS; + bool print_stream_dispatch_info = ATOMIC_LOAD(&g_print_stream_dispatch_info); + + if (print_stream_dispatch_info) { + LOG_INFO("[STAT] [STREAM_WORKER] [HIBERNATE_STREAM_TASK]", + "task", &task, K(from_mod), K(task)); + } else { + LOG_DEBUG("[STAT] [STREAM_WORKER] [HIBERNATE_STREAM_TASK]", + "task", &task, K(from_mod), K(task)); + } + + if (OB_FAIL(timer_.schedule(&task))) { + LOG_ERROR("schedule timer task fail", KR(ret)); + } else { + // success + } + return ret; +} + +// hendle function for thread pool +int ObLogStreamWorker::handle(void *data, + const int64_t thread_index, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + bool is_paused = ATOMIC_LOAD(&stream_paused_); + FetchStream *task = static_cast(data); + + LOG_DEBUG("[STAT] [STREAM_WORKER] [HANDLE_STREAM_TASK]", K_(stream_paused), K(thread_index), + K(task), KPC(task)); + + if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", K(task), K(thread_index)); + ret = OB_INVALID_ARGUMENT; + } + // If the stream task is currently suspended, the 
task is put to sleep + // DDL tasks are exempt from suspend and require always processing + else if (OB_UNLIKELY(is_paused) && ! task->is_ddl_stream()) { + LOG_DEBUG("[STAT] [STREAM_WORKER] [HIBERNATE_STREAM_TASK_ON_PAUSE]", K(task)); + + if (OB_FAIL(hibernate_stream_task(*task, "PausedFetcher"))) { + LOG_ERROR("hibernate_stream_task on pause fail", KR(ret), K(task), KPC(task)); + } + } else if (OB_FAIL(task->handle(stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle fetch stream task fail", KR(ret), K(task)); + } + } else { + // Can no longer continue with the task + } + + if (0 == thread_index) { + if (REACH_TIME_INTERVAL(STAT_INTERVAL)) { + print_stat_(); + } + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && OB_NOT_NULL(err_handler_)) { + err_handler_->handle_error(ret, "stream worker exits on error, err=%d, thread_index=%ld", + ret, thread_index); + } + return ret; +} + +void ObLogStreamWorker::configure(const ObLogConfig & config) +{ + int64_t blacklist_survival_time_sec = config.blacklist_survival_time_sec; + bool print_stream_dispatch_info = config.print_stream_dispatch_info; + + ATOMIC_STORE(&g_blacklist_survival_time, blacklist_survival_time_sec * _SEC_); + LOG_INFO("[CONFIG]", K(blacklist_survival_time_sec)); + + ATOMIC_STORE(&g_print_stream_dispatch_info, print_stream_dispatch_info); + LOG_INFO("[CONFIG]", K(print_stream_dispatch_info)); +} + +bool ObLogStreamWorker::is_svr_avail_(IObLogAllSvrCache &all_svr_cache, const common::ObAddr &svr) +{ + // TODO: Add other policies such as blacklisting policies + return all_svr_cache.is_svr_avail(svr); +} + +int ObLogStreamWorker::dispatch_fetch_task_to_svr_(PartFetchCtx &task, const common::ObAddr &svr) +{ + int ret = OB_SUCCESS; + SvrStream *svr_stream = NULL; + + if (OB_FAIL(get_svr_stream_(svr, svr_stream))) { + LOG_ERROR("get_svr_stream_ fail", KR(ret), K(svr)); + } else if (OB_ISNULL(svr_stream)) { + LOG_ERROR("invalid svr stream", K(svr_stream)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_DEBUG("[STAT] [STREAM_WORKER] [DISPATCH_FETCH_TASK] dispatch to svr_stream", + "task", &task, K(svr_stream), KPC(svr_stream), K(task)); + + if (OB_FAIL(svr_stream->dispatch(task))) { + LOG_ERROR("dispatch to svr stream fail", KR(ret), K(task), K(svr_stream)); + } else { + // You cannot continue to operate on the task afterwards + } + } + return ret; +} + +int ObLogStreamWorker::get_svr_stream_(const common::ObAddr &svr, SvrStream *&svr_stream) +{ + int ret = OB_SUCCESS; + + svr_stream = NULL; + if (OB_FAIL(svr_stream_map_.get(svr, svr_stream))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get svr stream from map fail", KR(ret), K(svr)); + } else { + // SvrStream does not exist, create one and insert it into the map + ret = OB_SUCCESS; + + if (OB_FAIL(get_svr_stream_when_not_exist_(svr, svr_stream))) { + LOG_ERROR("get_svr_stream_when_not_exist_ fail", KR(ret), K(svr)); + } + } + } + + return ret; +} + +int ObLogStreamWorker::get_svr_stream_when_not_exist_(const common::ObAddr &svr, + SvrStream *&svr_stream) +{ + int ret = OB_SUCCESS; + svr_stream = NULL; + + // To ensure that SvrStream is allocated only once for the same server and to avoid wasting memory, + // lock control is used here so that only one thread can allocate SvrStream at the same time after a new server is added. 
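+ // Double-checked locking: after the lock is acquired, the map is probed again so that a SvrStream created concurrently by another thread is reused rather than allocated twice.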
+ ObSpinLockGuard guard(svr_stream_alloc_lock_); + + if (OB_SUCC(svr_stream_map_.get(svr, svr_stream))) { + // already created + } else if (OB_UNLIKELY(OB_ENTRY_NOT_EXIST != ret)) { + LOG_ERROR("get svr stream from map fail", KR(ret), K(svr)); + } else { + ret = OB_SUCCESS; + + if (OB_ISNULL(rpc_) + || OB_ISNULL(svr_finder_) + || OB_ISNULL(heartbeater_) + || OB_ISNULL(progress_controller_)) { + LOG_ERROR("invalid handlers", K(rpc_), K(svr_finder_), K(heartbeater_), + K(progress_controller_)); + ret = OB_INVALID_ERROR; + } else if (OB_FAIL(svr_stream_pool_.alloc(svr_stream))) { + LOG_ERROR("allocate svr stream from pool fail", K(svr_stream)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(svr_stream)) { + LOG_ERROR("allocate svr stream from pool fail", K(svr_stream)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + svr_stream->reset(svr, + *rpc_, + fs_pool_, + *svr_finder_, + *heartbeater_, + *this, + rpc_result_pool_, + *progress_controller_); + + // require must return OB_SUCCESS otherwise error + if (OB_FAIL(svr_stream_map_.insert(svr, svr_stream))) { + LOG_ERROR("insert into svr stream map fail", KR(ret), K(svr), K(svr_stream)); + free_svr_stream_(svr_stream); + svr_stream = NULL; + } else { + LOG_INFO("[STAT] [SVR_STREAM] [ALLOC]", K(svr_stream), KPC(svr_stream)); + } + } + } + + return ret; +} + +int ObLogStreamWorker::free_svr_stream_(SvrStream *svr_stream) +{ + int ret = OB_SUCCESS; + + if (OB_NOT_NULL(svr_stream)) { + svr_stream->reset(); + + if (OB_FAIL(svr_stream_pool_.free(svr_stream))) { + LOG_ERROR("free svr stream into pool fail", KR(ret), K(svr_stream)); + } else { + svr_stream = NULL; + } + } + + return ret; +} + +bool ObLogStreamWorker::SvrStreamFreeFunc::operator() (const common::ObAddr &key, + SvrStream* svr_stream) +{ + UNUSED(key); + + if (NULL != svr_stream) { + LOG_INFO("[STAT] [SVR_STREAM] [FREE]", K(svr_stream), KPC(svr_stream)); + svr_stream->reset(); + (void)pool_.free(svr_stream); + svr_stream = NULL; + } + + return true; +} + +void ObLogStreamWorker::free_all_svr_stream_() +{ + int ret = OB_SUCCESS; + if (svr_stream_map_.count() > 0) { + SvrStreamFreeFunc func(svr_stream_pool_); + + if (OB_FAIL(svr_stream_map_.remove_if(func))) { + LOG_ERROR("remove if from svr stream map fail", KR(ret)); + } + } +} + +void ObLogStreamWorker::print_stat_() +{ + int ret = OB_SUCCESS; + SvrStreamStatFunc svr_stream_stat_func; + + int64_t alloc_count = svr_stream_pool_.get_alloc_count(); + int64_t free_count = svr_stream_pool_.get_free_count(); + int64_t fixed_count = svr_stream_pool_.get_fixed_count(); + int64_t used_count = alloc_count - free_count; + int64_t dynamic_count = (alloc_count > fixed_count) ? alloc_count - fixed_count : 0; + + _LOG_INFO("[STAT] [SVR_STREAM_POOL] USED=%ld FREE=%ld FIXED=%ld DYNAMIC=%ld", + used_count, free_count, fixed_count, dynamic_count); + + fs_pool_.print_stat(); + rpc_result_pool_.print_stat(); + + // Statistics every SvrStream + if (OB_FAIL(svr_stream_map_.for_each(svr_stream_stat_func))) { + LOG_ERROR("for each svr stream map fail", KR(ret)); + } +} + +} +} diff --git a/src/liboblog/src/ob_log_stream_worker.h b/src/liboblog/src/ob_log_stream_worker.h new file mode 100644 index 0000000000000000000000000000000000000000..89309e1b4f0a3a84e02b45f61ceeecf8e5fa06d2 --- /dev/null +++ b/src/liboblog/src/ob_log_stream_worker.h @@ -0,0 +1,195 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_STREAM_WORKER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_STREAM_WORKER_H__ + +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool +#include "lib/lock/ob_spin_lock.h" // ObSpinLock + +#include "ob_log_config.h" // ObLogConfig +#include "ob_log_svr_stream.h" // SvrStream +#include "ob_map_queue_thread.h" // ObMapQueueThread +#include "ob_log_timer.h" // ObLogFixedTimer +#include "ob_log_fetch_stream_pool.h" // FetchStreamPool +#include "ob_log_fetch_log_rpc.h" // FetchLogARpcResultPool + +namespace oceanbase +{ +namespace liboblog +{ + +class PartFetchCtx; +class IObLogStreamWorker +{ +public: + static const int64_t MAX_THREAD_NUM = ObLogConfig::max_stream_worker_thread_num; +public: + virtual ~IObLogStreamWorker() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void pause() = 0; + virtual void resume(int64_t fetcher_resume_tstamp) = 0; + virtual void mark_stop_flag() = 0; + virtual int64_t get_fetcher_resume_tstamp() = 0; + + // Assigning partitioning tasks to a particular fetch log stream + virtual int dispatch_fetch_task(PartFetchCtx &task, const char *dispatch_reason) = 0; + + // Putting the fetch log stream task into the work thread task pool + virtual int dispatch_stream_task(FetchStream &task, const char *from_mod) = 0; + + // Hibernate fetch log stream task + virtual int hibernate_stream_task(FetchStream &task, const char *from_mod) = 0; +}; + +//////////////////////////////////////////// ObLogStreamWorker //////////////////////////////////////////// + +class IObLogRpc; +class IObLogFetcherIdlePool; +class IObLogFetcherDeadPool; +class IObLogSvrFinder; +class IObLogErrHandler; +class IObLogAllSvrCache; +class IObLogFetcherHeartbeatWorker; +class PartProgressController; + +typedef common::ObMapQueueThread StreamWorkerThread; + +class ObLogStreamWorker : public IObLogStreamWorker, public StreamWorkerThread +{ + typedef common::ObLinearHashMap SvrStreamMap; + typedef common::ObSmallObjPool SvrStreamPool; + + // block size of SvrStreamPool + static const int64_t SVR_STREAM_POOL_BLOCK_SIZE = 1 << 22; + static const int64_t STAT_INTERVAL = 5 * _SEC_; + + // Class global variables +public: + // Hibernate time of the stream in case of fetch stream is paused + static int64_t g_blacklist_survival_time; + static bool g_print_stream_dispatch_info; + +public: + ObLogStreamWorker(); + virtual ~ObLogStreamWorker(); + +public: + int init(const int64_t worker_thread_num, + const int64_t svr_stream_cached_count, + const int64_t fetch_stream_cached_count, + const int64_t rpc_result_cached_count, + const int64_t max_timer_task_count, + IObLogRpc &rpc, + IObLogFetcherIdlePool &idle_pool, + IObLogFetcherDeadPool &dead_pool, + IObLogSvrFinder &svr_finder, + IObLogErrHandler &err_handler, + IObLogAllSvrCache &all_svr_cache, + IObLogFetcherHeartbeatWorker &heartbeater, + PartProgressController &progress_controller); + void destroy(); + +public: + int start(); + void stop(); + void pause(); + void resume(int64_t fetcher_resume_tstamp); + void mark_stop_flag(); + int64_t 
get_fetcher_resume_tstamp(); + + int dispatch_fetch_task(PartFetchCtx &task, const char *dispatch_reason); + int dispatch_stream_task(FetchStream &task, const char *from_mod); + int hibernate_stream_task(FetchStream &task, const char *from_mod); + +public: + // Overloading thread handling functions + virtual int handle(void *data, const int64_t thread_index, volatile bool &stop_flag); + +public: + static void configure(const ObLogConfig & config); + +private: + bool is_svr_avail_(IObLogAllSvrCache &all_svr_cache, const common::ObAddr &svr); + int dispatch_fetch_task_to_svr_(PartFetchCtx &task, const common::ObAddr &svr); + int get_svr_stream_(const common::ObAddr &svr, SvrStream *&svr_stream); + int get_svr_stream_when_not_exist_(const common::ObAddr &svr, SvrStream *&svr_stream); + int free_svr_stream_(SvrStream *svr_stream); + void free_all_svr_stream_(); + void print_stat_(); + +private: + struct SvrStreamFreeFunc + { + SvrStreamPool &pool_; + + explicit SvrStreamFreeFunc(SvrStreamPool &pool) : pool_(pool) {} + bool operator() (const common::ObAddr &key, SvrStream* value); + }; + + struct SvrStreamStatFunc + { + bool operator() (const common::ObAddr &key, SvrStream* value) + { + UNUSED(key); + if (NULL != value) { + value->do_stat(); + } + return true; + } + }; + + // private members +private: + bool inited_; + + // Is the stream task suspended + bool stream_paused_ CACHE_ALIGNED; + // record time of fetcher resume + int64_t fetcher_resume_time_ CACHE_ALIGNED; + + // External modules + IObLogRpc *rpc_; // RPC handler + IObLogFetcherIdlePool *idle_pool_; // IDLE POOL + IObLogFetcherDeadPool *dead_pool_; // DEAD POOL + IObLogSvrFinder *svr_finder_; // SvrFinder + IObLogErrHandler *err_handler_; // error handler + IObLogAllSvrCache *all_svr_cache_; // server cache + IObLogFetcherHeartbeatWorker *heartbeater_; // heartbeat mgr/worker + PartProgressController *progress_controller_; // progress controller + + // Internal modules + ObLogFixedTimer timer_; // timer + FetchStreamPool fs_pool_; // FetchStream object pool + FetchLogARpcResultPool rpc_result_pool_; // RPC result object pool + + // SvrStream manager struct + // TODO: Support for recycling of useless SvrStream + SvrStreamMap svr_stream_map_; + SvrStreamPool svr_stream_pool_; // Supports multi-threaded alloc/release + common::ObSpinLock svr_stream_alloc_lock_; // SvrStream allocator lock + + /// Serial number for fetch log stream tasks, used to round-robin them across worker threads + int64_t stream_task_seq_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogStreamWorker); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_svr_blacklist.cpp b/src/liboblog/src/ob_log_svr_blacklist.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cfd22615e11e835e6ceb9ebcf160077610c7c714 --- /dev/null +++ b/src/liboblog/src/ob_log_svr_blacklist.cpp @@ -0,0 +1,249 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_svr_blacklist.h" + +#include "share/ob_define.h" +#include "lib/string/ob_string.h" // ObString +#include "ob_log_utils.h" // ob_log_malloc + +namespace oceanbase +{ +namespace liboblog +{ +using namespace common; + +ObLogSvrBlacklist::ObLogSvrBlacklist() : + is_inited_(false), + is_sql_server_(false), + lock_(), + svrs_buf_size_(0), + cur_svr_list_idx_(0), + svrs_() +{ + svrs_buf_[0] = NULL; + svrs_buf_[1] = NULL; +} + +ObLogSvrBlacklist::~ObLogSvrBlacklist() +{ + destroy(); +} + +int ObLogSvrBlacklist::init(const char *svrs_list_str, + const bool is_sql_server) +{ + int ret = OB_SUCCESS; + int64_t buf_size = MAX_SVR_BUF_SIZE; + + if (OB_UNLIKELY(is_inited_)) { + LOG_ERROR("ObLogSvrBlacklist has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(svrs_list_str)) { + LOG_ERROR("invalid argument", K(svrs_list_str)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(svrs_buf_[0] = reinterpret_cast(ob_log_malloc(buf_size)))) { + LOG_ERROR("alloc svrs buffer 0 fail", KR(ret), K(buf_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_ISNULL(svrs_buf_[1] = reinterpret_cast(ob_log_malloc(buf_size)))) { + LOG_ERROR("alloc svrs buffer 1 fail", KR(ret), K(buf_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + is_sql_server_ = is_sql_server; + svrs_buf_size_ = MAX_SVR_BUF_SIZE; + cur_svr_list_idx_ = 0; + + if (OB_FAIL(set_svrs_str_(svrs_list_str, cur_svr_list_idx_))) { + LOG_ERROR("set_svrs_str_ fail", KR(ret), K(svrs_list_str), K(cur_svr_list_idx_)); + } else { + is_inited_ = true; + } + } + + if (OB_FAIL(ret)) { + destroy(); + } + + return ret; +} + +void ObLogSvrBlacklist::destroy() +{ + is_inited_ = false; + is_sql_server_ = false; + + if (NULL != svrs_buf_[0]) { + ob_log_free(svrs_buf_[0]); + svrs_buf_[0] = NULL; + } + + if (NULL != svrs_buf_[1]) { + ob_log_free(svrs_buf_[1]); + svrs_buf_[1] = NULL; + } + svrs_buf_size_ = 0; + + cur_svr_list_idx_ = 0; + svrs_[0].reset(); + svrs_[1].reset(); +} + +int64_t ObLogSvrBlacklist::count() const +{ + // add read lock + common::SpinRLockGuard RLockGuard(lock_); + const int64_t cur_svr_list_idx = get_cur_svr_list_idx_(); + return svrs_[cur_svr_list_idx].count(); +} + +bool ObLogSvrBlacklist::is_exist(const common::ObAddr &svr) const +{ + // default not exist + bool bool_ret = false; + // add read lock + common::SpinRLockGuard RLockGuard(lock_); + const int64_t cur_svr_list_idx = get_cur_svr_list_idx_(); + const ExceptionalSvrArray &exceptional_svrs = svrs_[cur_svr_list_idx]; + bool has_done = false; + + for (int64_t idx = 0; ! has_done && idx < exceptional_svrs.count(); ++idx) { + const ObAddr &exceptional_svr = exceptional_svrs.at(idx); + + if (svr == exceptional_svr) { + bool_ret = true; + has_done = true; + } + } // for + + return bool_ret; +} + +void ObLogSvrBlacklist::refresh(const char *svrs_list_str) +{ + int ret = OB_SUCCESS; + const int64_t cur_svr_list_idx = get_bak_cur_svr_list_idx_(); + + if (! 
is_inited_) { + // No update before initialisation + } else { + if (OB_FAIL(set_svrs_str_(svrs_list_str, cur_svr_list_idx))) { + LOG_ERROR("set_svrs_str_ fail", KR(ret), K(svrs_list_str), K(cur_svr_list_idx)); + } else { + // update successful, switch to new server blacklist + // Add write lock + common::SpinWLockGuard WLockGuard(lock_); + switch_svr_list_(); + } + } +} + +int ObLogSvrBlacklist::set_svrs_str_(const char *svrs_list_str, + const int64_t cur_svr_list_idx) +{ + int ret = OB_SUCCESS; + ExceptionalSvrArray &exceptional_svrs = svrs_[cur_svr_list_idx]; + char *buffer = svrs_buf_[cur_svr_list_idx]; + const int64_t buffer_size = svrs_buf_size_; + int64_t pos = 0; + + if (OB_ISNULL(svrs_list_str)) { + LOG_ERROR("invalid argument", K(svrs_list_str)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(buffer)) { + LOG_ERROR("buffer is NULL", K(buffer)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(databuff_printf(buffer, buffer_size, pos, "%s", svrs_list_str))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(buffer), K(buffer_size), K(pos), K(svrs_list_str)); + } else if (OB_FAIL(build_svr_patterns_(buffer, exceptional_svrs))) { + LOG_ERROR("build_svr_patterns_ fail", KR(ret), K(cur_svr_list_idx), K(svrs_list_str), + K(buffer), K(exceptional_svrs)); + } else { + // succ + } + + return ret; +} + +int ObLogSvrBlacklist::build_svr_patterns_(char *svrs_buf, + ExceptionalSvrArray &exceptional_svrs) +{ + int ret = OB_SUCCESS; + const char pattern_delimiter = '|'; + exceptional_svrs.reset(); + + if (OB_ISNULL(svrs_buf)) { + LOG_ERROR("svrs_buf is NULL", K(svrs_buf)); + ret = OB_INVALID_ARGUMENT; + } else { + ObString remain(strlen(svrs_buf), svrs_buf); + ObString cur_pattern; + bool done = false; + + // No server blacklisting, no parsing required + if (0 == strcmp(svrs_buf, "|")) { + done = true; + } + + while (OB_SUCC(ret) && ! done) { + cur_pattern = remain.split_on(pattern_delimiter); + + if (cur_pattern.empty()) { + cur_pattern = remain; + done = true; + } + + if (OB_SUCC(ret)) { + ObString &str = cur_pattern; + *(str.ptr() + str.length()) = '\0'; + str.set_length(1 + str.length()); + } + + if (OB_SUCC(ret)) { + ObAddr svr; + + if (OB_FAIL(svr.parse_from_string(cur_pattern))) { + LOG_ERROR("svr parse_from_string fail", KR(ret), K(cur_pattern)); + } else if (OB_UNLIKELY(! svr.is_valid())) { + LOG_ERROR("svr is not valid", K(svr), K(cur_pattern)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(exceptional_svrs.push_back(svr))) { + LOG_ERROR("exceptional_svrs push_back svr fail", KR(ret), K(svr)); + } else { + _LOG_INFO("[%sSERVER_BLACKLIST] [ADD] [SVR=%s] [SVR_CNT=%ld]", + is_sql_server_ ? "SQL_": "", to_cstring(svr), exceptional_svrs.count()); + } + } + } // while + } + + return ret; +} + +int64_t ObLogSvrBlacklist::get_cur_svr_list_idx_() const +{ + return (ATOMIC_LOAD(&cur_svr_list_idx_)) % 2; +} + +int64_t ObLogSvrBlacklist::get_bak_cur_svr_list_idx_() const +{ + return (ATOMIC_LOAD(&cur_svr_list_idx_) + 1) % 2; +} + +void ObLogSvrBlacklist::switch_svr_list_() +{ + ATOMIC_INC(&cur_svr_list_idx_); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_svr_blacklist.h b/src/liboblog/src/ob_log_svr_blacklist.h new file mode 100644 index 0000000000000000000000000000000000000000..f9bbb0554f15af8ab0702e191787b92c19dd0493 --- /dev/null +++ b/src/liboblog/src/ob_log_svr_blacklist.h @@ -0,0 +1,79 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_SERVER_BLACKLIST_H_ +#define OCEANBASE_LIBOBLOG_SERVER_BLACKLIST_H_ + +#include "lib/container/ob_array.h" // ObArray +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/lock/ob_spin_rwlock.h" // SpinRWLock, SpinRLockGuard, SpinWLockGuard +#include "ob_log_utils.h" // _K_ + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogSvrBlacklist +{ +public: + ObLogSvrBlacklist(); + ~ObLogSvrBlacklist(); + + int init(const char *svrs_list_str, + const bool is_sql_server); + void destroy(); + + // get current svr count + int64_t count() const; + /// Determine if the server is available: if it is on the server blacklist, it is not available; otherwise it is available + /// + /// @param [in] svr query server + /// + /// @retval true not exist in blacklist + /// @retval false exist + bool is_exist(const common::ObAddr &svr) const; + /// Parsing server_blacklist in the configuration file + /// + /// @param [in] svrs_list_str server blacklist string + /// + void refresh(const char *svrs_list_str); + +private: + typedef common::ObArray ExceptionalSvrArray; + static const int64_t MAX_SVR_BUF_SIZE = 4 * _K_; + + int set_svrs_str_(const char *svrs_list_str, + const int64_t cur_svr_list_idx); + int build_svr_patterns_(char *svrs_buf, + ExceptionalSvrArray &exceptional_svrs); + + int64_t get_cur_svr_list_idx_() const; + int64_t get_bak_cur_svr_list_idx_() const; + void switch_svr_list_(); + +private: + bool is_inited_; + bool is_sql_server_; + mutable common::SpinRWLock lock_; + char *svrs_buf_[2]; + int64_t svrs_buf_size_; + + int64_t cur_svr_list_idx_; + ExceptionalSvrArray svrs_[2]; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSvrBlacklist); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_SERVER_BLACKLIST_H_ */ diff --git a/src/liboblog/src/ob_log_svr_finder.cpp b/src/liboblog/src/ob_log_svr_finder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b304e25d92028b822ba510eeb1db68015f7590c5 --- /dev/null +++ b/src/liboblog/src/ob_log_svr_finder.cpp @@ -0,0 +1,1062 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_svr_finder.h" + +#include "lib/hash_func/murmur_hash.h" // murmurhash + +#include "ob_log_trace_id.h" // ObLogTraceIdGuard +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_all_svr_cache.h" // IObLogAllSvrCache +#include "ob_log_server_priority.h" // RegionPriority, ReplicaPriority + +namespace oceanbase +{ +using namespace common; +namespace liboblog +{ +int64_t ObLogSvrFinder::g_sql_batch_count = ObLogConfig::default_svr_finder_sql_batch_count; + +ObLogSvrFinder::ObLogSvrFinder() : + inited_(false), + thread_num_(0), + err_handler_(NULL), + all_svr_cache_(NULL), + systable_helper_(NULL), + worker_data_(NULL), + svr_blacklist_() +{ +} + +ObLogSvrFinder::~ObLogSvrFinder() +{ + destroy(); +} + +int ObLogSvrFinder::init(const int64_t thread_num, + IObLogErrHandler &err_handler, + IObLogAllSvrCache &all_svr_cache, + IObLogSysTableHelper &systable_helper) +{ + int ret = OB_SUCCESS; + int64_t max_thread_num = ObLogConfig::max_svr_finder_thread_num; + const char *server_blacklist = TCONF.server_blacklist.str(); + const bool is_sql_server = false; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if ((OB_UNLIKELY(thread_num) <= 0) || OB_UNLIKELY(max_thread_num < thread_num)) { + LOG_ERROR("invalid thread number", K(thread_num), K(max_thread_num)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(SvrFinderWorker::init(thread_num, ObModIds::OB_LOG_FETCHER_SVR_FINDER))) { + LOG_ERROR("init svr finder worker fail", KR(ret), K(thread_num)); + } else if (OB_FAIL(init_worker_data_(thread_num))) { + LOG_ERROR("init_worker_data_ fail", KR(ret), K(thread_num)); + } else if (OB_FAIL(svr_blacklist_.init(server_blacklist, is_sql_server))) { + LOG_ERROR("svr_blacklist_ init fail", KR(ret), K(server_blacklist), K(is_sql_server)); + } else { + thread_num_ = thread_num; + err_handler_ = &err_handler; + all_svr_cache_ = &all_svr_cache; + systable_helper_ = &systable_helper; + inited_ = true; + + LOG_INFO("init svr finder succ", K(thread_num)); + } + return ret; +} + +void ObLogSvrFinder::destroy() +{ + inited_ = false; + + thread_num_ = 0; + SvrFinderWorker::destroy(); + err_handler_ = NULL; + all_svr_cache_ = NULL; + systable_helper_ = NULL; + destory_worker_data_(); + svr_blacklist_.destroy(); + + LOG_INFO("destroy svr finder succ"); +} + +int ObLogSvrFinder::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(SvrFinderWorker::start())) { + LOG_ERROR("start svr finder worker fail", KR(ret)); + } else { + LOG_INFO("start svr finder succ", "thread_num", SvrFinderWorker::get_thread_num()); + } + return ret; +} + +void ObLogSvrFinder::stop() +{ + if (OB_LIKELY(inited_)) { + SvrFinderWorker::stop(); + LOG_INFO("stop svr finder succ"); + } +} + +int ObLogSvrFinder::async_svr_find_req(SvrFindReq *req) +{ + return async_req_(req); +} + +int ObLogSvrFinder::async_leader_find_req(LeaderFindReq *req) +{ + return async_req_(req); +} + +int ObLogSvrFinder::async_req_(ISvrFinderReq *req) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(req)) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
req->is_state_idle())) { + LOG_ERROR("invalid request, state is not IDLE", KPC(req)); + ret = OB_INVALID_ARGUMENT; + } else { + // Setting the status to request status + req->set_state_req(); + + if (OB_FAIL(dispatch_worker_(req))) { + LOG_ERROR("dispatch worker fail", KR(ret), KPC(req)); + } + + LOG_DEBUG("svr finder handle async req", KR(ret), KPC(req)); + } + return ret; +} + +int ObLogSvrFinder::dispatch_worker_(ISvrFinderReq *req) +{ + // No special err. + int ret = OB_SUCCESS; + if (OB_ISNULL(req)) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! req->is_state_req())) { + LOG_ERROR("invalid request, state is not REQ", KPC(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(SvrFinderWorker::push(req, req->hash()))) { + LOG_ERROR("push request fail", KR(ret), KPC(req), K(req->hash())); + } else { + // dispatch worker succ + } + return ret; +} + +void ObLogSvrFinder::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + WorkerData &data = worker_data_[thread_index]; + + LOG_INFO("svr finder thread start", K(thread_index), K_(thread_num)); + + while (! stop_flag_ && OB_SUCCESS == ret) { + // query with different Trace ID for each request + ObLogTraceIdGuard trace_guard; + + if (OB_FAIL(do_retrieve_(thread_index, data))) { + if (OB_ITER_END != ret) { + LOG_ERROR("retrieve request fail", KR(ret), K(thread_index)); + } + } else if (OB_FAIL(do_aggregate_(thread_index, data))) { + LOG_ERROR("do_aggregate_ fail", KR(ret), K(thread_index)); + } else if (OB_FAIL(do_batch_query_(data))) { + if (OB_NEED_RETRY != ret) { + LOG_ERROR("do_batch_query_ fail", KR(ret), K(thread_index)); + } + } + // handle query result + else if (OB_FAIL(handle_batch_query_(data))) { + if (OB_NEED_RETRY != ret) { + LOG_ERROR("handle_batch_query_ fail", KR(ret), K(thread_index)); + } + } else { + // do nothing + } + + // No requests to process, waiting + if (OB_ITER_END == ret) { + cond_timedwait(thread_index, COND_TIME_WAIT); + ret = OB_SUCCESS; + } else { + // Reset whether successful or not and mark outstanding requests as completed + reset_all_req_(ret, data); + + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + // If this query or processing fails, the connection is reset to ensure that the next query or processing is done with a different server + if (OB_FAIL(systable_helper_->reset_connection())) { + LOG_ERROR("reset_connection fail", KR(ret), K(thread_index)); + } + } + } + } // while + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) { + LOG_ERROR("svr finder worker exit on fail", KR(ret), K(thread_index)); + if (OB_NOT_NULL(err_handler_)) { + err_handler_->handle_error(ret, "SvrFinderWorker worker exits on fail, ret=%d, thread_index=%ld", + ret, thread_index); + } + } +} + +void ObLogSvrFinder::configure(const ObLogConfig &config) +{ + int64_t svr_finder_sql_batch_count = config.svr_finder_sql_batch_count; + const char *server_blacklist = config.server_blacklist.str(); + + ATOMIC_STORE(&g_sql_batch_count, svr_finder_sql_batch_count); + svr_blacklist_.refresh(server_blacklist); + + LOG_INFO("[CONFIG]", K(svr_finder_sql_batch_count)); + LOG_INFO("[CONFIG]", K(server_blacklist)); +} + +// OB_ITER_END: 
no request available +int ObLogSvrFinder::do_retrieve_(const int64_t thread_index, WorkerData &worker_data) +{ + int ret = OB_SUCCESS; + int64_t sql_batch_count = ATOMIC_LOAD(&g_sql_batch_count); + // Each request corresponds to two SQL + int64_t req_batch_count = (sql_batch_count/2); + req_batch_count = req_batch_count > 0 ? req_batch_count : 1; // Guaranteed greater than 0 + SvrFinderReqList &req_list = worker_data.req_list_; + + // Fetching data from queues, batch processing + for (int64_t cnt = 0; OB_SUCCESS == ret && (cnt < req_batch_count); ++cnt) { + void *data = NULL; + ISvrFinderReq *request = NULL; + + if (OB_FAIL(SvrFinderWorker::pop(thread_index, data))) { + if (OB_EAGAIN != ret) { + LOG_ERROR("pop data from queue fail", KR(ret), K(thread_index), K(data)); + } + } else if (OB_ISNULL(request = static_cast(data))) { + LOG_ERROR("request is NULL", K(request), K(thread_index), K(data)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(req_list.push_back(request))) { + LOG_ERROR("req_list push_back fail", KR(ret), KPC(request)); + } else { + // success + } + } + + if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCCESS == ret) { + // No requests pending, set iteration termination directly + if (req_list.count() <= 0) { + ret = OB_ITER_END; + } + } else { + LOG_ERROR("pop and aggregate request fail", KR(ret), K(thread_index)); + } + + LOG_DEBUG("svr finder do_retrieve done", KR(ret), K(thread_index), + "req_count", req_list.count(), K(req_batch_count), K(sql_batch_count)); + + return ret; +} + +int ObLogSvrFinder::do_aggregate_(const int64_t thread_index, WorkerData &worker_data) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else { + SvrFinderReqList &req_list = worker_data.req_list_; + + LOG_DEBUG("svr finder do_aggregate", K(thread_index), "req_count", req_list.count()); + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < req_list.count(); ++idx) { + ISvrFinderReq *req = req_list.at(idx); + + if (OB_ISNULL(req)) { + LOG_ERROR("req is null", K(thread_index), KPC(req)); + ret = OB_ERR_UNEXPECTED; + } else { + if (req->is_find_svr_req()) { + if (OB_FAIL(do_sql_aggregate_for_svr_list_(req, worker_data))) { + LOG_ERROR("do_sql_aggregate_for_svr_list_ fail", KR(ret), KPC(req)); + } + } else if (req->is_find_leader_req()) { + if (OB_FAIL(do_sql_aggregate_for_leader_(req, worker_data))) { + LOG_ERROR("do_sql_aggregate_for_leader_ fail", KR(ret), KPC(req)); + } + } else { + LOG_ERROR("invalid request, unknown type", KPC(req)); + ret = OB_INVALID_ARGUMENT; + } + } + } // for + + // print count of aggregated request + if (req_list.count() > 0) { + LOG_DEBUG("svr finder do_aggregate_ succ", K(thread_index), "cnt", req_list.count()); + } + } + + return ret; +} + +int ObLogSvrFinder::do_batch_query_(WorkerData &worker_data) +{ + int ret = OB_SUCCESS; + IObLogSysTableHelper::BatchSQLQuery &query = worker_data.query_; + SvrFinderReqList &req_list = worker_data.req_list_; + int64_t req_list_cnt = req_list.count(); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (req_list_cnt <=0) { + // no request, do nothing + } else { + int64_t start_ts = get_timestamp(); + if (OB_FAIL(systable_helper_->query_with_multiple_statement(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("query_with_multiple_statement fail, need retry", KR(ret), + "svr", query.get_server(), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("query_with_multiple_statement fail", KR(ret), + "svr", query.get_server(), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else { + } + + int64_t end_ts = get_timestamp(); + if (end_ts - start_ts > SQL_TIME_THRESHOLD) { + LOG_WARN("SvrFinder do_batch_query cost too much time", "cost", end_ts - start_ts, + KR(ret), "svr", query.get_server(), K(req_list_cnt)); + } + + LOG_DEBUG("svr finder do_batch_query done", KR(ret), "cost", end_ts - start_ts, + "svr", query.get_server(), K(req_list_cnt)); + } + + return ret; +} + +void ObLogSvrFinder::reset_all_req_(const int err_code, WorkerData &data) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else { + TraceIdType *trace_id = ObCurTraceId::get_trace_id(); + + // If one SQL fails, all subsequent SQL is marked as failed + // TODO: If only one SQL fails, the subsequent SQL is retried in place + SvrFinderReqList &req_list = data.req_list_; + // Get mysql error for the last SQL error + int mysql_err_code = data.query_.get_mysql_err_code(); + + for (int64_t idx = 0; OB_SUCCESS == ret && idx < req_list.count(); ++idx) { + ISvrFinderReq *req = req_list.at(idx); + + // End the request, regardless of whether it was processed successfully or not, and set the error code + // Note: the request may end early, here the req may be NULL + if (NULL != req) { + req->set_state_done(err_code, mysql_err_code, trace_id); + } + } + // Reset local data (including BatchSQLQuery and reset of req_list) after all requests have been processed + data.reset(); + } +} + +int ObLogSvrFinder::handle_batch_query_(WorkerData &data) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
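+  // Result sets are consumed in the same order the statements were aggregated. Each request
+  // is marked DONE (with the mysql error code and current trace id) as soon as its results
+  // are read and must not be referenced afterwards; if one request fails, every remaining
+  // request is marked DONE with the same error code before the list is reset.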
inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else { + IObLogSysTableHelper::BatchSQLQuery &query = data.query_; + SvrFinderReqList &req_list = data.req_list_; + int64_t req_idx = 0; + int64_t start_time = get_timestamp(); + int64_t handle_time = 0; + TraceIdType *trace_id = ObCurTraceId::get_trace_id(); + + // Iterate through all requests and exit if a particular request is encountered and fails + for (req_idx = 0; OB_SUCCESS == ret && req_idx < req_list.count(); ++req_idx) { + ISvrFinderReq *req = req_list.at(req_idx); + + if (OB_ISNULL(req)) { + LOG_ERROR("req is null", KPC(req)); + ret = OB_ERR_UNEXPECTED; + } else { + if (req->is_find_svr_req()) { + if (OB_FAIL(handle_batch_query_for_svr_list_(req, query))) { + if (OB_NEED_RETRY != ret) { + LOG_ERROR("handle_batch_query_for_svr_list_ fail", KR(ret), KPC(req)); + } + } + } else if (req->is_find_leader_req()) { + if (OB_FAIL(handle_batch_query_for_leader_(req, query))) { + if (OB_NEED_RETRY != ret) { + LOG_ERROR("handle_batch_query_for_leader_ fail", KR(ret), KPC(req)); + } + } + } else { + LOG_ERROR("invalid request, unknown type", KPC(req)); + ret = OB_INVALID_ARGUMENT; + } + + // The end is set immediately, whether successful or not + req->set_state_done(ret, query.get_mysql_err_code(), trace_id); + + // Finally mark the request as empty and no further references may be made + req = NULL; + req_list[req_idx] = NULL; + } + } + + handle_time = get_timestamp() - start_time; + + LOG_DEBUG("svr finder handle_batch_query done", KR(ret), "cost", handle_time, + "svr", query.get_server(), "batch_req_count", req_list.count(), + "batch_sql_count", query.get_batch_sql_count()); + + if (OB_SUCCESS != ret) { + LOG_ERROR("SvrFinder handle_batch_query fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg(), + "fail_req_idx", req_idx - 1, + "batch_req_count", req_list.count(), + "batch_sql_count", query.get_batch_sql_count()); + + // For the rest of the requests, regardless of success, mark completion directly + // FIXME: where a request SQL fails, all subsequent unexecuted SQL will be marked as failed + for (; req_idx < req_list.count(); ++req_idx) { + if (NULL != req_list[req_idx]) { + req_list[req_idx]->set_state_done(ret, query.get_mysql_err_code(), trace_id); + req_list[req_idx] = NULL; + } + } + } else { + // Execution takes too long, prints WARN log + if (handle_time > SQL_TIME_THRESHOLD) { + LOG_WARN("SvrFinder handle_batch_query cost too much time", "cost", handle_time, + "svr", query.get_server(), "batch_req_count", req_list.count(), + "batch_sql_count", query.get_batch_sql_count()); + } + } + + // Check here that all requests are processed to avoid starving some requests after the reset + if (OB_UNLIKELY(req_idx < req_list.count())) { + LOG_ERROR("some requests have not been handled, unexpected error", K(req_idx), + K(req_list.count()), KR(ret), K(req_list)); + ret = OB_ERR_UNEXPECTED; + } + + // Finally the request list is reset anyway and the external no longer needs to be traversed + req_list.reset(); + } + + return ret; +} + +int ObLogSvrFinder::handle_batch_query_for_svr_list_(ISvrFinderReq *orig_req, + IObLogSysTableHelper::BatchSQLQuery &query) +{ + int ret = OB_SUCCESS; + SvrFindReq *req = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(orig_req)) { + LOG_ERROR("invalid argument", K(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
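+  // A server-list request reads back two result sets (clog history records first, then
+  // meta table records), mirroring the aggregation order in do_sql_aggregate_for_svr_list_().
+  // OB_NEED_RETRY from get_records() is propagated as a retryable error rather than fatal.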
orig_req->is_state_req())) { + LOG_ERROR("invalid request, state is not REQ", KR(ret), KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! orig_req->is_find_svr_req())) { + LOG_ERROR("invalid request, type is not REQ_FIND_SVR", KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(req = static_cast(orig_req))) { + LOG_ERROR("SvrFindReq is null", K(req)); + ret = OB_ERR_UNEXPECTED; + } else { + // Process clog_history_records first, then meta_records according to SQL aggregation order + IObLogSysTableHelper::ClogHistoryRecordArray clog_history_records(common::ObModIds::OB_LOG_CLOG_HISTORY_RECORD_ARRAY, common::OB_MALLOC_NORMAL_BLOCK_SIZE); + clog_history_records.reset(); + IObLogSysTableHelper::MetaRecordArray meta_records(common::ObModIds::OB_LOG_META_RECORD_ARRAY, common::OB_MALLOC_NORMAL_BLOCK_SIZE); + meta_records.reset(); + + if (OB_FAIL(query.get_records(clog_history_records))) { + // OB_NEED_RETRY indicates that a retry is required + LOG_WARN("get_records fail for query clog history records", KR(ret), + "svr", query.get_server(), "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg(), KPC(req)); + } else if (OB_FAIL(handle_clog_history_info_records_(*req, clog_history_records))) { + LOG_ERROR("handle clog history info records fail", KR(ret), KPC(req), K(clog_history_records)); + } else { + // succ + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(query.get_records(meta_records))) { + // OB_NEED_RETRY indicates that a retry is required + LOG_WARN("get_records fail for query meta table records", KR(ret), + "svr", query.get_server(), "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg(), KPC(req)); + } else if (OB_FAIL(handle_meta_info_records_(*req, meta_records))) { + LOG_ERROR("handle meta info records fail", KR(ret), KPC(req), K(meta_records)); + } else { + // succ + } + } + + LOG_DEBUG("svr finder get_records for query svr_list done", KR(ret), KPC(req)); + } + + return ret; +} + +int ObLogSvrFinder::handle_batch_query_for_leader_(ISvrFinderReq *orig_req, + IObLogSysTableHelper::BatchSQLQuery &query) +{ + int ret = OB_SUCCESS; + LeaderFindReq *req = NULL; + + if (OB_ISNULL(orig_req)) { + LOG_ERROR("invalid argument", K(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! orig_req->is_find_leader_req())) { + LOG_ERROR("invalid request, type is not REQ_FIND_LEADER", KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! orig_req->is_state_req())) { + LOG_ERROR("invalid request, state is not REQ", KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(req = static_cast(orig_req))) { + LOG_ERROR("dynamic cast from ISvrFinderReq to LeaderFindReq fail", K(orig_req), KPC(orig_req)); + ret = OB_ERR_UNEXPECTED; + } else { + req->has_leader_ = false; + req->leader_.reset(); + + if (OB_FAIL(query.get_records(req->has_leader_, req->leader_))) { + LOG_ERROR("query leader info fail", KR(ret), "pkey", req->pkey_, + "svr", query.get_server(), "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + + LOG_DEBUG("svr finder get_records for query leader done", KR(ret), KPC(req)); + } + + return ret; +} + +int ObLogSvrFinder::do_sql_aggregate_for_svr_list_(ISvrFinderReq *orig_req, WorkerData &data) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(orig_req)) { + LOG_ERROR("invalid argument", K(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
orig_req->is_state_req())) { + LOG_ERROR("invalid request, state is not REQ", KR(ret), KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! orig_req->is_find_svr_req())) { + LOG_ERROR("invalid request, type is not REQ_FIND_SVR", KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else { + SvrFindReq *req = static_cast(orig_req); + IObLogSysTableHelper::BatchSQLQuery &query = data.query_; + + if (OB_ISNULL(req)) { + LOG_ERROR("dynamic cast from ISvrFinderReq to SvrFindReq fail", K(orig_req), KPC(orig_req)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(do_sql_aggregate_for_query_clog_history_(*req, query))) { + LOG_ERROR("do_sql_aggregate_for_query_clog_history_ fail", KR(ret), KPC(req)); + } else if (OB_FAIL(do_sql_aggregate_for_query_meta_info_(*req, query))) { + LOG_ERROR("do_sql_aggregate_for_query_meta_info_ fail", KR(ret), KPC(req)); + } else { + LOG_DEBUG("svr finder do_aggregate for svr_list req", KPC(req)); + } + } + + return ret; +} + +int ObLogSvrFinder::do_sql_aggregate_for_query_clog_history_(SvrFindReq &req, IObLogSysTableHelper::BatchSQLQuery &query) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! req.is_valid())) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("invalid parameters", K(systable_helper_)); + ret = OB_ERR_UNEXPECTED; + } else { + QueryClogHistorySQLStrategy strategy; + // Query clog history table based on timestamp + if (req.req_by_start_tstamp_) { + if (OB_FAIL(strategy.init_by_tstamp_query(req.pkey_, req.start_tstamp_))) { + LOG_ERROR("init_by_tstamp_query fail", KR(ret), + "pkey", req.pkey_, "tstamp", req.start_tstamp_); + } + } + // Query clog history table based on log id + else if (req.req_by_next_log_id_) { + if (OB_FAIL(strategy.init_by_log_id_query(req.pkey_, req.next_log_id_))) { + LOG_ERROR("init_by_log_id_query fail", KR(ret), + "pkey", req.pkey_, "log_id", req.next_log_id_); + } + } else { + LOG_ERROR("invalid request which is not requested by tstamp or log id", K(req)); + ret = OB_INVALID_ARGUMENT; + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(query.do_sql_aggregate(&strategy))) { + LOG_ERROR("do_sql_aggregate fail", KR(ret), K(strategy)); + } + } + } + + return ret; +} + +int ObLogSvrFinder::do_sql_aggregate_for_query_meta_info_(SvrFindReq &req, IObLogSysTableHelper::BatchSQLQuery &query) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! req.is_valid())) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else { + QueryMetaInfoSQLStrategy strategy; + bool only_query_leader = false; + + if (OB_FAIL(strategy.init(req.pkey_, only_query_leader))) { + LOG_ERROR("QueryMetaInfoSQLStrategy fail", KR(ret), K(req)); + } else if (OB_FAIL(query.do_sql_aggregate(&strategy))) { + LOG_ERROR("do_sql_aggregate fail", KR(ret), K(strategy)); + } else { + // succ + } + } + + return ret; +} + +int ObLogSvrFinder::do_sql_aggregate_for_leader_(ISvrFinderReq *orig_req, WorkerData &data) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(orig_req)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", K(orig_req)); + } else if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("invalid parameters", K(systable_helper_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(! orig_req->is_find_leader_req())) { + LOG_ERROR("invalid request, type is not REQ_FIND_LEADER", KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
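+  // A leader request aggregates a single meta-info statement built with
+  // only_query_leader = true, which appends a "role = LEADER" predicate; the answer is
+  // later read back as at most one row via BatchSQLQuery::get_records(has_leader, leader).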
orig_req->is_state_req())) { + LOG_ERROR("invalid request, state is not REQ", KPC(orig_req)); + ret = OB_INVALID_ARGUMENT; + } else { + LeaderFindReq *req = static_cast(orig_req); + IObLogSysTableHelper::BatchSQLQuery &query = data.query_; + + if (OB_ISNULL(req)) { + LOG_ERROR("dynamic cast from ISvrFinderReq to LeaderFindReq fail", K(orig_req), KPC(orig_req)); + ret = OB_ERR_UNEXPECTED; + } else { + req->has_leader_ = false; + req->leader_.reset(); + QueryMetaInfoSQLStrategy strategy; + bool only_query_leader = true; + + if (OB_FAIL(strategy.init(req->pkey_, only_query_leader))) { + LOG_ERROR("QueryMetaInfoSQLStrategy init fail", KR(ret), KPC(req), K(only_query_leader)); + } else if (OB_FAIL(query.do_sql_aggregate(&strategy))) { + LOG_ERROR("do_sql_aggregate fail", KR(ret), K(strategy)); + } else { + LOG_DEBUG("svr finder do_aggregate for leader req", KPC(req)); + } + } + } + + return ret; +} + +int ObLogSvrFinder::handle_meta_info_records_(SvrFindReq &req, + const IObLogSysTableHelper::MetaRecordArray &records) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! req.is_valid())) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(systable_helper_) || OB_ISNULL(all_svr_cache_)) { + LOG_ERROR("invalid parameters", K(systable_helper_), K(all_svr_cache_)); + ret = OB_ERR_UNEXPECTED; + } else { + IPartSvrList &svr_list = *(req.svr_list_); + // No log service range in the meta table, the maximum log service range is used by default + // Note: the service range is left open and right closed + const uint64_t start_log_id = 0; + const uint64_t end_log_id = OB_INVALID_ID; + // However, the log id range is not valid and there is no need to update the log service range when the server exists + const bool is_log_range_valid = false; + + // Use meta table records to supplement clog history records to prevent all clog history records from being incomplete and to avoid missing servers + for (int64_t idx = 0, cnt = records.count(); OB_SUCCESS == ret && (idx < cnt); ++idx) { + const IObLogSysTableHelper::MetaRecord &record = records.at(idx); + ObAddr svr; + svr.set_ip_addr(record.svr_ip_, record.svr_port_); + + RegionPriority region_prio = REGION_PRIORITY_UNKNOWN; + ReplicaPriority replica_prio = REPLICA_PRIORITY_UNKNOWN; + + // 1. meta table records have higher priority than clog history records, the latest record recorded in clog history does + // not mean it is the most appropriate one, because it may be a replica that is being migrated or doing rebuild, + // it may be very backward, in this case, it is not suitable for locating and fetching logs + // 2. for log locating scenarios, priority is given to locating the LEADER. it requires that: the LEADER replica has a higher + // priority than the other replicas, in most cases the LEADER has the most complete log on it and can be located successfully, + // and the LEADER must not return a very old log ID, it will either return the exact log ID or an error + // 3. The LEADER replica also has a high probability of avoiding the inaccurate timestamp range of the partition logs maintained + // by the ilog info block module, as it will periodically switch ilog files in 30 minutes, basically not allowing the time lapse in an ilog file to exceed 2100 seconds. + // + const bool is_located_in_meta_table = true; + const bool is_leader = (common::is_strong_leader(static_cast(record.role_))); + + if (! 
all_svr_cache_->is_svr_avail(svr, region_prio)) { + LOG_DEBUG("ignore server from meta table which is not active in all server table or server in encryption zone", + K(svr), "pkey", req.pkey_, "next_log_id", req.next_log_id_, + "start_tstamp", req.start_tstamp_, + "region_prio", print_region_priority(region_prio)); + } else if (OB_FAIL(get_replica_priority(record.replica_type_, replica_prio))) { + LOG_ERROR("get priority based replica fail", KR(ret), K(record), + "replica_prio", print_replica_priority(replica_prio)); + } + // Add or update server information + else if (OB_FAIL(svr_list.add_server_or_update( + svr, + start_log_id, + end_log_id, + is_located_in_meta_table, + region_prio, + replica_prio, + is_leader, + is_log_range_valid))) { + LOG_ERROR("add server into server list fail", KR(ret), K(svr), + K(is_located_in_meta_table), + "region_prio", print_region_priority(region_prio), + "replica_prio", print_replica_priority(replica_prio), + K(is_leader), + K(is_log_range_valid)); + } + } // for + + if (OB_SUCCESS == ret) { + // After processing __all_meta_table records, sort the server list + svr_list.sort_by_priority(); + } + + if (OB_SUCC(ret)) { + const int64_t svr_count_before_filter = svr_list.count(); + ObArray remove_svrs; + + if (OB_FAIL(svr_list.filter_by_svr_blacklist(svr_blacklist_, remove_svrs))) { + LOG_ERROR("svr_list filter_by_svr_blacklist fail", KR(ret), K(remove_svrs)); + } else { + const int64_t svr_count_after_filter = svr_list.count(); + + // print if has svr filtered + if (svr_count_before_filter > svr_count_after_filter) { + _LOG_INFO("[SERVER_BLACKLIST] [FILTER] [PKEY=%s] [FILTER_SVR_CNT=%ld(%ld/%ld)] [REMOVE_SVR=%s]", + to_cstring(req.pkey_), svr_count_before_filter - svr_count_after_filter, + svr_count_before_filter, svr_count_after_filter, to_cstring(remove_svrs)); + } + } + } + + LOG_DEBUG("update server list by meta table", KR(ret), "pkey", req.pkey_, + "next_log_id", req.next_log_id_, "start_tstamp", req.start_tstamp_, + "svr_list", *(req.svr_list_), "meta_records", records); + } // else + + return ret; +} + +int ObLogSvrFinder::handle_clog_history_info_records_(SvrFindReq &req, + const IObLogSysTableHelper::ClogHistoryRecordArray &records) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
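+  // Clog history records seed the candidate server list with valid log-id ranges but say
+  // nothing about role or replica type (defaults: not leader, FULL replica). The meta table
+  // pass in handle_meta_info_records_() then overlays role and replica type, sorts the list
+  // by priority and applies the server blacklist.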
req.is_valid())) { + LOG_ERROR("invalid argument", K(req)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(all_svr_cache_)) { + LOG_ERROR("invalid parameters", K(all_svr_cache_)); + ret = OB_INVALID_ERROR; + } else { + IPartSvrList &svr_list = *(req.svr_list_); + ObAddr svr; + RegionPriority region_prio = REGION_PRIORITY_UNKNOWN; + // The logging service scope of clog history is valid and should be updated + const bool is_log_range_valid = true; + // __all_clog_history_info_v2 returns records with no replica type, default full functional replica + // Wait for __all_meta_table record to return copy type, update the corresponding replica priority + ReplicaPriority replica_prio = REPLICA_PRIORITY_UNKNOWN; + if (OB_FAIL(get_replica_priority(REPLICA_TYPE_FULL, replica_prio))) { + LOG_ERROR("get priority based replica fail", KR(ret), K(REPLICA_TYPE_FULL), + "replica_prio", print_replica_priority(replica_prio)); + } else { + // located in clog history table + const bool is_located_in_meta_table = false; + // Query clog history table does not know the leader information, later query meta table and update + const bool is_leader = false; + + for (int64_t idx = 0, cnt = records.count(); OB_SUCCESS == ret && (idx < cnt); ++idx) { + const IObLogSysTableHelper::ClogHistoryRecord &info_record = records.at(idx); + svr.reset(); + svr.set_ip_addr(info_record.svr_ip_, info_record.svr_port_); + + if (! all_svr_cache_->is_svr_avail(svr, region_prio)) { + // Filter machines that are not in the __all_server list + LOG_DEBUG("ignore server from clog history which is not active in all server table or server in encryption zone", + K(svr), "pkey", req.pkey_, "next_log_id", req.next_log_id_, + "start_tstamp", req.start_tstamp_, K(info_record), + "region_prio", print_region_priority(region_prio)); + } else if (OB_FAIL(svr_list.add_server_or_update(svr, + info_record.start_log_id_, + info_record.end_log_id_, + is_located_in_meta_table, + region_prio, + replica_prio, + is_leader, + is_log_range_valid))) { + LOG_ERROR("add server into server list fail", KR(ret), K(svr), + "start_log_id", info_record.start_log_id_, + "end_log_id", info_record.end_log_id_, + K(is_located_in_meta_table), + "region_prio", print_region_priority(region_prio), + "replica_prio", print_replica_priority(replica_prio), + K(is_leader), + K(idx), + K(is_log_range_valid)); + } else { + // success + } + } // for + + LOG_DEBUG("update server list by clog history", KR(ret), "pkey", req.pkey_, + "next_log_id", req.next_log_id_, "start_tstamp", req.start_tstamp_, + "svr_list", req.svr_list_, "clog_history_records", records); + } + } + return ret; +} + +int ObLogSvrFinder::init_worker_data_(const int64_t thread_num) +{ + int ret = OB_SUCCESS; + + int64_t alloc_size = thread_num * sizeof(WorkerData); + // The number of aggregated SQL entries is initialised using the maximum value + int64_t max_sql_batch_count = ObLogConfig::max_svr_finder_sql_batch_count; + const int64_t sql_buf_len = max_sql_batch_count * DEFAULT_SQL_LENGTH; + worker_data_ = static_cast(ob_malloc(alloc_size, ObModIds::OB_LOG_FETCHER_SVR_FINDER)); + + if (OB_ISNULL(worker_data_)) { + LOG_ERROR("allocate memory fail", K(worker_data_), K(alloc_size), K(thread_num)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // init worker data + for (int64_t idx = 0, cnt = thread_num; OB_SUCCESS == ret && idx < cnt; ++idx) { + new (worker_data_ + idx) WorkerData(); + WorkerData &data = worker_data_[idx]; + + if (OB_FAIL(data.init(sql_buf_len))) { + LOG_ERROR("init worker data fail", KR(ret), 
K(sql_buf_len)); + } else { + LOG_INFO("init worker data succ", "thread_idx", idx, K(thread_num), K(sql_buf_len)); + } + } + } + + return ret; +} + +void ObLogSvrFinder::destory_worker_data_() +{ + if (NULL != worker_data_) { + for (int64_t idx = 0, cnt = thread_num_; idx < cnt; ++idx) { + worker_data_[idx].~WorkerData(); + } + + ob_free(worker_data_); + worker_data_ = NULL; + } +} + +int ObLogSvrFinder::WorkerData::init(const int64_t sql_buf_len) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(query_.init(sql_buf_len))) { + LOG_ERROR("BatchSQLQuery init fail", KR(ret), K(sql_buf_len)); + } else { + // succ + } + + return ret; +} + +void ObLogSvrFinder::WorkerData::destroy() +{ + req_list_.reset(); + query_.destroy(); +} + +///////////////////////////////////////////// ISvrFinderReq ///////////////////////////////////////////// + +const char* ISvrFinderReq::print_state(const int state) +{ + const char *str = "INVALID"; + + switch (state) { + case IDLE: + str = "IDLE"; + break; + case REQ: + str = "REQ"; + break; + case DONE: + str = "DONE"; + break; + default: + str = "INVALID"; + break; + } + + return str; +} + +const char * ISvrFinderReq::print_type(const int type) +{ + const char *str = "INVALID"; + switch (type) { + case REQ_UNKNOWN: + str = "REQ_UNKNOWN"; + break; + case REQ_FIND_SVR: + str = "REQ_FIND_SVR"; + break; + case REQ_FIND_LEADER: + str = "REQ_FIND_LEADER"; + break; + default: + str = "INVALID"; + break; + } + return str; +} + +///////////////////////////////////////////// ISvrFinderReq ///////////////////////////////////////////// + +void SvrFindReq::reset() +{ + ISvrFinderReq::reset(); + + pkey_.reset(); + start_tstamp_ = OB_INVALID_TIMESTAMP; + next_log_id_ = OB_INVALID_ID; + req_by_start_tstamp_ = false; + req_by_next_log_id_ = false; + + svr_list_ = NULL; +} + +uint64_t SvrFindReq::hash() const +{ + // Hash by "PKEY + next_log_id" + return murmurhash(&next_log_id_, sizeof(next_log_id_), pkey_.hash()); +} + +bool SvrFindReq::is_valid() const +{ + // Requires that the list of servers cannot be queried based on both timestamp and log id + return NULL != svr_list_ && (req_by_start_tstamp_ != req_by_next_log_id_); +} + +void SvrFindReq::reset_for_req_by_tstamp(IPartSvrList &svr_list, + const common::ObPartitionKey &pkey, + const int64_t tstamp) +{ + reset(); + svr_list_ = &svr_list; + pkey_ = pkey; + req_by_start_tstamp_ = true; + start_tstamp_ = tstamp; +} + +void SvrFindReq::reset_for_req_by_log_id(IPartSvrList &svr_list, + const common::ObPartitionKey &pkey, + const uint64_t id) +{ + reset(); + svr_list_ = &svr_list; + pkey_ = pkey; + req_by_next_log_id_ = true; + next_log_id_ = id; +} + +} +} diff --git a/src/liboblog/src/ob_log_svr_finder.h b/src/liboblog/src/ob_log_svr_finder.h new file mode 100644 index 0000000000000000000000000000000000000000..2e57881fc05ec7d4fcf8d0bab0e0dd8132495df6 --- /dev/null +++ b/src/liboblog/src/ob_log_svr_finder.h @@ -0,0 +1,331 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_SVR_FINDER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_SVR_FINDER_H_ + +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/oblog/ob_log_module.h" // K_ +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "lib/net/ob_addr.h" // ObAddr +#include "lib/profile/ob_trace_id.h" // ObCurTraceId +#include "common/ob_partition_key.h" // ObPartitionKey + +#include "ob_log_utils.h" // TS_TO_STR +#include "ob_map_queue_thread.h" // ObMapQueueThread +#include "ob_log_part_svr_list.h" // IPartSvrList +#include "ob_log_systable_helper.h" // IObLogSysTableHelper +#include "ob_log_config.h" // MAX_SVR_FINDER_THREAD_NUM +#include "ob_log_svr_blacklist.h" // ObLogSvrBlacklist + +namespace oceanbase +{ +namespace liboblog +{ + +struct ISvrFinderReq; +struct SvrFindReq; +struct LeaderFindReq; +class IObLogSvrFinder +{ +public: + virtual ~IObLogSvrFinder() {} + +public: + // Asynchronous request server list + virtual int async_svr_find_req(SvrFindReq *req) = 0; + // Asynchronous request leader + virtual int async_leader_find_req(LeaderFindReq *req) = 0; + + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; +}; + +/////////////////////////////////// ObLogSvrFinder /////////////////////////////////////// + +typedef ObMapQueueThread SvrFinderWorker; + +class IObLogErrHandler; +class IObLogAllSvrCache; + +class ObLogSvrFinder : public IObLogSvrFinder, public SvrFinderWorker +{ +public: + ObLogSvrFinder(); + virtual ~ObLogSvrFinder(); + + // Class global variables +public: + static int64_t g_sql_batch_count; + +public: + int init(const int64_t thread_num, + IObLogErrHandler &err_handler, + IObLogAllSvrCache &all_svr_cache, + IObLogSysTableHelper &systable_helper); + void destroy(); + +public: + // Asynchronous request server list + int async_svr_find_req(SvrFindReq *req); + + // Asynchronous request server leader + int async_leader_find_req(LeaderFindReq *req); + + int start(); + void stop(); + void mark_stop_flag() { SvrFinderWorker::mark_stop_flag(); } + +public: + // Processing data: support batch tasks + // 1. Batch aggregation of a batch of requests: request heartbeat/request server list, SQL aggregation + // 2. Initiate batch SQL requests + // 3. 
Process return batch results + virtual void run(const int64_t thread_index); + +public: + void configure(const ObLogConfig &config); + +private: + static const int64_t COND_TIME_WAIT = 1L * 1000L * 1000L; + static const int64_t DEFAULT_SQL_LENGTH = 1024; + static const int64_t SQL_TIME_THRESHOLD = 1 * _SEC_; + + typedef common::ObArray SvrFinderReqList; + + // Data local to each Worker + struct WorkerData + { + SvrFinderReqList req_list_; + IObLogSysTableHelper::BatchSQLQuery query_; + + WorkerData() : req_list_(), query_() {} + ~WorkerData() { destroy(); } + + int init(const int64_t sql_buf_len); + void destroy(); + + void reset() + { + req_list_.reset(); + query_.reset(); + } + }; + + int init_worker_data_(const int64_t thread_num); + void destory_worker_data_(); + +private: + int async_req_(ISvrFinderReq *req); + int dispatch_worker_(ISvrFinderReq *req); + + // Asynchronous request for bulk acquisition + int do_retrieve_(const int64_t thread_index, WorkerData &data); + // SQL agregate + int do_aggregate_(const int64_t thread_index, WorkerData &data); + int do_sql_aggregate_for_svr_list_(ISvrFinderReq *orig_req, WorkerData &data); + // aggregate query for meta_table + int do_sql_aggregate_for_query_meta_info_(SvrFindReq &req, IObLogSysTableHelper::BatchSQLQuery &query); + // aggregate query log_history_info_v2 + int do_sql_aggregate_for_query_clog_history_(SvrFindReq &req, IObLogSysTableHelper::BatchSQLQuery &query); + // aggregate query leader + int do_sql_aggregate_for_leader_(ISvrFinderReq *orig_req, WorkerData &data); + // batch query of SQL + int do_batch_query_(WorkerData &data); + // Processing bulk search results + int handle_batch_query_(WorkerData &data); + int handle_batch_query_for_svr_list_(ISvrFinderReq *req, + IObLogSysTableHelper::BatchSQLQuery &query); + int handle_clog_history_info_records_(SvrFindReq &req, + const IObLogSysTableHelper::ClogHistoryRecordArray &records); + int handle_meta_info_records_(SvrFindReq &req, + const IObLogSysTableHelper::MetaRecordArray &records); + int handle_batch_query_for_leader_(ISvrFinderReq *req, + IObLogSysTableHelper::BatchSQLQuery &query); + // reset + void reset_all_req_(const int err_code, WorkerData &data); + +// private data members +private: + bool inited_; + int64_t thread_num_; + IObLogErrHandler *err_handler_; + IObLogAllSvrCache *all_svr_cache_; + IObLogSysTableHelper *systable_helper_; + WorkerData *worker_data_; + ObLogSvrBlacklist svr_blacklist_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSvrFinder); +}; + +////////////////////////////////// request struct ////////////////////////////// + +typedef common::ObCurTraceId::TraceId TraceIdType; +/* + * Base class for request + * Request status: + * - IDLE: Idle state + * - REQ: requesting state, result is not readable, external need to ensure request memory validity + * - DONE: request processing completed, result can be read and reset + */ +struct ISvrFinderReq +{ + enum State { IDLE = 0, REQ, DONE }; + + // request type + enum Type + { + REQ_UNKNOWN = 0, + REQ_FIND_SVR = 1, // request server list + REQ_FIND_LEADER = 2, // request leader + }; + + int type_; + int state_; + int err_code_; + int mysql_err_code_; + TraceIdType trace_id_; + + explicit ISvrFinderReq(Type type) : type_(type) { reset(); } + virtual ~ISvrFinderReq() { reset(); } + + // Note: reset does not modify type + void reset() + { + state_ = IDLE; + err_code_ = 0; + mysql_err_code_ = 0; + trace_id_.reset(); + } + + virtual uint64_t hash() const = 0; + + void set_state_idle() { ATOMIC_STORE(&state_, IDLE); } + 
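+  // Request lifecycle: the caller prepares the request in IDLE, async_svr_find_req() /
+  // async_leader_find_req() moves it to REQ and dispatches it to a worker, and the worker
+  // moves it to DONE once the batch query has been processed. A sketch of a typical caller
+  // (illustrative only, not lifted from real call sites):
+  //
+  //   SvrFindReq req;
+  //   req.reset_for_req_by_tstamp(svr_list, pkey, start_tstamp);  // state == IDLE
+  //   svr_finder.async_svr_find_req(&req);                        // IDLE -> REQ, keep req alive
+  //   ... wait until req.is_state_done() ...                      // REQ -> DONE set by worker
+  //   if (OB_SUCCESS == req.get_err_code()) { /* svr_list has been filled */ }
+  //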
void set_state_req() { ATOMIC_STORE(&state_, REQ); } + void set_state_done(const int err_code, const int mysql_err_code, const TraceIdType *trace_id) + { + ATOMIC_STORE(&err_code_, err_code); + ATOMIC_STORE(&mysql_err_code_, mysql_err_code); + ATOMIC_STORE(&state_, DONE); + if (NULL != trace_id) { + trace_id_ = *trace_id; + } else { + trace_id_.reset(); + } + } + + int get_err_code() const { return ATOMIC_LOAD(&err_code_); } + int get_mysql_err_code() const { return ATOMIC_LOAD(&mysql_err_code_); } + int get_state() const { return (ATOMIC_LOAD(&state_)); } + + bool is_state_idle() const { return (ATOMIC_LOAD(&state_)) == IDLE; } + bool is_state_req() const { return (ATOMIC_LOAD(&state_)) == REQ; } + bool is_state_done() const { return (ATOMIC_LOAD(&state_)) == DONE; } + + bool is_find_svr_req() const { return REQ_FIND_SVR == type_; } + bool is_find_leader_req() const { return REQ_FIND_LEADER == type_; } + + static const char *print_state(const int state); + static const char *print_type(const int type); + + TO_STRING_KV("type", print_type(type_), + "state", print_state(state_), + K_(err_code), + K_(mysql_err_code), + K_(trace_id)); +}; + +// Request structure for requesting Server lists +struct SvrFindReq : public ISvrFinderReq +{ + // request paraments + common::ObPartitionKey pkey_; + int64_t start_tstamp_; + uint64_t next_log_id_; + bool req_by_start_tstamp_; + bool req_by_next_log_id_; + + // request result + IPartSvrList *svr_list_; + + SvrFindReq() : ISvrFinderReq(REQ_FIND_SVR) { reset(); } + virtual ~SvrFindReq() { reset(); } + + void reset(); + virtual uint64_t hash() const; + + bool is_valid() const; + + // Rest for timestamp-based request server list + void reset_for_req_by_tstamp(IPartSvrList &svr_list, + const common::ObPartitionKey &pkey, + const int64_t tstamp); + + // Reset for log id based request server list + void reset_for_req_by_log_id(IPartSvrList &svr_list, + const common::ObPartitionKey &pkey, + const uint64_t id); + + TO_STRING_KV("base", (ISvrFinderReq&)(*this), + K_(pkey), + K_(req_by_start_tstamp), + "start_tstamp", TS_TO_STR(start_tstamp_), + K_(req_by_next_log_id), + K_(next_log_id), + KPC_(svr_list)); +}; + +// Request structure for requesting Leader information +struct LeaderFindReq : public ISvrFinderReq +{ + // request paraments + common::ObPartitionKey pkey_; + + // request result + bool has_leader_; + common::ObAddr leader_; + + LeaderFindReq() : ISvrFinderReq(REQ_FIND_LEADER) { reset(); } + virtual ~LeaderFindReq() { reset(); } + + virtual uint64_t hash() const + { + return pkey_.hash(); + } + + void reset() + { + ISvrFinderReq::reset(); + + pkey_.reset(); + has_leader_ = false; + leader_.reset(); + } + + void reset(const common::ObPartitionKey &pkey) + { + reset(); + pkey_ = pkey; + } + + TO_STRING_KV("base", (ISvrFinderReq&)(*this), + K_(pkey), + K_(has_leader), + K_(leader)); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_svr_stream.cpp b/src/liboblog/src/ob_log_svr_stream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8716dee2cbd5cfbc17cb4cbfe8b482bfa4753827 --- /dev/null +++ b/src/liboblog/src/ob_log_svr_stream.cpp @@ -0,0 +1,125 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_svr_stream.h" + +#include "ob_log_part_fetch_ctx.h" // PartFetchCtx +#include "ob_log_fetch_stream_type.h" // FETCH_STREAM_TYPE_HOT +#include "ob_log_utils.h" // SIZE_TO_STR + +namespace oceanbase +{ +namespace liboblog +{ + +SvrStream::SvrStream() : + ddl_stream_(FETCH_STREAM_TYPE_DDL), + hot_stream_(FETCH_STREAM_TYPE_HOT), + cold_stream_(FETCH_STREAM_TYPE_COLD) +{ + reset(); +} + +SvrStream::~SvrStream() +{ + reset(); +} + +void SvrStream::reset() +{ + svr_.reset(); + ddl_stream_.reset(); + hot_stream_.reset(); + cold_stream_.reset(); +} + +void SvrStream::reset(const common::ObAddr &svr, + IObLogRpc &rpc, + IFetchStreamPool &fs_pool, + IObLogSvrFinder &svr_finder, + IObLogFetcherHeartbeatWorker &heartbeater, + IObLogStreamWorker &stream_worker, + IFetchLogARpcResultPool &rpc_result_pool, + PartProgressController &progress_controller) +{ + reset(); + + svr_ = svr; + + ddl_stream_.reset(svr, + rpc, + fs_pool, + svr_finder, + heartbeater, + stream_worker, + rpc_result_pool, + progress_controller); + + hot_stream_.reset(svr, + rpc, + fs_pool, + svr_finder, + heartbeater, + stream_worker, + rpc_result_pool, + progress_controller); + + cold_stream_.reset(svr, + rpc, + fs_pool, + svr_finder, + heartbeater, + stream_worker, + rpc_result_pool, + progress_controller); +} + +int SvrStream::dispatch(PartFetchCtx &task) +{ + int ret = OB_SUCCESS; + FetchStreamType stype = task.get_fetch_stream_type(); + + if (FETCH_STREAM_TYPE_HOT == stype) { + if (OB_FAIL(hot_stream_.dispatch(task))) { + LOG_ERROR("dispatch fetch task to fetch stream container fail", KR(ret), K(hot_stream_), + K(task)); + } + } else if (FETCH_STREAM_TYPE_COLD == stype) { + if (OB_FAIL(cold_stream_.dispatch(task))) { + LOG_ERROR("dispatch fetch task to fetch stream container fail", KR(ret), K(cold_stream_), + K(task)); + } + } else if (FETCH_STREAM_TYPE_DDL == stype) { + if (OB_FAIL(ddl_stream_.dispatch(task))) { + // It is required that the join must be successful and that a DDL stream can hold all the DDL partitions + LOG_ERROR("dispatch fetch task to fetch stream container fail", KR(ret), K(ddl_stream_), + K(task)); + } + } else { + LOG_ERROR("invalid stream type", K(stype), K(task)); + ret = OB_INVALID_ARGUMENT; + } + + return ret; +} + +void SvrStream::do_stat() +{ + ddl_stream_.do_stat(); + hot_stream_.do_stat(); + cold_stream_.do_stat(); +} + +} +} diff --git a/src/liboblog/src/ob_log_svr_stream.h b/src/liboblog/src/ob_log_svr_stream.h new file mode 100644 index 0000000000000000000000000000000000000000..ce0221c56cd26010f29f7dd17e2b8a53187501b5 --- /dev/null +++ b/src/liboblog/src/ob_log_svr_stream.h @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_SVR_STREAM_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_SVR_STREAM_H__ + +#include "lib/net/ob_addr.h" // ObAddr + +#include "ob_log_fetch_stream_container.h" // FetchStreamContainer + +namespace oceanbase +{ +namespace liboblog +{ +class IFetchStreamPool; +class IObLogRpc; +class IObLogStreamWorker; +class IFetchLogARpcResultPool; +class IObLogSvrFinder; +class IObLogFetcherHeartbeatWorker; +class PartProgressController; +class PartFetchCtx; + +class SvrStream +{ +public: + SvrStream(); + virtual ~SvrStream(); + +public: + void reset(); + void reset(const common::ObAddr &svr, + IObLogRpc &rpc, + IFetchStreamPool &fs_pool, + IObLogSvrFinder &svr_finder, + IObLogFetcherHeartbeatWorker &heartbeater, + IObLogStreamWorker &stream_worker, + IFetchLogARpcResultPool &rpc_result_pool, + PartProgressController &progress_controller); + int dispatch(PartFetchCtx &task); + + void do_stat(); + + TO_STRING_KV(K_(svr), + K_(ddl_stream), + K_(hot_stream), + K_(cold_stream)); + +private: + common::ObAddr svr_; + FetchStreamContainer ddl_stream_; + FetchStreamContainer hot_stream_; + FetchStreamContainer cold_stream_; + +private: + DISALLOW_COPY_AND_ASSIGN(SvrStream); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_systable_helper.cpp b/src/liboblog/src/ob_log_systable_helper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8fdd96c48e4fb52f468e73911e5cf31e9a5f5507 --- /dev/null +++ b/src/liboblog/src/ob_log_systable_helper.cpp @@ -0,0 +1,1709 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "ob_log_systable_helper.h" + +#include "common/ob_role.h" // LEADER +#include "common/ob_partition_key.h" // ObPartitionKey + +#include "ob_log_config.h" // ObLogConfig, TCONF +#include "share/inner_table/ob_inner_table_schema_constants.h" // OB_***_TNAME +#include "ob_log_utils.h" +#include "share/ob_cluster_version.h" // GET_MIN_CLUSTER_VERSION + +#define GET_DATA(type, index, val, val_str) \ + do { \ + if (OB_SUCCESS == ret) {\ + bool is_null_value = false; \ + int64_t idx = (index); \ + if (OB_FAIL(get_##type(idx, (val), is_null_value)) || is_null_value) { \ + LOG_ERROR("get " val_str " fail", KR(ret), K(idx), K(is_null_value)); \ + ret = (OB_SUCCESS == ret ? 
OB_INVALID_DATA : ret); \ + } \ + } \ + } while (0) + +using namespace oceanbase::common; +using namespace oceanbase::share; +namespace oceanbase +{ +namespace liboblog +{ + +bool ISQLStrategy::g_is_replica_type_info_valid = true; + +bool is_cluster_version_be_equal_or_greater_than_200_() +{ + bool bool_ret = true; + + // ob version: 2_0_0 + bool_ret = (GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2000); + + return bool_ret; +} + +bool is_cluster_version_be_equal_or_greater_than_220_() +{ + bool bool_ret = true; + + // ob version: 2_2_0 + bool_ret = (GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2200); + + return bool_ret; +} + +////////////////////////////////////// QueryClogHistorySQLStrategy ///////////////////////////////// +QueryClogHistorySQLStrategy::QueryClogHistorySQLStrategy() : + inited_(false), + pkey_(), + log_id_(OB_INVALID_ID), + tstamp_(OB_INVALID_TIMESTAMP), + query_by_log_id_(false) +{ +} + +QueryClogHistorySQLStrategy::~QueryClogHistorySQLStrategy() +{ + destroy(); +} + +void QueryClogHistorySQLStrategy::destroy() +{ + inited_ = false; + + pkey_.reset(); + log_id_ = OB_INVALID_ID; + tstamp_ = OB_INVALID_TIMESTAMP; + query_by_log_id_ = false; +} + +int QueryClogHistorySQLStrategy::init_by_log_id_query(const ObPartitionKey &pkey, + const uint64_t log_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else { + pkey_ = pkey; + log_id_ = log_id; + query_by_log_id_ = true; + + inited_ = true; + } + + return ret; +} + +int QueryClogHistorySQLStrategy::init_by_tstamp_query(const ObPartitionKey &pkey, + const int64_t tstamp) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else { + pkey_ = pkey; + tstamp_ = tstamp; + query_by_log_id_ = false; + + inited_ = true; + } + + return ret; +} + +int QueryClogHistorySQLStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + + if (OB_UNLIKELY(! 
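+  // Shape of the generated statement for a by-log-id request on a cluster >= 2.2.0
+  // (identifiers and values below are illustrative):
+  //   SELECT start_log_id, end_log_id, svr_ip, svr_port
+  //   FROM __all_virtual_server_log_meta
+  //   WHERE tenant_id=1001 AND table_id=... AND partition_idx=0 AND partition_cnt=0
+  //   AND (<log_id> <= end_log_id OR <OB_INVALID_ID> = end_log_id)
+  //   ORDER BY gmt_create;
+  // Older clusters query __all_clog_history_info_v2 without the tenant_id filter, and
+  // timestamp-based requests filter on end_log_timestamp instead.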
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <=0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else { + // Always use __all_virtual_server_log_meta for clusters greater than or equal to version 2_2_0 + bool is_all_virtual_server_log_meta_valid = is_cluster_version_be_equal_or_greater_than_220_(); + const char *table_name = NULL; + + if (is_all_virtual_server_log_meta_valid) { + table_name = OB_ALL_VIRTUAL_SERVER_LOG_META_TNAME; + } else { + table_name = OB_ALL_CLOG_HISTORY_INFO_V2_TNAME; + } + + if (is_all_virtual_server_log_meta_valid) { + // After __all_clog_history_info_v2 is split into individual tenants, __all_virtual_server_log_meta is queried to specify the tenant_id, to ensure query efficiency + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT " + "start_log_id, " + "end_log_id, " + "svr_ip, " + "svr_port " + "FROM %s " + "WHERE tenant_id=%lu " + "AND table_id=%lu " + "AND partition_idx=%ld " + "AND partition_cnt=%d ", + table_name, + pkey_.get_tenant_id(), + pkey_.table_id_, + pkey_.get_partition_id(), + pkey_.get_partition_cnt()))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), K(table_name), K(pkey_), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } else { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT " + "start_log_id, " + "end_log_id, " + "svr_ip, " + "svr_port " + "FROM %s " + "WHERE table_id=%lu " + "AND partition_idx=%ld " + "AND partition_cnt=%d ", + table_name, + pkey_.table_id_, + pkey_.get_partition_id(), + pkey_.get_partition_cnt()))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), K(table_name), K(pkey_), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } + + if (OB_SUCC(ret)) { + // query based on log id + if (query_by_log_id_) { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "AND (%lu<=end_log_id OR %lu=end_log_id) " + "ORDER BY gmt_create;", + log_id_, + OB_INVALID_ID))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } else { + // query based on timestamp + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "AND (%ld<=end_log_timestamp OR %ld=end_log_timestamp) " + "ORDER BY gmt_create;", + tstamp_, + OB_INVALID_TIMESTAMP))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } + } + } + + return ret; +} + +////////////////////////////////////// QueryMetaInfo ///////////////////////////////// +QueryMetaInfoSQLStrategy::QueryMetaInfoSQLStrategy() : + inited_(false), + pkey_(), + only_query_leader_(false) +{ +} + +QueryMetaInfoSQLStrategy::~QueryMetaInfoSQLStrategy() +{ + destroy(); +} + +void QueryMetaInfoSQLStrategy::destroy() +{ + inited_ = false; + + pkey_.reset(); + only_query_leader_ = false; +} + +int QueryMetaInfoSQLStrategy::init(const ObPartitionKey &pkey, bool only_query_leader) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else { + pkey_ = pkey; + only_query_leader_ = only_query_leader; + + inited_ = true; + } + + return ret; +} + +int QueryMetaInfoSQLStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + int64_t leader_role = common::LEADER; + + if (OB_UNLIKELY(! 
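+  // Meta-info queries select svr_ip, svr_port, role (plus replica_type when
+  // g_is_replica_type_info_valid) with a weak-read hint, reading
+  // __all_virtual_partition_location on clusters >= 2.0.0 and __all_root_table /
+  // __all_meta_table otherwise; only_query_leader_ appends a "role = LEADER" filter.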
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <=0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else { + // Cluster greater than or equal to version 2_0_0, always use __all_virtual_partition_location + bool is_partition_location_table_valid = is_cluster_version_be_equal_or_greater_than_200_(); + // Generate different SQL according to version + bool is_replica_type_info_valid = ATOMIC_LOAD(&g_is_replica_type_info_valid); + const char *table_name = NULL; + + if (is_partition_location_table_valid) { + table_name = "__all_virtual_partition_location"; + } else { + table_name = is_inner_table(pkey_.table_id_) ? "__all_root_table" : "__all_meta_table"; + } + + if (is_replica_type_info_valid) { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT /*+READ_CONSISTENCY(WEAK)*/ " + "svr_ip, svr_port, role, replica_type " + "FROM %s " + "where tenant_id=%lu AND table_id=%lu AND partition_id=%ld", + table_name, + extract_tenant_id(pkey_.table_id_), + pkey_.table_id_, + pkey_.get_partition_id()))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } else { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT /*+READ_CONSISTENCY(WEAK)*/ " + "svr_ip, svr_port, role " + "FROM %s " + "where tenant_id=%lu AND table_id=%lu AND partition_id=%ld", + table_name, + extract_tenant_id(pkey_.table_id_), + pkey_.table_id_, + pkey_.get_partition_id()))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } + + // If only the leader info is queried, add the leader query condition + if (OB_SUCCESS == ret && only_query_leader_) { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, " and role=%ld", leader_role))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, ";"))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } + } + } + + return ret; +} + +////////////////////////////////////// QueryAllServerInfo ///////////////////////////////// +int QueryAllServerInfoStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + + if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <=0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT /*+READ_CONSISTENCY(WEAK)*/ " + "svr_ip, svr_port, status, zone " + "FROM %s", + OB_ALL_SERVER_TNAME))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } else { + // succ + } + + return ret; +} + +////////////////////////////////////// QueryAllZoneInfo ///////////////////////////////// +int QueryAllZoneInfoStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + const char *region = "region"; + + if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <= 0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT 
/*+READ_CONSISTENCY(WEAK)*/ " + "zone, info " + "FROM %s " + "WHERE name = \'%s\'", + OB_ALL_ZONE_TNAME, region))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } else { + // succ + } + + return ret; +} + +////////////////////////////////////// QueryAllZoneTypeInfo ///////////////////////////////// +int QueryAllZoneTypeStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + const char *zone_type = "zone_type"; + + if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <= 0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "SELECT /*+READ_CONSISTENCY(WEAK)*/ " + "zone, info " + "FROM %s " + "WHERE name = \'%s\'", + OB_ALL_ZONE_TNAME, zone_type))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), "buf_size", mul_statement_buf_len, K(sql_buf)); + } else { + // succ + } + return ret; +} + +////////////////////////////////////// QueryClusterId ///////////////////////////////// +int QueryClusterIdStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + const char *query_sql = "show parameters like 'cluster_id'"; + + if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <=0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "%s", query_sql))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), K(query_sql), "buf_size", mul_statement_buf_len, K(sql_buf)); + } else { + // succ + } + + return ret; +} + +///////////////////////// QueryObserverVersionStrategy ///////////////////////// +int QueryObserverVersionStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + const char *query_sql = "select distinct(value) from __all_virtual_sys_parameter_stat where name='min_observer_version';"; + + if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <=0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "%s", query_sql))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), K(query_sql), "buf_size", mul_statement_buf_len, K(sql_buf)); + } else { + // succ + } + + return ret; +} + +///////////////////////// QueryTimeZoneInfoVersionStrategy///////////////////////// +int QueryTimeZoneInfoVersionStrategy::build_sql_statement(char *sql_buf, + const int64_t mul_statement_buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + pos = 0; + const char *query_sql = NULL; + const bool need_query_tenant_timezone_version = (GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2260); + + if (need_query_tenant_timezone_version) { + query_sql = "select value from __all_virtual_sys_stat where name='current_timezone_version' and tenant_id="; + } else { + query_sql = "select value from __all_zone where name='time_zone_info_version';"; + } + if (OB_ISNULL(sql_buf) || OB_UNLIKELY(mul_statement_buf_len <=0)) { + LOG_ERROR("invalid argument", K(sql_buf), K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, + "%s", query_sql))) { + LOG_ERROR("build sql fail", KR(ret), K(pos), 
K(query_sql), "buf_size", mul_statement_buf_len, K(sql_buf)); + } else { + if (need_query_tenant_timezone_version) { + if (OB_FAIL(databuff_printf(sql_buf, mul_statement_buf_len, pos, "%lu;", tenant_id_))) { + LOG_ERROR("build tenant_id sql fail", KR(ret), K(pos), K(query_sql), + "buf_size", mul_statement_buf_len, K(sql_buf), K(tenant_id_)); + } + } + } + + return ret; +} + +IObLogSysTableHelper::BatchSQLQuery::BatchSQLQuery() : + inited_(false), + enable_multiple_statement_(false), + mul_statement_buf_(NULL), + mul_statement_buf_len_(0), + pos_(0), + batch_sql_count_(0) +{ + single_statement_buf_[0] = '\0'; +} + +IObLogSysTableHelper::BatchSQLQuery::~BatchSQLQuery() +{ + destroy(); +} + +int IObLogSysTableHelper::BatchSQLQuery::init(const int64_t mul_statement_buf_len) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(mul_statement_buf_len <= 0)) { + LOG_ERROR("invalid arguments", K(mul_statement_buf_len)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(mul_statement_buf_ = static_cast(ob_malloc( + mul_statement_buf_len, ObModIds::OB_LOG_BATCH_SQL_QUERY)))) { + LOG_ERROR("mul_statement_buf_ is null", K(mul_statement_buf_), K(mul_statement_buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + enable_multiple_statement_ = true; + mul_statement_buf_len_ = mul_statement_buf_len; + + pos_ = 0; + batch_sql_count_ = 0; + + inited_ = true; + } + + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::init(ISQLStrategy *strategy) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(strategy)) { + LOG_ERROR("strategy is null", K(strategy)); + ret = OB_INVALID_ARGUMENT; + } else { + enable_multiple_statement_ = false; + single_statement_buf_[0] = '\0'; + + pos_ = 0; + batch_sql_count_ = 0; + + inited_ = true; + + if (OB_FAIL(do_sql_aggregate(strategy))) { + LOG_ERROR("do_sql_aggregate fail", KR(ret)); + } else if (OB_FAIL(init_sql())) { + LOG_ERROR("init_sql fail", KR(ret)); + } else { + // succ + } + } + + return ret; +} + +void IObLogSysTableHelper::BatchSQLQuery::destroy() +{ + MySQLQueryBase::destroy(); + + inited_ = false; + + enable_multiple_statement_ = false; + if (NULL != mul_statement_buf_) { + ob_free(mul_statement_buf_); + mul_statement_buf_ = NULL; + } + mul_statement_buf_len_ = 0; + single_statement_buf_[0] = '\0'; + + pos_ = 0; + batch_sql_count_ = 0; +} + +void IObLogSysTableHelper::BatchSQLQuery::reset() +{ + MySQLQueryBase::destroy(); + + pos_ = 0; + batch_sql_count_ = 0; +} + +int IObLogSysTableHelper::BatchSQLQuery::do_sql_aggregate(ISQLStrategy *strategy) +{ + int ret = OB_SUCCESS; + int64_t sql_len = 0; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(strategy)) { + LOG_ERROR("strategy is null", K(strategy)); + ret = OB_INVALID_ARGUMENT; + } else { + if (enable_multiple_statement_) { + if (OB_FAIL(strategy->build_sql_statement(mul_statement_buf_ + pos_, mul_statement_buf_len_ - pos_, sql_len))) { + LOG_ERROR("strategy build_sql_statement fail", KR(ret), K(mul_statement_buf_), K(mul_statement_buf_len_), + K(pos_), K(sql_len)); + } + } else { + if (OB_FAIL(strategy->build_sql_statement(single_statement_buf_, sizeof(single_statement_buf_), + sql_len))) { + LOG_ERROR("strategy build_sql_statement fail", KR(ret), K(single_statement_buf_), + "buf_len", sizeof(single_statement_buf_), K(sql_len)); + } + } + + if (OB_SUCC(ret)) { + pos_ += sql_len; + ++batch_sql_count_; + } + } + + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::init_sql() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else { + if (enable_multiple_statement_) { + if (OB_FAIL(MySQLQueryBase::init(mul_statement_buf_, pos_))) { + LOG_ERROR("init MySQLQueryBase fail", KR(ret), K(mul_statement_buf_), K(pos_)); + } + } else { + if (OB_FAIL(MySQLQueryBase::init(single_statement_buf_, pos_))) { + LOG_ERROR("init MySQLQueryBase fail", KR(ret), K(single_statement_buf_), K(pos_)); + } + } + } + + return ret; +} + +template +int IObLogSysTableHelper::BatchSQLQuery::get_records_tpl_(RecordsType &records, const char *event, + int64_t &record_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(next_result())) { + // OB_NEED_RETRY indicates that a retry is required + LOG_ERROR("next_result fail", KR(ret), K(event), "mysql_error_code", get_mysql_err_code(), + "mysql_err_msg", get_mysql_err_msg()); + } else { + record_count = 0; + + while (OB_SUCC(ret)) { + // fetch next row + if (OB_FAIL(next_row())) { + if (OB_ITER_END == ret) { + // End of iteration + } else { + // OB_NEED_RETRY indicates that a retry is required + LOG_ERROR("get next row fail", KR(ret), K(event), + "mysql_error_code", get_mysql_err_code(), "mysql_err_msg", get_mysql_err_msg()); + } + } + // Parsing data from the rows + else if (OB_FAIL(parse_record_from_row_(records))) { + LOG_ERROR("parse records from row data fail", KR(ret), K(event)); + } else { + record_count++; + } + } // while + + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + + return ret; +} + + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(ClogHistoryRecordArray &records) +{ + int ret = OB_SUCCESS; + ObString ip_str; + ClogHistoryRecord record; + + // Store tmp data. 
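+  // Column order must match the SELECT list built by QueryClogHistorySQLStrategy:
+  // start_log_id, end_log_id, svr_ip, svr_port. GET_DATA() advances `index` one column at a
+  // time and turns a NULL value or a failed read into OB_INVALID_DATA.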
+ int64_t port = 0; + int64_t index = -1; + + index++; + GET_DATA(uint, index, record.start_log_id_, "start log id"); + + index++; + GET_DATA(uint, index, record.end_log_id_, "end log id"); + + index++; + GET_DATA(varchar, index, ip_str, "server ip"); + + index++; + GET_DATA(int, index, port, "server port"); + + if (OB_SUCCESS == ret) { + int64_t pos = 0; + if (OB_FAIL(databuff_printf(record.svr_ip_, sizeof(record.svr_ip_), pos, + "%.*s", ip_str.length(), ip_str.ptr()))) { + LOG_ERROR("save ip address fail", K(pos), "size", sizeof(record.svr_ip_), K(ip_str)); + } else { + record.svr_port_ = static_cast(port); + } + + if (OB_SUCCESS == ret && OB_FAIL(records.push_back(record))) { + LOG_ERROR("push back record fail", KR(ret), K(record), K(records)); + } + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(ClogHistoryRecordArray& records) +{ + int64_t record_count = 0; + return get_records_tpl_(records, "QueryClogHistory", record_count); +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(MetaRecordArray& records) +{ + int ret = OB_SUCCESS; + bool is_replica_type_info_valid = ATOMIC_LOAD(&ISQLStrategy::g_is_replica_type_info_valid); + ObString ip_str; + MetaRecord record; + + int64_t port = 0; + int64_t role = 0; + int64_t replica_type = -1; + int64_t index = -1; + + index++; + GET_DATA(varchar, index, ip_str, "server ip"); + + index++; + GET_DATA(int, index, port, "server port"); + + index++; + GET_DATA(int, index, role, "role"); + + // Get replica_type only for high version observer + if (is_replica_type_info_valid) { + index++; + GET_DATA(int, index, replica_type, "replica type"); + } + + if (OB_SUCCESS == ret) { + int64_t pos = 0; + if (OB_FAIL(databuff_printf(record.svr_ip_, sizeof(record.svr_ip_), pos, + "%.*s", ip_str.length(), ip_str.ptr()))) { + LOG_ERROR("save ip address fail", KR(ret), K(pos), + "buf_size", sizeof(record.svr_ip_), K(ip_str)); + } else { + record.svr_port_ = static_cast(port); + record.role_ = role; + if (is_replica_type_info_valid) { + record.replica_type_ = static_cast(replica_type); + } else { + // Low version default REPLICA_TYPE_FULL + record.replica_type_ = REPLICA_TYPE_FULL; + } + } + + if (OB_SUCCESS == ret && OB_FAIL(records.push_back(record))) { + LOG_ERROR("push back record fail", KR(ret), K(record), K(records)); + } + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(MetaRecordArray& records) +{ + int64_t record_count = 0; + return get_records_tpl_(records, "QueryMetaTable", record_count); +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(bool &has_leader, common::ObAddr &leader) +{ + int ret = OB_SUCCESS; + MetaRecordArray records; + MetaRecord rec; + has_leader = false; + leader.reset(); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } + // The query Leader is also based on Meta Table query, just get the first row of Meta Table query result directly + else if (OB_FAIL(get_records(records))) { + LOG_ERROR("get records fail while query leader info", KR(ret), + "mysql_error_code", get_mysql_err_code(), + "mysql_err_msg", get_mysql_err_msg()); + } else if (records.count() <= 0) { + // Returning an empty record means there is no leader + has_leader = false; + leader.reset(); + } + // Take only the first record + else if (OB_FAIL(records.at(0, rec))) { + LOG_ERROR("get record from array fail", KR(ret), K(records.count()), K(records)); + } else if (OB_UNLIKELY(!common::is_strong_leader(static_cast(rec.role_)))) { + LOG_ERROR("server role is not leader", K(rec)); + ret = OB_ERR_UNEXPECTED; + } else if (! leader.set_ip_addr(rec.svr_ip_, rec.svr_port_)) { + LOG_ERROR("set_ip_addr fail", K(leader), K(rec)); + ret = OB_ERR_UNEXPECTED; + } else { + has_leader = true; + } + + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(AllServerRecordArray& records) +{ + int ret = OB_SUCCESS; + ObString ip_str; + ObString status; + ObString zone; + AllServerRecord record; + int64_t port = 0; + int64_t index = -1; + + index++; + GET_DATA(varchar, index, ip_str, "server ip"); + + index++; + GET_DATA(int, index, port, "server port"); + + index++; + GET_DATA(varchar, index, status, "status"); + + index++; + GET_DATA(varchar, index, zone, "zone"); + + if (OB_SUCCESS == ret) { + int64_t pos = 0; + if (OB_FAIL(databuff_printf(record.svr_ip_, sizeof(record.svr_ip_), pos, + "%.*s", ip_str.length(), ip_str.ptr()))) { + LOG_ERROR("save ip address fail", KR(ret), K(pos), + "buf_size", sizeof(record.svr_ip_), K(ip_str)); + } else if (OB_FAIL(share::ObServerStatus::str2display_status(status.ptr(), record.status_))) { + LOG_ERROR("str2display_status fail", KR(ret), K(status.ptr()), K(status)); + } else if (OB_FAIL(record.zone_.assign(zone.ptr()))) { + LOG_ERROR("zone assign fail", KR(ret), K(zone.ptr()), K(zone)); + } else { + record.svr_port_ = static_cast(port); + } + + LOG_DEBUG("query all server info record", KR(ret), K(record)); + + if (OB_SUCCESS == ret && OB_FAIL(records.push_back(record))) { + LOG_ERROR("push back record fail", KR(ret), K(record), K(records)); + } + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(AllServerRecordArray& records) +{ + int64_t record_count = 0; + return get_records_tpl_(records, "QueryAllServer", record_count); +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(AllZoneRecordArray& records) +{ + int ret = OB_SUCCESS; + ObString zone; + ObString region; + AllZoneRecord record; + int64_t index = -1; + + index++; + GET_DATA(varchar, index, zone, "zone"); + + index++; + GET_DATA(varchar, index, region, "region"); + + if (OB_SUCCESS == ret) { + if (OB_FAIL(record.zone_.assign(zone.ptr()))) { + LOG_ERROR("zone assign fail", KR(ret), K(zone.ptr()), K(zone)); + } else if (OB_FAIL(record.region_.assign(region.ptr()))) { + LOG_ERROR("region assign fail", KR(ret), K(region.ptr()), K(region)); + } + LOG_DEBUG("query all zone info record", KR(ret), K(record)); + + if (OB_SUCCESS == ret && OB_FAIL(records.push_back(record))) { + LOG_ERROR("push back record fail", KR(ret), K(record), K(records)); + } + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(AllZoneRecordArray& records) +{ + int64_t record_count = 0; + int ret = get_records_tpl_(records, "QueryAllZone", 
record_count); + + if (OB_SUCCESS == ret && 0 == record_count) { + // Query the __all_zone table, if no record is retrieved, the region information is not available and a low version of the observer is connected. + ret = OB_ITEM_NOT_SETTED; + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(AllZoneTypeRecordArray& records) +{ + int ret = OB_SUCCESS; + ObString zone; + ObString zone_type_str; + AllZoneTypeRecord record; + int64_t index = -1; + + index++; + GET_DATA(varchar, index, zone, "zone"); + + index++; + GET_DATA(varchar, index, zone_type_str, "zone_type"); + + if (OB_SUCCESS == ret) { + if (OB_FAIL(record.zone_.assign(zone.ptr()))) { + LOG_ERROR("zone assign fail", KR(ret), K(zone.ptr()), K(zone)); + } else { + record.zone_type_ = str_to_zone_type(zone_type_str.ptr()); + } + LOG_DEBUG("query all zone info record", KR(ret), K(record)); + + if (OB_SUCCESS == ret && OB_FAIL(records.push_back(record))) { + LOG_ERROR("push back record fail", KR(ret), K(record), K(records)); + } + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(AllZoneTypeRecordArray& records) +{ + int64_t record_count = 0; + int ret = get_records_tpl_(records, "QueryAllZoneType", record_count); + + if (OB_SUCCESS == ret && 0 == record_count) { + // Query the __all_zone table, if no record is retrieved, the zone_type information is not available and a low version of the observer is connected. + ret = OB_ITEM_NOT_SETTED; + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(ClusterInfo &record) +{ + int ret = OB_SUCCESS; + int64_t index = -1; + const char *column_name = "value"; + // Get value of column `value` + // FIXME: it is assumed that show parameters like 'cluster_id' will only return one row, and value must be the value of cluster_id + if (OB_FAIL(get_column_index(column_name, index))) { + LOG_ERROR("get_column_index fail", KR(ret), K(column_name), K(index)); + } else { + GET_DATA(int, index, record.cluster_id_, "value"); + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(ClusterInfo &record) +{ + int64_t record_count = 0; + int ret = get_records_tpl_(record, "QueryClusterInfo", record_count); + + if (OB_SUCCESS == ret && 0 == record_count) { + LOG_ERROR("no cluster id info records, unexcepted error", K(record_count), K(svr_)); + ret = OB_ITEM_NOT_SETTED; + } + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(ObServerVersionInfoArray &records) +{ + int ret = OB_SUCCESS; + ObServerVersionInfo record; + ObString server_version_str; + uint64_t server_version; + int64_t index = -1; + + index++; + GET_DATA(varchar, index, server_version_str, "value"); + + if (OB_FAIL(ObClusterVersion::get_version(server_version_str, server_version))) { + LOG_ERROR("ObClusterVersion get_version fail", KR(ret), K(server_version_str), K(server_version)); + } else { + record.server_version_ = server_version; + + if (OB_FAIL(records.push_back(record))) { + LOG_ERROR("push back record fail", KR(ret), K(record), K(records)); + } + } + + return ret; +} + +int IObLogSysTableHelper::BatchSQLQuery::get_records(ObServerVersionInfoArray& records) +{ + int64_t record_count = 0; + return get_records_tpl_(records, "QueryClusterVersion", record_count); +} + +int IObLogSysTableHelper::BatchSQLQuery::parse_record_from_row_(ObServerTZInfoVersionInfo &record) +{ + int ret = OB_SUCCESS; + int64_t index = -1; + + index++; + GET_DATA(int, index, record.timezone_info_version_, "value"); + + return ret; +} + 
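+// -----------------------------------------------------------------------------
+// Reviewer sketch (illustrative only, not part of the original patch): the
+// get_records() overloads in this file are consumed either through the
+// single-statement path (BatchSQLQuery::init(ISQLStrategy *)) or through the
+// multiple-statement path. Pieced together from this file, the multi-statement
+// flow looks roughly as follows; the buffer size, the strategy objects and the
+// `systable_helper` instance are assumptions, and error handling
+// (OB_NEED_RETRY etc.) is omitted:
+//
+//   BatchSQLQuery query;
+//   query.init(16L * 1024L);                          // multi-statement aggregate buffer (size assumed)
+//   query.do_sql_aggregate(&clog_history_strategy);   // one ISQLStrategy per statement
+//   query.do_sql_aggregate(&meta_info_strategy);
+//   systable_helper.query_with_multiple_statement(query);  // calls init_sql() and executes the batch
+//   query.get_records(clog_history_records);          // consume result sets in aggregation order
+//   query.get_records(meta_records);
+//   query.reset();                                    // prepare for the next batch
+// -----------------------------------------------------------------------------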
+int IObLogSysTableHelper::BatchSQLQuery::get_records(ObServerTZInfoVersionInfo &record) +{ + int ret = OB_SUCCESS; + int64_t record_count = 0; + + ret = get_records_tpl_(record, "QueryTimeZoneInfoVersion", record_count); + + if (0 == record_count) { + record.is_timezone_info_version_exist_ = false; + } + + return ret; +} + +//////////////////////////////////////// ObLogSysTableHelper //////////////////////////////////////// + +ObLogSysTableHelper::ObLogSysTableHelper() : + inited_(false), + svr_provider_(NULL), + max_thread_num_(0), + mysql_conns_(NULL), + next_svr_idx_array_(NULL), + thread_counter_(0) +{} + +ObLogSysTableHelper::~ObLogSysTableHelper() +{ + destroy(); +} + +int ObLogSysTableHelper::init(SvrProvider &svr_provider, + const int64_t access_systable_helper_thread_num, + const char *mysql_user, + const char *mysql_password, + const char *mysql_db) +{ + int ret = OB_SUCCESS; + ObLogMySQLConnector *conn_array = NULL; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(access_systable_helper_thread_num <= 0) + || OB_ISNULL(mysql_user) + || OB_ISNULL(mysql_password) + || OB_ISNULL(mysql_db)) { + LOG_ERROR("invalid arguments", K(access_systable_helper_thread_num), K(mysql_user), + K(mysql_password), K(mysql_db)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t max_thread_num = access_systable_helper_thread_num; + int64_t sql_conn_size = static_cast(sizeof(ObLogMySQLConnector) * max_thread_num); + int64_t next_svr_idx_array_size = static_cast(sizeof(int64_t) * max_thread_num); + int64_t buf_size = sql_conn_size + next_svr_idx_array_size; + void *buf = ob_malloc(buf_size, ObModIds::OB_LOG_MYSQL_CONNECTOR); + + if (OB_ISNULL(buf)) { + LOG_ERROR("alloc buffer fail", K(buf_size), K(max_thread_num)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + conn_array = static_cast(buf); + next_svr_idx_array_ = reinterpret_cast(static_cast(buf) + sql_conn_size); + int64_t svr_count = svr_provider.get_server_count(); + + if (svr_count <= 0) { + svr_count = 1; + } + + for (int64_t i = 0; i < max_thread_num; ++i) { + new(conn_array + i) ObLogMySQLConnector(); + // Ensure that requests from different threads are balanced across servers + next_svr_idx_array_[i] = i % svr_count; + } + + (void)snprintf(mysql_user_, sizeof(mysql_user_), "%s", mysql_user); + (void)snprintf(mysql_password_, sizeof(mysql_password_), "%s", mysql_password); + (void)snprintf(mysql_db_, sizeof(mysql_db_), "%s", mysql_db); + + svr_provider_ = &svr_provider; + mysql_conns_ = conn_array; + max_thread_num_ = max_thread_num; + thread_counter_ = 0; + inited_ = true; + + LOG_INFO("init systable helper succ", K(mysql_user_), K(mysql_password_), K(mysql_db_), + K(access_systable_helper_thread_num)); + } + } + + return ret; +} + +void ObLogSysTableHelper::destroy() +{ + inited_ = false; + + if (NULL != mysql_conns_) { + for (int64_t idx = 0, cnt = max_thread_num_; (idx < cnt); ++idx) { + mysql_conns_[idx].~ObLogMySQLConnector(); + } + + ob_free(mysql_conns_); + mysql_conns_ = NULL; + } + + svr_provider_ = NULL; + max_thread_num_ = 0; + thread_counter_ = 0; + mysql_conns_ = NULL; + next_svr_idx_array_ = NULL; // FIXME:next_svr_idx_array_ shares a piece of memory with mysql_conns_ and does not need to be freed here + mysql_user_[0] = '\0'; + mysql_password_[0] = '\0'; + mysql_db_[0] = '\0'; + + LOG_INFO("destroy systable helper succ"); +} + +int ObLogSysTableHelper::do_query_and_handle_when_query_error_occurred_(BatchSQLQuery &query) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY != ret) { + LOG_ERROR("do_query_ fail", KR(ret)); + } else { + const int err_code = query.get_mysql_err_code(); + const char *mysql_err_msg = NULL; + + if (0 == err_code) { + // need retry + } else { + if (OB_ISNULL(mysql_err_msg = query.get_mysql_err_msg())) { + LOG_ERROR("mysql_err_msg is null", K(mysql_err_msg)); + ret = OB_ERR_UNEXPECTED; + } else { + const char *ptr = NULL; + + if (ER_BAD_FIELD_ERROR == err_code) { + // handler if column not exist + ptr = strstr(mysql_err_msg, "replica_type"); + + if (NULL == ptr) { + LOG_ERROR("ptr is null, unexpected column is not found", K(ptr), K(err_code), K(mysql_err_msg)); + ret = OB_NEED_RETRY; + } else { + LOG_DEBUG("column info", K(ptr), K(mysql_err_msg)); + handle_column_not_found_when_query_meta_info_(); + } + } else { + LOG_ERROR("do query fail", K(err_code), K(mysql_err_msg)); + ret = OB_NEED_RETRY; + } + } + } + } + } + + return ret; +} + +void ObLogSysTableHelper::handle_column_not_found_when_query_meta_info_() +{ + // The replica_type field is not available and will not be requested in the future + ATOMIC_STORE(&ISQLStrategy::g_is_replica_type_info_valid, false); + LOG_INFO("'replica_type' is not availalbe in meta table. would not request 'replica_type'", + "g_is_replica_type_info_valid", ISQLStrategy::g_is_replica_type_info_valid); +} + +int ObLogSysTableHelper::query_with_multiple_statement(BatchSQLQuery &query) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init_sql())) { + LOG_ERROR("query init_sql fail", KR(ret)); + } else if (OB_FAIL(do_query_and_handle_when_query_error_occurred_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("query_with_multiple_statement fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else { + // succ + } + + return ret; +} + +int ObLogSysTableHelper::query_all_server_info(AllServerRecordArray &records) +{ + int ret = OB_SUCCESS; + + BatchSQLQuery query; + QueryAllServerInfoStrategy query_server_strategy; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init(&query_server_strategy))) { + LOG_ERROR("init all server info query fail", KR(ret)); + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("do query_all_server_info fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query_all_server_info fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else if (OB_FAIL(query.get_records(records))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("get_records fail while query_all_server_info, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("get_records fail while query_all_server_info", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } + + LOG_DEBUG("query_all_server_info", KR(ret), "query_svr", query.get_server(), + "record_count", records.count(), K(records)); + + return ret; +} + +int ObLogSysTableHelper::query_all_zone_info(AllZoneRecordArray &records) +{ + int ret = OB_SUCCESS; + BatchSQLQuery query; + QueryAllZoneInfoStrategy query_zone_strategy; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init(&query_zone_strategy))) { + LOG_ERROR("init all zone info query fail", KR(ret)); + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("do query_all_zone_info fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query_all_zone_info fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else if (OB_FAIL(query.get_records(records))) { + if (OB_ITEM_NOT_SETTED == ret) { + LOG_WARN("'region' is not availalbe in __all_zone table. would not request 'region'", KR(ret)); + } else if (OB_NEED_RETRY == ret) { + LOG_WARN("get_records fail while query_all_zone_info, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("get_records fail while query_all_zone_info", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } + + LOG_DEBUG("query_all_zone_info", KR(ret), "query_svr", query.get_server(), + "record_count", records.count(), K(records)); + + return ret; +} + +int ObLogSysTableHelper::query_all_zone_type(AllZoneTypeRecordArray &records) +{ + int ret = OB_SUCCESS; + BatchSQLQuery query; + QueryAllZoneTypeStrategy query_zone_type_strategy; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init(&query_zone_type_strategy))) { + LOG_ERROR("init all zone info query fail", KR(ret)); + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("do query_all_zone_type fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query_all_zone_type fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else if (OB_FAIL(query.get_records(records))) { + if (OB_ITEM_NOT_SETTED == ret) { + LOG_WARN("'region' is not availalbe in __all_zone table. would not request 'zone_type'", KR(ret)); + } else if (OB_NEED_RETRY == ret) { + LOG_WARN("get_records fail while query_all_zone_info, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("get_records fail while query_all_zone_info", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } + + LOG_DEBUG("query_all_zone_type", KR(ret), "query_svr", query.get_server(), + "record_count", records.count(), K(records)); + return ret; +} + +int ObLogSysTableHelper::query_cluster_info(ClusterInfo &record) +{ + int ret = OB_SUCCESS; + BatchSQLQuery query; + QueryClusterIdStrategy query_cluster_id_strategy; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init(&query_cluster_id_strategy))) { + LOG_ERROR("init cluster info query fail", KR(ret)); + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("do query_cluster_info fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query_cluster_info fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else if (OB_FAIL(query.get_records(record))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("get_records fail while query_cluster_info, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("get_records fail while query_cluster_info", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } + + LOG_INFO("query_cluster_info", KR(ret), K(record), "query_svr", query.get_server()); + + return ret; +} + +int ObLogSysTableHelper::query_cluster_min_observer_version(uint64_t &min_observer_version) +{ + int ret = OB_SUCCESS; + BatchSQLQuery query; + QueryObserverVersionStrategy query_observer_version_strategy; + ObServerVersionInfoArray records; + min_observer_version = OB_INVALID_ID; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init(&query_observer_version_strategy))) { + LOG_ERROR("init ObserverVersion query fail", KR(ret)); + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("do query_cluster_min_observer_version fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query_cluster_min_observer_version fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else if (OB_FAIL(query.get_records(records))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("get_records fail while query_cluster_min_observer_version, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("get_records fail while query_cluster_min_observer_version", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } + + if (OB_SUCC(ret)) { + // Get minimum version of servers in cluster + for (int64_t idx = 0; OB_SUCC(ret) && idx < records.count(); ++idx) { + uint64_t server_version = records.at(idx).server_version_; + if (OB_INVALID_ID == min_observer_version) { + min_observer_version = server_version; + } else { + min_observer_version = std::min(min_observer_version, server_version); + } + } + } + + LOG_INFO("query_cluster_min_observer_version", KR(ret), K(min_observer_version), K(records), "query_svr", query.get_server()); + + return ret; +} + +int ObLogSysTableHelper::query_timezone_info_version(const uint64_t tenant_id, + int64_t &timezone_info_version) +{ + int ret = OB_SUCCESS; + BatchSQLQuery query; + QueryTimeZoneInfoVersionStrategy query_timezone_info_version_strategy(tenant_id); + ObServerTZInfoVersionInfo record; + timezone_info_version = OB_INVALID_TIMESTAMP; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(query.init(&query_timezone_info_version_strategy))) { + LOG_ERROR("init QueryTimeZoneInfoVersionquery fail", KR(ret)); + } else if (OB_FAIL(do_query_(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("do query_timezone_info_version fail, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("do query_timezone_info_version fail", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else if (OB_FAIL(query.get_records(record))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("get_records fail while query_timezone_info_version, need retry", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("get_records fail while query_timezone_info_version", KR(ret), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } + } else { + if (false == record.is_timezone_info_version_exist_) { + // 1. 1451 version ob cluster __all_zone table does not have timezone_info_version, query is empty + // 2. 
226 versions of the ob cluster have the timezone table split into tenants, if the tenants are not imported, then the query is not available + ret = OB_ENTRY_NOT_EXIST; + } else { + timezone_info_version = record.timezone_info_version_; + } + } + + LOG_INFO("query_timezone_info_version", KR(ret), K(timezone_info_version), K(record), "query_svr", query.get_server()); + + return ret; +} + +int64_t ObLogSysTableHelper::thread_index_() +{ + static __thread int64_t index = -1; + return index < 0 ? (index = ATOMIC_FAA(&thread_counter_, 1)) : index; +} + +int ObLogSysTableHelper::do_query_(MySQLQueryBase &query) +{ + int ret = OB_SUCCESS; + int64_t tid = thread_index_(); + static __thread int64_t last_change_server_tstamp = 0; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(mysql_conns_) || OB_ISNULL(next_svr_idx_array_)) { + LOG_ERROR("invalid mysql_conns_ or next_svr_idx_array_", K(mysql_conns_), + K(next_svr_idx_array_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(svr_provider_)) { + LOG_ERROR("invalid svr provider or config", K(svr_provider_)); + ret = OB_ERR_UNEXPECTED; + } + // Check if the maximum number of threads to access the systable helper is exceeded + // FIXME: we cache a mysql connector for each access thread. + // If the number of access threads exceeds the maximum number of threads prepared, an error should be reported. + else if (OB_UNLIKELY(tid >= max_thread_num_)) { + LOG_ERROR("thread index is larger than systable helper's max thread number", K(tid), + K(max_thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + bool done = false; + ObLogMySQLConnector &conn = mysql_conns_[tid]; + // Get the next server index of the corresponding Connector connection + int64_t &next_svr_idx = next_svr_idx_array_[tid]; + + // Check if should force a server switch before querying + // There is currently a periodic connection reset mechanism to avoid exceptions on one server, e.g. if the query result is wrong + if (need_change_server_(last_change_server_tstamp, conn, tid, next_svr_idx)) { + if (conn.is_inited()) { + conn.destroy(); + } + } + + if (svr_provider_->get_server_count() <= 0) { + LOG_WARN("no server available to query", K(svr_provider_->get_server_count())); + ret = OB_NEED_RETRY; + } else { + for (int64_t retry_svr_cnt = 0; + OB_SUCCESS == ret && ! done && retry_svr_cnt <= svr_provider_->get_server_count(); + ++retry_svr_cnt) { + // init connection + if (! conn.is_inited()) { + // get index of next server + int64_t svr_idx = get_next_svr_idx_(next_svr_idx, svr_provider_->get_server_count()); + // switch to next server + if (OB_FAIL(change_to_next_server_(svr_idx, conn))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // The server list has changed, the required server does not exist, and the current query exits. + LOG_WARN("server list changed, next_svr_idx does not exist. 
quit current query", + K(svr_idx), K(next_svr_idx), K(retry_svr_cnt), + "server_count", svr_provider_->get_server_count()); + } else if (OB_NEED_RETRY == ret) { + LOG_ERROR("connect to server fail, need switch server", KR(ret), K(svr_idx), + K(next_svr_idx), K(retry_svr_cnt), + "server_count", svr_provider_->get_server_count()); + } else { + LOG_ERROR("connect to server fail", KR(ret), K(svr_idx), K(next_svr_idx), + K(retry_svr_cnt), "server_count", svr_provider_->get_server_count()); + } + } else { + last_change_server_tstamp = ObTimeUtility::current_time(); + } + } + + // execute query + if (OB_SUCCESS == ret) { + if (OB_FAIL(conn.query(query))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("query fail on current server, need retry to change server", KR(ret), + "cur_server", query.get_server(), K(next_svr_idx), K(retry_svr_cnt), + "server_count", svr_provider_->get_server_count(), + "mysql_error_code", query.get_mysql_err_code(), + "mysql_error_msg", query.get_mysql_err_msg()); + } else { + LOG_ERROR("query fail", KR(ret), K(retry_svr_cnt), K(next_svr_idx)); + } + } else { + done = true; + } + } + + // In case of query failure, reset the connection and retry to the next server next time + if (OB_SUCCESS != ret) { + if (conn.is_inited()) { + conn.destroy(); + } + } + + // Retry next round + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + } + } + } + + if (OB_ENTRY_NOT_EXIST == ret) { + // The server list has changed + ret = OB_SUCCESS; + } + + if ((OB_SUCC(ret)) && !done) { + ret = OB_NEED_RETRY; + } + } + + return ret; +} + +bool ObLogSysTableHelper::need_change_server_( + const int64_t last_change_server_tstamp, + const ObLogMySQLConnector &conn, + const int64_t tid, + const int64_t next_svr_idx) +{ + bool bool_ret = false; + static const int64_t PRINT_CONN_SERVER_INTERVAL = 10 * _SEC_; + const int64_t sql_server_change_interval = TCONF.sql_server_change_interval_sec * _SEC_; + int64_t cur_time = ObTimeUtility::current_time(); + + if (REACH_TIME_INTERVAL(PRINT_CONN_SERVER_INTERVAL)) { + LOG_INFO("[STAT] [SYSTABLE_HELPER] [QUERY_SQL_SERVER]", K(tid), "svr", conn.get_server(), + K(next_svr_idx), "last_change_server_tstamp", TS_TO_STR(last_change_server_tstamp)); + } + + if (cur_time - last_change_server_tstamp >= sql_server_change_interval) { + bool_ret = true; + LOG_INFO("[STAT] [SYSTABLE_HELPER] [NEED_CHANGE_SQL_SERVER]", K(tid), "svr", conn.get_server(), + K(next_svr_idx), "last_change_server_tstamp", TS_TO_STR(last_change_server_tstamp)); + } + + return bool_ret; +} + +int64_t ObLogSysTableHelper::get_next_svr_idx_(int64_t &next_svr_idx, const int64_t total_svr_count) +{ + // Get index while advancing index value + int64_t svr_idx = next_svr_idx++; + if (svr_idx >= total_svr_count) { + // Corrected values to avoid overflow + svr_idx = 0; + next_svr_idx = 1; + } + return svr_idx; +} + +int ObLogSysTableHelper::change_to_next_server_(const int64_t svr_idx, ObLogMySQLConnector &conn) +{ + int ret = OB_SUCCESS; + // update connection + ObAddr svr; + MySQLConnConfig conn_config; + int mysql_connect_timeout_sec = TCONF.mysql_connect_timeout_sec; + int mysql_query_timeout_sec = TCONF.mysql_query_timeout_sec; + int64_t cluster_id = OB_INVALID_ID; + const bool enable_ssl_client_authentication = (1 == TCONF.ssl_client_authentication); + + if (OB_ISNULL(svr_provider_)) { + LOG_ERROR("invalid svr provider", K(svr_provider_)); + ret = OB_NOT_INIT; + } else if (OB_FAIL(svr_provider_->get_server(cluster_id, svr_idx, svr))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // exit on server exhaustion + } else { 
+ LOG_ERROR("get server from svr provider fail", KR(ret), K(svr_idx), + K(svr_provider_->get_server_count())); + } + } else if (OB_FAIL(conn_config.reset(svr, mysql_user_, mysql_password_, mysql_db_, + mysql_connect_timeout_sec, mysql_query_timeout_sec))) { + LOG_ERROR("reset mysql config fail", KR(ret), K(svr), K(mysql_user_), K(mysql_password_), + K(mysql_db_), K(mysql_connect_timeout_sec), K(mysql_query_timeout_sec)); + } else { + LOG_INFO("connect to next mysql server", "cur_server", conn.get_server(), + "next_server", svr, "next_svr_idx", svr_idx, + "server_count", svr_provider_->get_server_count()); + + // destroy connection + if (conn.is_inited()) { + conn.destroy(); + } + + if (OB_FAIL(conn.init(conn_config, enable_ssl_client_authentication))) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("init mysql connector fail, need retry", KR(ret), K(svr), K(svr_idx), + K(conn_config), K(enable_ssl_client_authentication)); + } else { + LOG_ERROR("init mysql connector fail", KR(ret), K(svr), K(svr_idx), K(conn_config)); + } + } else { + // connection init success + } + } + return ret; +} + +int ObLogSysTableHelper::reset_connection() +{ + int ret = OB_SUCCESS; + int64_t tid = thread_index_(); + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(mysql_conns_) || OB_ISNULL(next_svr_idx_array_)) { + LOG_ERROR("invalid mysql_conns_ or next_svr_idx_array_", K(mysql_conns_), + K(next_svr_idx_array_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(tid >= max_thread_num_)) { + LOG_ERROR("thread index is larger than systable helper's max thread number", K(tid), + K(max_thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + ObLogMySQLConnector &conn = mysql_conns_[tid]; + const int64_t next_svr_idx = next_svr_idx_array_[tid]; + + if (conn.is_inited()) { + LOG_INFO("reset connection", K(tid), "current_server", conn.get_server(), K(next_svr_idx)); + conn.destroy(); + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_systable_helper.h b/src/liboblog/src/ob_log_systable_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..a00a91b45c19dfbdf9cb78cfb9baaa165a4785bb --- /dev/null +++ b/src/liboblog/src/ob_log_systable_helper.h @@ -0,0 +1,638 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_SYSTABLE_HELPER_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_SYSTABLE_HELPER_H_ + +#include "lib/container/ob_se_array.h" // ObSEArray +#include "share/ob_define.h" // MAX_IP_ADDR_LENGTH, OB_INVALID_ID, ObReplicaType +#include "lib/mysqlclient/ob_mysql_server_provider.h" // ObMySQLServerProvider +#include "common/ob_partition_key.h" // ObPartitionKey +#include "common/ob_region.h" // ObRegin +#include "common/ob_zone.h" // ObZone +#include "common/ob_zone_type.h" // ObZoneType +#include "share/ob_server_status.h" // ObServerStatus + + +#include "ob_log_mysql_connector.h" // MySQLQueryBase + +namespace oceanbase +{ +namespace liboblog +{ +class ISQLStrategy +{ + // Class global variables +public: + // indicates whether the replica_type information is valid, default is true, i.e. replica information is used + // When sql fails and ret=OB_ERR_COLUMN_NOT_FOUND, the atom is set to false, won’t use replica_type information + static bool g_is_replica_type_info_valid; + +public: + /// SQL strategy + /// + /// @param [in] sql_buf aggregate sql buffer + /// @param [in] mul_statement_buf_len length of aggregate sql buffer + /// @param [out] pos Returns the length of a single sql + /// + /// @retval OB_SUCCESS Success + /// @retval Other return values Failure + virtual int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos) = 0; +}; + +bool is_cluster_version_be_equal_or_greater_than_200_(); +bool is_cluster_version_be_equal_or_greater_than_220_(); + +///////////////////////// QueryClogHistorySQLStrategy ///////////////////////// +/// Query __all_clog_history_info_v2 policy +/// Query __all_clog_history_info_v2 based on log_id to get all servers with service log IDs greater than or equal to log_id logs +/// Query __all_clog_history_info_v2 based on timestamp to get all servers with service timestamp greater than or equal to timestamp log +class QueryClogHistorySQLStrategy : public ISQLStrategy +{ +public: + QueryClogHistorySQLStrategy(); + virtual ~QueryClogHistorySQLStrategy(); + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + + int init_by_log_id_query(const common::ObPartitionKey &pkey, const uint64_t log_id); + int init_by_tstamp_query(const common::ObPartitionKey &pkey, const int64_t tstamp); + void destroy(); + +public: + TO_STRING_KV(K_(pkey), + K_(log_id), + K_(tstamp), + K_(query_by_log_id)); + +private: + bool inited_; + common::ObPartitionKey pkey_; + uint64_t log_id_; + int64_t tstamp_; + bool query_by_log_id_; + +private: + DISALLOW_COPY_AND_ASSIGN(QueryClogHistorySQLStrategy); +}; + +///////////////////////// QueryMetaInfoSQLStrategy ///////////////////////// +/// Query meta info policy +//// Query __all_meta_table / __all_root_table for information on the server that is serving the partition +/// Query __all_meta_table / __all_root_table for leader information +class QueryMetaInfoSQLStrategy : public ISQLStrategy +{ +public: + QueryMetaInfoSQLStrategy(); + virtual ~QueryMetaInfoSQLStrategy(); + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + + int init(const common::ObPartitionKey &pkey, bool only_query_leader); + void destroy(); + +public: + TO_STRING_KV(K_(pkey), + K_(only_query_leader)); + +private: + bool inited_; + common::ObPartitionKey pkey_; + bool only_query_leader_; + +private: + DISALLOW_COPY_AND_ASSIGN(QueryMetaInfoSQLStrategy); +}; + +///////////////////////// QueryAllServerInfo 
///////////////////////// +// Query __all_server table +class QueryAllServerInfoStrategy: public ISQLStrategy +{ +public: + QueryAllServerInfoStrategy() {} + virtual ~QueryAllServerInfoStrategy() {} + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + +private: + DISALLOW_COPY_AND_ASSIGN(QueryAllServerInfoStrategy); +}; + + +///////////////////////// QueryAllZoneInfo ///////////////////////// +// Query the __all_zone table +class QueryAllZoneInfoStrategy: public ISQLStrategy +{ +public: + QueryAllZoneInfoStrategy() {} + virtual ~QueryAllZoneInfoStrategy() {} + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + +private: + DISALLOW_COPY_AND_ASSIGN(QueryAllZoneInfoStrategy); +}; + +///////////////////////// QueryAllZoneType ///////////////////////// +// Query __all_zone table +class QueryAllZoneTypeStrategy: public ISQLStrategy +{ +public: + QueryAllZoneTypeStrategy() {} + virtual ~QueryAllZoneTypeStrategy() {} + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + +private: + DISALLOW_COPY_AND_ASSIGN(QueryAllZoneTypeStrategy); +}; + +///////////////////////// QueryClusterIdStrategy ///////////////////////// +// 查询cluster id +class QueryClusterIdStrategy: public ISQLStrategy +{ +public: + QueryClusterIdStrategy() {} + ~QueryClusterIdStrategy() {} + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + +private: + DISALLOW_COPY_AND_ASSIGN(QueryClusterIdStrategy); +}; + +///////////////////////// QueryObserverVersionStrategy ///////////////////////// +// query cluster version +class QueryObserverVersionStrategy: public ISQLStrategy +{ +public: + QueryObserverVersionStrategy() {} + ~QueryObserverVersionStrategy() {} + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + +private: + DISALLOW_COPY_AND_ASSIGN(QueryObserverVersionStrategy); +}; + +///////////////////////// QueryTimeZoneInfoVersionStrategy ///////////////////////// +// query time_zone_info_version +class QueryTimeZoneInfoVersionStrategy: public ISQLStrategy +{ +public: + QueryTimeZoneInfoVersionStrategy(const uint64_t tenant_id) : tenant_id_(tenant_id) {} + ~QueryTimeZoneInfoVersionStrategy() { tenant_id_ = common::OB_INVALID_TENANT_ID; } + +public: + int build_sql_statement(char *sql_buf, const int64_t mul_statement_buf_len, int64_t &pos); + +private: + uint64_t tenant_id_; + +private: + DISALLOW_COPY_AND_ASSIGN(QueryTimeZoneInfoVersionStrategy); +}; + +/////////////////////////////////// IObLogSysTableHelper /////////////////////////////////////// +// query system table +class IObLogSysTableHelper +{ +public: + static const int64_t DEFAULT_RECORDS_NUM = 16; + static const int64_t ALL_SERVER_DEFAULT_RECORDS_NUM = 32; + + struct ClogHistoryRecord; + typedef common::ObSEArray ClogHistoryRecordArray; + + struct MetaRecord; + typedef common::ObSEArray MetaRecordArray; + + struct AllServerRecord; + typedef common::ObSEArray AllServerRecordArray; + + struct AllZoneRecord; + typedef common::ObSEArray AllZoneRecordArray; + + struct AllZoneTypeRecord; + typedef common::ObSEArray AllZoneTypeRecordArray; + + struct ClusterInfo; + + struct ObServerVersionInfo; + typedef common::ObSEArray ObServerVersionInfoArray; + + struct ObServerTZInfoVersionInfo; + +public: + virtual ~IObLogSysTableHelper() { } + +public: + class BatchSQLQuery; + /// Using multi-statement to aggregate SQL queries + //// 
+ /// Currently serving SvrFinder for querying Meta Table and Clog history to determine server list and leader information + virtual int query_with_multiple_statement(BatchSQLQuery &batch_query) = 0; + + /// Query __all_server table for all active server information + virtual int query_all_server_info(AllServerRecordArray &records) = 0; + + /// Query __all_zone table for all zone-region information + virtual int query_all_zone_info(AllZoneRecordArray &records) = 0; + + /// Query __all_zone table for all zone-type information + virtual int query_all_zone_type(AllZoneTypeRecordArray &records) = 0; + + /// Query cluster-related information + virtual int query_cluster_info(ClusterInfo &cluster_info) = 0; + + /// query min version of obsever in cluster + virtual int query_cluster_min_observer_version(uint64_t &min_observer_version) = 0; + + /// query timezone info version + virtual int query_timezone_info_version(const uint64_t tenant_id, + int64_t &timezone_info_version) = 0; + + /// Reset the current thread connection to allow the next query to use a different Server + virtual int reset_connection() = 0; + +public: + // Support batch query + // 1. single-statement query + // 2. multiple-statement query + // + // Usage. + // 1. single-statement query + // (1) Implementing SQL query policies(ISQLStrategy), and get_records() + // (2) BatchSQLQuery::init(ISQLStrategy *), for initialization + // (3) query + // + // 2. multiple-statement query + // (1) Implementing a multi-statement SQL query strategy, and the corresponding get_records() + // (2) BatchSQLQuery::init(const int64_t), complete with initialization-aggregate buffer declaration + // (3) do_sql_aggregate() to aggregate request + // (4) when aggregation is complete, query_with_multiple_statement(BatchSQLQuery &) + // (5) Process all query results in turn. 
When processing is complete, the query is launched again and reset() is called + class BatchSQLQuery : public MySQLQueryBase + { + public: + BatchSQLQuery(); + virtual ~BatchSQLQuery(); + + public: + // single-statement query init + int init(ISQLStrategy *strategy); + // multiple-statement query init + int init(const int64_t mul_statement_buf_len); + void destroy(); + // multiple-statement reset + void reset(); + // Doing aggregation based on SQL strategy + int do_sql_aggregate(ISQLStrategy *strategy); + // init sql + int init_sql(); + + public: + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(ClogHistoryRecordArray& records); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(MetaRecordArray& records); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(bool &has_leader, common::ObAddr &leader); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(AllServerRecordArray& records); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(AllZoneRecordArray& records); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(AllZoneTypeRecordArray& records); + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(ClusterInfo &record); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(ObServerVersionInfoArray& records); + + /* + * Error codes + * - OB_NEED_RETRY: Connection error encountered + */ + int get_records(ObServerTZInfoVersionInfo & record); + + int64_t get_batch_sql_count() const { return batch_sql_count_; } + + private: + template + int get_records_tpl_(RecordsType &records, const char *event, int64_t &record_count); + int parse_record_from_row_(ClogHistoryRecordArray &records); + int parse_record_from_row_(MetaRecordArray& records); + int parse_record_from_row_(AllServerRecordArray& records); + int parse_record_from_row_(AllZoneRecordArray& records); + int parse_record_from_row_(AllZoneTypeRecordArray& records); + int parse_record_from_row_(ClusterInfo &record); + int parse_record_from_row_(ObServerVersionInfoArray &records); + int parse_record_from_row_(ObServerTZInfoVersionInfo &record); + + private: + bool inited_; + + // multiple-statement SQL + bool enable_multiple_statement_; // Identifies whether the multiple-statement is in effect + char *mul_statement_buf_; // multiple-statement buffer, store aggregated SQL + int64_t mul_statement_buf_len_; // multiple-statement buffer length + + // single SQL + char single_statement_buf_[DEFAULT_SQL_LENGTH]; + + int64_t pos_; // Record current fill position + int64_t batch_sql_count_; // Number of records aggregated SQL + + private: + DISALLOW_COPY_AND_ASSIGN(BatchSQLQuery); + }; + +public: + // Cluster related information + struct ClusterInfo + { + ClusterInfo() { reset(); } + + // clusterID + int64_t cluster_id_; + + void reset() + { + cluster_id_ = common::OB_INVALID_CLUSTER_ID; + } + + TO_STRING_KV(K_(cluster_id)); + }; + + struct ObServerVersionInfo + { + ObServerVersionInfo() { reset(); } + + void reset() + { + server_version_ = common::OB_INVALID_ID; + } + + uint64_t server_version_; + + TO_STRING_KV(K_(server_version)); + }; + + struct ObServerTZInfoVersionInfo + { + ObServerTZInfoVersionInfo() { reset(); } + + void reset() + { + timezone_info_version_ = common::OB_INVALID_TIMESTAMP; + 
is_timezone_info_version_exist_ = true; + } + + int64_t timezone_info_version_; + bool is_timezone_info_version_exist_; + + TO_STRING_KV(K_(timezone_info_version), K_(is_timezone_info_version_exist)); + }; + + // records in table __all_clog_history_info_v2 + struct ClogHistoryRecord + { + ClogHistoryRecord() { reset(); } + + // Range. + uint64_t start_log_id_; + uint64_t end_log_id_; + + // Addr. + char svr_ip_[common::MAX_IP_ADDR_LENGTH + 1]; + int32_t svr_port_; + + void reset() + { + start_log_id_ = common::OB_INVALID_ID; + end_log_id_ = common::OB_INVALID_ID; + svr_ip_[0] = '\0'; + svr_port_ = 0; + } + + TO_STRING_KV( + K_(start_log_id), + K_(end_log_id), + K_(svr_ip), + K_(svr_port)); + }; + + // records in table __all_meta_table/__all_root_table + struct MetaRecord + { + char svr_ip_[common::MAX_IP_ADDR_LENGTH + 1]; + int32_t svr_port_; + int64_t role_; + // compatibility: connecting to a lower version of the observer, which does not have a replica_type field. + // replica_type defaults to a REPLICA_TYPE_FULL + common::ObReplicaType replica_type_; + + MetaRecord() { reset(); } + + void reset() + { + svr_ip_[0] = '\0'; + svr_port_ = 0; + role_ = 0; + replica_type_ = common::REPLICA_TYPE_MAX; + } + + TO_STRING_KV(K_(svr_ip), K_(svr_port), K_(role), K_(replica_type)); + }; + + // records in table __all_server + struct AllServerRecord + { + typedef share::ObServerStatus::DisplayStatus StatusType; + + char svr_ip_[common::MAX_IP_ADDR_LENGTH + 1]; + int32_t svr_port_; + StatusType status_; + common::ObZone zone_; + + AllServerRecord() { reset(); } + + void reset() + { + svr_ip_[0] = '\0'; + svr_port_ = 0; + status_ = share::ObServerStatus::OB_SERVER_ACTIVE; + zone_.reset(); + } + + TO_STRING_KV(K_(svr_ip), K_(svr_port), K_(status), K_(zone)); + }; + + struct AllZoneRecord + { + common::ObZone zone_; + common::ObRegion region_; + + AllZoneRecord() { reset(); } + + void reset() + { + zone_.reset(); + region_.reset(); + } + + TO_STRING_KV(K_(zone), K_(region)); + }; + + struct AllZoneTypeRecord + { + common::ObZone zone_; + common::ObZoneType zone_type_; + + AllZoneTypeRecord() { reset(); } + + void reset() + { + zone_.reset(); + zone_type_ = common::ZONE_TYPE_INVALID; + } + + TO_STRING_KV(K_(zone), K_(zone_type)); + }; +}; + +/////////////////////////////////// ObLogSysTableHelper /////////////////////////////////////// + +class ObLogConfig; +class ObLogMySQLConnector; +class ObLogSysTableHelper : public IObLogSysTableHelper +{ + typedef common::sqlclient::ObMySQLServerProvider SvrProvider; + +public: + ObLogSysTableHelper(); + virtual ~ObLogSysTableHelper(); + +public: + /// FIXME: Note that there is an upper limit to the total number of threads that can be used by external modules: + // access_systable_helper_thread_num, beyond which the module query interface will fail. 
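+  /// A minimal sizing sketch (illustrative, assuming the caller knows how many
+  /// threads will issue queries; the names below are hypothetical):
+  ///
+  ///   // e.g. SvrFinder workers + ObLogAllSvrCache thread + main thread
+  ///   const int64_t access_threads = svr_finder_thread_num + 1 + 1;
+  ///   ObLogSysTableHelper systable_helper;
+  ///   ret = systable_helper.init(svr_provider, access_threads, user, password, db);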
+ int init(SvrProvider &svr_provider, + const int64_t access_systable_helper_thread_num, + const char *mysql_user, + const char *mysql_password, + const char *mysql_db); + void destroy(); + + /// The SvrFinder thread pool uses this interface + virtual int query_with_multiple_statement(BatchSQLQuery &batch_query); + + /// The ObLogAllSvrCache thread pool uses this interface + virtual int query_all_server_info(AllServerRecordArray &records); + + /// THe ObLogAllSvrCache thread pool uses this interface + virtual int query_all_zone_info(AllZoneRecordArray &records); + + /// The ObLogAllSvrCache thread pool uses this interface + virtual int query_all_zone_type(AllZoneTypeRecordArray &records); + + /// Start the main thread ObLogInstance using this interface + virtual int query_cluster_info(ClusterInfo &cluster_info); + + /// 1. Start the main thread ObLogInstance to use this interface to query and initialize the ObClusterVersion singleton + /// 2. ObLogAllSvrCache thread uses this interface + /// Query the cluster min observer version + virtual int query_cluster_min_observer_version(uint64_t &min_observer_version); + + /// Query timezone info version, for oracle new timezone type synchronization + /// ObLogTimeZoneInfoGetter + virtual int query_timezone_info_version(const uint64_t tenant_id, + int64_t &timezone_info_version); + + /// Restart the connection used by the current thread + virtual int reset_connection(); + +private: + int do_query_(MySQLQueryBase &query); + int do_query_and_handle_when_query_error_occurred_(BatchSQLQuery &query); + void handle_column_not_found_when_query_meta_info_(); + int64_t thread_index_(); + int change_to_next_server_(const int64_t svr_idx, ObLogMySQLConnector &conn); + int64_t get_next_svr_idx_(int64_t &next_svr_idx, const int64_t total_svr_count); + bool need_change_server_( + const int64_t last_change_server_tstamp, + const ObLogMySQLConnector &conn, + const int64_t tid, + const int64_t next_svr_idx); + +// Internal member variables +private: + bool inited_; + SvrProvider *svr_provider_; + int64_t max_thread_num_; + char mysql_user_[common::OB_MAX_USER_NAME_BUF_LENGTH]; + char mysql_password_[common::OB_MAX_PASSWORD_LENGTH + 1]; + char mysql_db_[common::OB_MAX_DATABASE_NAME_BUF_LENGTH]; + + // MySQL connector array + // One connector object for each thread for efficiency reasons + ObLogMySQLConnector *mysql_conns_; + + // The index of the server to which each ObLogMySQLConnector corresponds, corresponding to the server index in SvrProvider + // Indicates the next server to be connected to + int64_t *next_svr_idx_array_; + + int64_t thread_counter_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogSysTableHelper); +}; + +} +} +#endif diff --git a/src/liboblog/src/ob_log_table_id_cache.cpp b/src/liboblog/src/ob_log_table_id_cache.cpp new file mode 100644 index 0000000000000000000000000000000000000000..564c7bde10b094fe6afa62b8d76ed37aeb4f932b --- /dev/null +++ b/src/liboblog/src/ob_log_table_id_cache.cpp @@ -0,0 +1,66 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_table_id_cache.h" + +namespace oceanbase +{ +using namespace common; + +namespace liboblog +{ +///////////////////////////////////////////////////////////////////////////// +void TableInfo::reset() +{ + table_id_ = OB_INVALID_ID; +} + +int TableInfo::init(const uint64_t table_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == table_id)) { + LOG_ERROR("invalid argument", K(table_id)); + ret = OB_INVALID_ARGUMENT; + } else { + table_id_ = table_id; + } + + return ret; +} + +bool TableInfoEraserByTenant::operator()( + const TableID &table_id_key, + TableInfo &tb_info) +{ + uint64_t target_tenant_id = extract_tenant_id(table_id_key.table_id_); + const char *cache_type = NULL; + if (is_global_normal_index_) { + cache_type = "GLOBAL_NORMAL_INDEX_TBALE"; + } else { + cache_type = "SERVED_TABLE_ID_CACHE"; + } + + if (tenant_id_ == target_tenant_id) { + _LOG_INFO("[DDL] [%s] [REMOVE_BY_TENANT] TENANT_ID=%lu TABLE_ID_INFO=(%lu.%lu)", + cache_type, tenant_id_, table_id_key.table_id_, tb_info.table_id_); + // reset value + tb_info.reset(); + } + + return (tenant_id_ == target_tenant_id); +} + +} +} diff --git a/src/liboblog/src/ob_log_table_id_cache.h b/src/liboblog/src/ob_log_table_id_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..7f0ef53a7c23a878cafb70ce4d74f8a620121523 --- /dev/null +++ b/src/liboblog/src/ob_log_table_id_cache.h @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_TABLE_ID_CACHE_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_TABLE_ID_CACHE_H_ + +#include "common/ob_partition_key.h" // ObPartitionKey +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap + +namespace oceanbase +{ +namespace liboblog +{ +// Cache of Global General Index +struct TableID +{ + uint64_t table_id_; + + TableID(const uint64_t table_id) : table_id_(table_id) {} + + int64_t hash() const + { + return static_cast(table_id_); + } + + bool operator== (const TableID &other) const + { + return table_id_ == other.table_id_; + } + + TO_STRING_KV(K_(table_id)); +}; + +// Record table_id +// 1. for the primary table, record itself +// 2. 
For an index table, record table_id of its primary table +struct TableInfo +{ + uint64_t table_id_; + + TableInfo() { reset(); } + ~TableInfo() { reset(); } + + void reset(); + int init(const uint64_t table_id); + + TO_STRING_KV(K_(table_id)); +}; +struct TableInfoEraserByTenant +{ + uint64_t tenant_id_; + bool is_global_normal_index_; + + explicit TableInfoEraserByTenant(const uint64_t id, const bool is_global_normal_index) + : tenant_id_(id), is_global_normal_index_(is_global_normal_index) {} + bool operator()(const TableID &table_id_key, TableInfo &tb_info); +}; + +// Global General Index Cache +typedef common::ObLinearHashMap GIndexCache; +// TableIDCache, records master table, unique index table, global unique index table_id, used to filter tables within a partition group +typedef common::ObLinearHashMap TableIDCache; +} +} + +#endif diff --git a/src/liboblog/src/ob_log_table_matcher.cpp b/src/liboblog/src/ob_log_table_matcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..736c7bb798a4a3bff500cd7e945a7fbf554f35b0 --- /dev/null +++ b/src/liboblog/src/ob_log_table_matcher.cpp @@ -0,0 +1,805 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_log_table_matcher.h" + +#include "share/ob_define.h" + +#include "ob_log_utils.h" // ob_log_malloc + +#define _STAT(level, fmt, args...) _OBLOG_LOG(level, "[STAT] [TABLE_MATCHER] " fmt, ##args) +#define _ISTAT(fmt, args...) _STAT(INFO, fmt, ##args) +#define _DSTAT(fmt, args...) _STAT(DEBUG, fmt, ##args) + +namespace oceanbase +{ +namespace liboblog +{ +using namespace common; + +ObLogTableMatcher::ObLogTableMatcher() : + patterns_(), + buf_(NULL), + buf_size_(0), + black_patterns_(), + black_buf_(NULL), + black_buf_size_(0), + pg_patterns_(), + pg_buf_(NULL), + pg_buf_size_(0), + black_pg_patterns_(), + black_pg_buf_(NULL), + black_pg_buf_size_(0) +{ } + +ObLogTableMatcher::~ObLogTableMatcher() +{ + (void)destroy(); +} + +int ObLogTableMatcher::table_match_pattern_(const bool is_black, + const char* tenant_name, + const char* db_name, + const char* tb_name, + bool& matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(tenant_name) || OB_ISNULL(db_name) || OB_ISNULL(tb_name)) { + OBLOG_LOG(ERROR, "invalid arguments", K(tenant_name), K(db_name), K(tb_name)); + ret = OB_INVALID_ARGUMENT; + } else { + PatternArray *ptns = is_black ? &black_patterns_ : &patterns_; + + matched = false; + + for (int64_t idx = 0, cnt = ptns->count(); OB_SUCCESS == ret && !matched && idx < cnt; ++idx) { + const Pattern &pattern = ptns->at(idx); + int err = 0; + const char *not_match_part = "UNKNOW"; + + // Tenant name. + if (0 != (err = fnmatch(pattern.tenant_pattern_.ptr(), + tenant_name, + fnmatch_flags))) { + // Not matched. + not_match_part = "TENANT_PATTERN"; + } + // Database name. + else if (0 != (err = fnmatch(pattern.database_pattern_.ptr(), + db_name, + fnmatch_flags))) { + // Not matched. + not_match_part = "DATABASE_PATTERN"; + } + // Table name. 
+ else if (0 != (err = fnmatch(pattern.table_pattern_.ptr(), + tb_name, + fnmatch_flags))) { + // Not matched. + not_match_part = "TABLE_PATTERN"; + } + else { + // Matched. + matched = true; + } + + if (matched) { + _ISTAT("[%s_PATTERN_MATCHED] PATTERN='%s.%s.%s' TABLE='%s.%s.%s'", + is_black ? "BLACK" : "WHITE", + pattern.tenant_pattern_.ptr(), pattern.database_pattern_.ptr(), + pattern.table_pattern_.ptr(), + tenant_name, db_name, tb_name); + } else { + _ISTAT("[%s_PATTERN_NOT_MATCH] PATTERN='%s.%s.%s' TABLE='%s.%s.%s' NOT_MATCH_PATTERN=%s", + is_black ? "BLACK" : "WHITE", + pattern.tenant_pattern_.ptr(), pattern.database_pattern_.ptr(), + pattern.table_pattern_.ptr(), + tenant_name, db_name, tb_name, + not_match_part); + } + + // fnmatch() err. + // OB_SUCCESS == 0. + if (OB_SUCCESS != err && FNM_NOMATCH != err) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err exec fnmatch", KR(ret), K(err)); + } + } + } + + return ret; +} + +int ObLogTableMatcher::tenant_match_pattern_(const bool is_black, + const char* tenant_name, + bool& matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(tenant_name)) { + OBLOG_LOG(ERROR, "invalid arguments", K(tenant_name)); + ret = OB_INVALID_ARGUMENT; + } else { + PatternArray *ptns = is_black ? &black_patterns_ : &patterns_; + + matched = false; + + for (int64_t idx = 0, cnt = ptns->count(); OB_SUCCESS == ret && !matched && idx < cnt; ++idx) { + const Pattern &pattern = ptns->at(idx); + int err = 0; + + // Tenant name. + if (0 != (err = fnmatch(pattern.tenant_pattern_.ptr(), + tenant_name, + fnmatch_flags))) { + // Not matched. + } + else { + // Matched. + matched = true; + } + + if (matched) { + _ISTAT("[%s_PATTERN_MATCHED] PATTERN='%s' TENANT='%s'", + is_black ? "BLACK" : "WHITE", + pattern.tenant_pattern_.ptr(), tenant_name); + } else { + _ISTAT("[%s_PATTERN_NOT_MATCH] PATTERN='%s' TENANT='%s'", + is_black ? "BLACK" : "WHITE", + pattern.tenant_pattern_.ptr(), tenant_name); + } + + // fnmatch() err. + // OB_SUCCESS == 0. + if (OB_SUCCESS != err && FNM_NOMATCH != err) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err exec fnmatch", KR(ret), K(err)); + } + } + } + + return ret; +} + +int ObLogTableMatcher::table_match(const char* tenant_name, + const char* db_name, + const char* tb_name, + bool& matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + bool white_matched = false; + bool black_matched = false; + + matched = false; + + // First filter by whitelist, if whitelist matches, match blacklist + if (OB_FAIL(table_match_pattern_(false, tenant_name, db_name, tb_name, white_matched, fnmatch_flags))) { + OBLOG_LOG(ERROR, "match white pattern fail", KR(ret), K(tenant_name), K(db_name), K(tb_name), + K(white_matched), K(fnmatch_flags)); + } else if (white_matched && OB_FAIL(table_match_pattern_(true, tenant_name, db_name, tb_name, + black_matched, fnmatch_flags))) { + OBLOG_LOG(ERROR, "match black pattern fail", KR(ret), K(tenant_name), K(db_name), K(tb_name), + K(white_matched), K(fnmatch_flags)); + } else { + matched = (white_matched && ! black_matched); + + _ISTAT("[%sTABLE_PATTERNS_MATCHED] TABLE='%s.%s.%s' WHITE_PATTERN_COUNT=%ld " + "BLACK_PATTERN_COUNT=%ld WHITE_MATCHED=%d BLACK_MATCHED=%d", + matched ? 
"" : "NO_", + tenant_name, db_name, tb_name, patterns_.count(), black_patterns_.count(), + white_matched, black_matched); + } + + return ret; +} + +int ObLogTableMatcher::tenant_match(const char* tenant_name, + bool& matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + bool white_matched = false; + bool black_matched = false; + + matched = false; + + // Tenant matching is only considered for whitelisting, as tenants may be duplicated + if (OB_FAIL(tenant_match_pattern_(false, tenant_name, white_matched, fnmatch_flags))) { + OBLOG_LOG(ERROR, "match white pattern fail", KR(ret), K(tenant_name), K(white_matched), + K(fnmatch_flags)); + } else { + //make blacklists always mismatch + matched = (white_matched && ! black_matched); + + _ISTAT("[%sTENANT_PATTERNS_MATCHED] TENANT='%s' WHITE_PATTERN_COUNT=%ld " + "BLACK_PATTERN_COUNT=%ld WHITE_MATCHED=%d BLACK_MATCHED=%d", + matched ? "" : "NO_", + tenant_name, patterns_.count(), black_patterns_.count(), + white_matched, black_matched); + } + + return ret; +} + +int ObLogTableMatcher::match(const char* pattern1, + const ObIArray& pattern2, + bool& matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + matched = false; + + // Param check. + if (NULL == pattern1) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "invalid args", KR(ret), K(pattern1)); + } else if (pattern2.count() <= 0) { + matched = false; + } else { + // Copy. + char *pattern_buf = NULL; + int64_t pattern_buf_size = 0; + if (OB_SUCC(ret)) { + int tmp_ret = 0; + pattern_buf_size = 1 + static_cast(strlen(pattern1)); + if (NULL == (pattern_buf = + reinterpret_cast(ob_log_malloc(pattern_buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + OBLOG_LOG(ERROR, "err alloc pattern buf", KR(ret), K(pattern_buf_size)); + } + else if (pattern_buf_size <= (tmp_ret = + snprintf(pattern_buf, pattern_buf_size, "%s", pattern1)) || (tmp_ret < 0)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err copy pattern", KR(ret), K(tmp_ret)); + } + else { + _OBLOG_LOG(INFO, "[STAT] [PATTERN_MATCH] PATTERN=%s", pattern_buf); + } + } + + // Cut pattern1. + const char delimiter = '|'; + if (OB_SUCC(ret)) { + for (int64_t idx = 0, cnt = pattern_buf_size; idx < cnt; ++idx) { + char &cur = pattern_buf[idx]; + if (delimiter == cur) { + cur = '\0'; + } + } + } + + // Match. + int64_t iter = 0; + while (OB_SUCCESS == ret && iter < pattern_buf_size && !matched) { + const char *p1 = pattern_buf + iter; + const char *p2 = NULL; + for (int64_t idx = 0, cnt = pattern2.count(); + OB_SUCCESS == ret && idx < cnt && !matched; + ++idx) { + const ObString &pattern2_str = pattern2.at(idx); + p2 = pattern2_str.ptr(); + // fnmatch. + int tmp_ret = 0; + if (0 == (tmp_ret = fnmatch(p1, p2, fnmatch_flags))) { + matched = true; + } + else if (FNM_NOMATCH == tmp_ret) { + // Not matched. + } + else { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err exec fnmatch", KR(ret), K(tmp_ret), K(p1), K(p2)); + } + + _OBLOG_LOG(INFO, "[STAT] [PATTERN_MATCH] MATCH('%s', '%s') => %s", + p1, p2, matched ? "true" : "false"); + } + // Move to next pattern. + if (OB_SUCCESS == ret && !matched) { + iter += (1 + static_cast(strlen(p1))); + } + } + + // Release mem. 
+ if (NULL != pattern_buf) { + ob_log_free(pattern_buf); + pattern_buf = NULL; + } + } + + return ret; +} + +int ObLogTableMatcher::tablegroup_match(const char *tenant_name, + const char *tablegroup_name, + bool &matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + bool white_matched = false; + bool black_matched = false; + + matched = false; + + // First filter by whitelist, if whitelist matches, match blacklist + if (OB_FAIL(tablegroup_match_pattern_(false, tenant_name, tablegroup_name, white_matched, fnmatch_flags))) { + OBLOG_LOG(ERROR, "match white pattern fail", KR(ret), K(tenant_name), K(tablegroup_name), + K(white_matched), K(fnmatch_flags)); + } else if (white_matched && OB_FAIL(tablegroup_match_pattern_(true, tenant_name, tablegroup_name, + black_matched, fnmatch_flags))) { + OBLOG_LOG(ERROR, "match black pattern fail", KR(ret), K(tenant_name), K(tablegroup_name), + K(white_matched), K(fnmatch_flags)); + } else { + matched = (white_matched && ! black_matched); + + _ISTAT("[%sPG_PATTERNS_MATCHED] TABLEGROUP='%s.%s' WHITE_PATTERN_COUNT=%ld " + "BLACK_PATTERN_COUNT=%ld WHITE_MATCHED=%d BLACK_MATCHED=%d", + matched ? "" : "NO_", + tenant_name, tablegroup_name, pg_patterns_.count(), black_pg_patterns_.count(), + white_matched, black_matched); + } + + return ret; +} + +int ObLogTableMatcher::tablegroup_match_pattern_(const bool is_black, + const char* tenant_name, + const char* tablegroup_name, + bool& matched, + const int fnmatch_flags) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(tenant_name) || OB_ISNULL(tablegroup_name)) { + OBLOG_LOG(ERROR, "invalid arguments", K(tenant_name), K(tablegroup_name)); + ret = OB_INVALID_ARGUMENT; + } else { + PgPatternArray *ptns = is_black ? &black_pg_patterns_ : &pg_patterns_; + + matched = false; + + for (int64_t idx = 0, cnt = ptns->count(); OB_SUCC(ret) && !matched && idx < cnt; ++idx) { + const PgPattern &pattern = ptns->at(idx); + int err = 0; + const char *not_match_part = "UNKNOW"; + + // Tenant name. + if (0 != (err = fnmatch(pattern.tenant_pattern_.ptr(), + tenant_name, + fnmatch_flags))) { + // Not matched. + not_match_part = "TENANT_PATTERN"; + } + // TableGroup name. + else if (0 != (err = fnmatch(pattern.tablegroup_pattern_.ptr(), + tablegroup_name, + fnmatch_flags))) { + // Not matched. + not_match_part = "TABLEGROUP_PATTERN"; + } + else { + // Matched. + matched = true; + } + + if (matched) { + _ISTAT("[%s_PG_PATTERN_MATCHED] PATTERN='%s.%s' TABLEGROUP='%s.%s'", + is_black ? "BLACK" : "WHITE", + pattern.tenant_pattern_.ptr(), pattern.tablegroup_pattern_.ptr(), + tenant_name, tablegroup_name); + } else { + _ISTAT("[%s_PG_PATTERN_NOT_MATCH] PATTERN='%s.%s' TABLEGROUP='%s.%s' NOT_MATCH_PATTERN=%s", + is_black ? "BLACK" : "WHITE", + pattern.tenant_pattern_.ptr(), pattern.tablegroup_pattern_.ptr(), + tenant_name, tablegroup_name, + not_match_part); + } + + // fnmatch() err. + // OB_SUCCESS == 0. 
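// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): match() above walks a private
// copy of pattern1 in which every '|' has been overwritten with '\0', so the
// buffer becomes consecutive C strings that are visited with strlen(). The
// same idea as a standalone sketch, with a hypothetical pattern list:
//
//   #include <cstring>
//   #include <cstdio>
//
//   static void walk_patterns(char *buf, const size_t buf_size)
//   {
//     for (size_t i = 0; i < buf_size; ++i) {
//       if ('|' == buf[i]) { buf[i] = '\0'; }         // cut on the delimiter
//     }
//     for (size_t pos = 0; pos < buf_size; pos += strlen(buf + pos) + 1) {
//       printf("pattern: %s\n", buf + pos);           // one segment per pattern
//     }
//   }
//
//   // char buf[] = "tt1.*.*|tt2.db1.*";
//   // walk_patterns(buf, sizeof(buf)); // prints "tt1.*.*" then "tt2.db1.*"
// ---------------------------------------------------------------------------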
+ if (OB_SUCCESS != err && FNM_NOMATCH != err) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err exec fnmatch", KR(ret), K(err)); + } + } // for + } + + return ret; +} + +int ObLogTableMatcher::cluster_match(bool &matched) +{ + int ret = OB_SUCCESS; + matched = false; + const int64_t idx = 0; + + if (1 != patterns_.count()) { + matched = false; + } else { + const Pattern &pattern = patterns_.at(idx); + // always ensure that patterns are strings ending in \0, see build_patterns_() for implementation + // so build the same format, ObString::case_compare will compare the lengths + char tmp_str[2]; + tmp_str[0] = '*'; + tmp_str[1] = '\0'; + ObString match_all_str(0, 2, tmp_str); + + if ((0 == pattern.tenant_pattern_.case_compare(match_all_str)) + && (0 == pattern.database_pattern_.case_compare(match_all_str)) + && (0 == pattern.table_pattern_.case_compare(match_all_str)) + && (0 == strcmp(black_buf_, "|"))) { + OBLOG_LOG(INFO, "[TABLE_MATCHER] cluster_match succ", K(pattern), K(black_buf_)); + matched = true; + } else { + OBLOG_LOG(INFO, "[TABLE_MATCHER] cluster_match false", K(pattern), K(black_buf_)); + matched = false; + } + } + + return ret; +} + +int ObLogTableMatcher::init(const char *tb_white_list, + const char *tb_black_list, + const char *tg_white_list, + const char *tg_black_list) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(tb_white_list) || OB_ISNULL(tb_black_list) + || OB_ISNULL(tg_white_list) || OB_ISNULL(tg_black_list)) { + OBLOG_LOG(ERROR, "invalid_argument", K(tb_white_list), K(tb_black_list), + K(tg_white_list), K(tg_black_list)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(set_pattern_(tb_white_list))) { + OBLOG_LOG(ERROR, "set table white list pattern fail", KR(ret), K(tb_white_list)); + } else if (OB_FAIL(set_black_pattern_(tb_black_list))) { + OBLOG_LOG(ERROR, "set table black list pattern fail", KR(ret), K(tb_black_list)); + } else if (OB_FAIL(set_pg_pattern_(tg_white_list))) { + OBLOG_LOG(ERROR, "set tablegroup white list pattern fail", KR(ret), K(tg_white_list)); + } else if (OB_FAIL(set_black_pg_pattern_(tg_black_list))) { + OBLOG_LOG(ERROR, "set tablegroup black list pattern fail", KR(ret), K(tg_black_list)); + } else { + // succ + } + + return ret; +} + +int ObLogTableMatcher::destroy() +{ + int ret = OB_SUCCESS; + + // Free pattern buffer. + if (NULL != buf_) { + ob_log_free(buf_); + buf_ = NULL; + } + + buf_size_ = 0; + + // Free pattern array. + patterns_.reset(); + + // Free pattern buffer. 
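// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): a hypothetical end-to-end use
// of the matcher built from init()/cluster_match()/destroy() above. The
// literal list values are examples only; "|" is the empty blacklist that
// cluster_match() tests for.
//
//   ObLogTableMatcher matcher;
//   int ret = matcher.init("*.*.*",   // tb_white_list: serve every table
//                          "|",       // tb_black_list: empty
//                          "*.*",     // tg_white_list: every tablegroup
//                          "|");      // tg_black_list: empty
//   if (OB_SUCCESS == ret) {
//     bool matched = false;
//     (void)matcher.cluster_match(matched); // true for "*.*.*" plus empty blacklist
//     (void)matcher.destroy();
//   }
// ---------------------------------------------------------------------------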
+ if (NULL != black_buf_) { + ob_log_free(black_buf_); + black_buf_ = NULL; + } + + black_buf_size_ = 0; + + black_patterns_.reset(); + + return ret; +} + +int ObLogTableMatcher::set_pattern_(const char* pattern_str) +{ + bool is_black = false; + bool is_pg = false; + + return set_pattern_internal_(pattern_str, is_pg, is_black); +} + +int ObLogTableMatcher::set_black_pattern_(const char* black_pattern_str) +{ + bool is_black = true; + bool is_pg = false; + + return set_pattern_internal_(black_pattern_str, is_pg, is_black); +} + +int ObLogTableMatcher::set_pg_pattern_(const char* pattern_str) +{ + bool is_black = false; + bool is_pg = true; + + return set_pattern_internal_(pattern_str, is_pg, is_black); +} + +int ObLogTableMatcher::set_black_pg_pattern_(const char* black_pattern_str) +{ + bool is_black = true; + bool is_pg = true; + + return set_pattern_internal_(black_pattern_str, is_pg, is_black); +} + +int ObLogTableMatcher::set_pattern_internal_(const char* pattern_str, + const bool is_pg, + const bool is_black) +{ + int ret = OB_SUCCESS; + char **buffer = NULL; + int64_t *buffer_size = NULL; + + if (! is_pg) { + buffer = is_black ? &black_buf_ : &buf_; + buffer_size = is_black ? &black_buf_size_ : &buf_size_; + } else { + buffer = is_black ? &black_pg_buf_ : &pg_buf_; + buffer_size = is_black ? &black_pg_buf_size_ : &pg_buf_size_; + } + + if (OB_ISNULL(pattern_str)) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "NULL pattern", KR(ret), K(pattern_str)); + } + // Copy pattern string. + else { + int tmp_ret = 0; + *buffer_size = strlen(pattern_str) + 1; + // Alloc buffer. + if (OB_ISNULL(*buffer = reinterpret_cast(ob_log_malloc(*buffer_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + OBLOG_LOG(ERROR, "err alloc pattern string buffer", KR(ret), K(buffer_size)); + } + // Copy. + else if (*buffer_size <= (tmp_ret = snprintf(*buffer, *buffer_size, "%s", pattern_str)) + || (tmp_ret < 0)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "err snprintf", KR(ret), K(tmp_ret), K(buffer_size), K(pattern_str)); + } + else { + OBLOG_LOG(DEBUG, "pattern string", K(pattern_str), K(is_black)); + } + } + + if (OB_SUCC(ret)) { + // Split string. + if (! is_pg) { + if (OB_FAIL(build_patterns_(is_black))) { + OBLOG_LOG(ERROR, "err build patterns", KR(ret), K(is_pg), K(is_black)); + } + } else { + if (OB_FAIL(build_pg_patterns_(is_black))) { + OBLOG_LOG(ERROR, "err build patterns", KR(ret), K(is_pg), K(is_black)); + } + } + } + + // Free buf on error. + if (OB_SUCCESS != ret && NULL != *buffer) { + ob_log_free(*buffer); + *buffer = NULL; + *buffer_size = 0; + } + + return ret; +} + +int ObLogTableMatcher::build_patterns_(const bool is_black) +{ + int ret = OB_SUCCESS; + const char pattern_delimiter = '|'; + const char name_delimiter = '.'; + bool done = false; + + PatternArray *ptrn_array = is_black ? &black_patterns_ : &patterns_; + + if ((is_black && OB_ISNULL(black_buf_)) || (! is_black && OB_ISNULL(buf_))) { + OBLOG_LOG(ERROR, "invalid buffer", K(is_black), K(black_buf_), K(buf_)); + ret = OB_ERR_UNEXPECTED; + } else { + char **buffer = is_black ? &black_buf_ : &buf_; + + ObString remain(strlen(*buffer), *buffer); + Pattern pattern; + ObString cur_pattern; + + // Skip empty blacklist + if (0 == strcmp(*buffer, "|")) { + done = true; + } + + while (OB_SUCCESS == ret && !done) { + // Split Pattern & get current pattern. 
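// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): build_patterns_() turns a
// whitelist such as "tt1.db1.tb1|tt2.*.*" (hypothetical) into Pattern entries
// by splitting first on '|' and then on '.'. The same splitting expressed
// against plain C strings, mirroring ObString::split_on(), which yields an
// empty string when the delimiter is absent:
//
//   #include <cstring>
//
//   // Cuts *remain at the first 'delim'; returns the head segment, or NULL
//   // when the delimiter is not found.
//   static char *split_on(char **remain, const char delim)
//   {
//     char *head = *remain;
//     char *pos = strchr(head, delim);
//     if (NULL == pos) { return NULL; }
//     *pos = '\0';
//     *remain = pos + 1;
//     return head;
//   }
//
//   // With p pointing at "tt1.db1.tb1": split_on(&p, '.') -> "tt1",
//   // split_on(&p, '.') -> "db1", and the remainder "tb1" is the table part.
// ---------------------------------------------------------------------------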
+ cur_pattern = remain.split_on(pattern_delimiter); + if (cur_pattern.empty()) { + cur_pattern = remain; + done = true; + } + if (OB_SUCC(ret)) { + ObString &str = cur_pattern; + *(str.ptr() + str.length()) = '\0'; + str.set_length(1 + str.length()); + } + + // Split names. + pattern.reset(); + // Tenant name. + if (OB_SUCC(ret)) { + pattern.tenant_pattern_ = cur_pattern.split_on(name_delimiter); + if (pattern.tenant_pattern_.empty()) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "invalid argment", KR(ret), K(cur_pattern)); + } + else { + ObString &str = pattern.tenant_pattern_; + *(str.ptr() + str.length()) = '\0'; + // Here set_length does not change the length, because the split_on implementation ensures that the buffer_size and length are the same + // set_length will check the buffer_size. redirect + str.assign_ptr(str.ptr(), 1 + str.length()); + } + } + // Database name. + if (OB_SUCC(ret)) { + pattern.database_pattern_ = cur_pattern.split_on(name_delimiter); + if (pattern.database_pattern_.empty()) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "invalid argment", KR(ret), K(cur_pattern)); + } + else { + ObString &str = pattern.database_pattern_; + *(str.ptr() + str.length()) = '\0'; + // Here set_length does not change the length, because the split_on implementation ensures that the buffer_size and length are the same + // set_length will check the buffer_size. redirect + str.assign_ptr(str.ptr(), 1 + str.length()); + } + } + // Table name. + if (OB_SUCC(ret)) { + pattern.table_pattern_= cur_pattern; + if (pattern.table_pattern_.empty()) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "invalid argment", KR(ret), K(cur_pattern)); + } + else { + ObString &str = pattern.table_pattern_; + *(str.ptr() + str.length()) = '\0'; + str.assign_ptr(str.ptr(), 1 + str.length()); + } + } + + if (OB_SUCC(ret)) { + // Save pattern. + if (OB_SUCCESS != (ret = ptrn_array->push_back(pattern))) { + OBLOG_LOG(ERROR, "err push back pattern", KR(ret)); + } + else { + _ISTAT("[ADD_PATTERN] IS_BLACK=%d TENANT='%s' DATABASE='%s' TABLE='%s'", + is_black, + pattern.tenant_pattern_.ptr(), + pattern.database_pattern_.ptr(), + pattern.table_pattern_.ptr()); + } + } + } // while + } + + return ret; +} + +int ObLogTableMatcher::build_pg_patterns_(const bool is_black) +{ + int ret = OB_SUCCESS; + const char pattern_delimiter = '|'; + const char name_delimiter = '.'; + bool done = false; + + PgPatternArray *ptrn_array = is_black ? &black_pg_patterns_ : &pg_patterns_; + + if ((is_black && OB_ISNULL(black_pg_buf_)) || (! is_black && OB_ISNULL(pg_buf_))) { + OBLOG_LOG(ERROR, "invalid buffer", K(is_black), K(black_pg_buf_), K(pg_buf_)); + ret = OB_ERR_UNEXPECTED; + } else { + char **buffer = is_black ? &black_pg_buf_ : &pg_buf_; + + ObString remain(strlen(*buffer), *buffer); + PgPattern pattern; + ObString cur_pattern; + + // skip empty blacklist + if (0 == strcmp(*buffer, "|")) { + done = true; + } + + while (OB_SUCCESS == ret && !done) { + // Split Pattern & get current pattern. + cur_pattern = remain.split_on(pattern_delimiter); + if (cur_pattern.empty()) { + cur_pattern = remain; + done = true; + } + if (OB_SUCC(ret)) { + ObString &str = cur_pattern; + *(str.ptr() + str.length()) = '\0'; + str.set_length(1 + str.length()); + } + + // Split names. + pattern.reset(); + // Tenant name. 
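// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): as in build_patterns_() above,
// every split-out piece is turned into a NUL-terminated C string in place --
// a '\0' is written over the byte where the delimiter used to be and the
// ObString length grows by one -- so that fnmatch() can later consume ptr()
// directly. The underlying buffer trick, with a hypothetical
// "tenant.tablegroup" string:
//
//   char buf[] = "tt1.tg1";
//   char *dot = strchr(buf, '.');
//   *dot = '\0';                            // buf now holds "tt1\0tg1"
//   const char *tenant_ptn = buf;           // "tt1"
//   const char *tablegroup_ptn = dot + 1;   // "tg1"
// ---------------------------------------------------------------------------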
+ if (OB_SUCC(ret)) { + pattern.tenant_pattern_ = cur_pattern.split_on(name_delimiter); + if (pattern.tenant_pattern_.empty()) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "invalid argment", KR(ret), K(cur_pattern)); + } + else { + ObString &str = pattern.tenant_pattern_; + *(str.ptr() + str.length()) = '\0'; + str.set_length(1 + str.length()); + } + } + // Tablegroup name. + if (OB_SUCC(ret)) { + pattern.tablegroup_pattern_= cur_pattern; + if (pattern.tablegroup_pattern_.empty()) { + ret = OB_INVALID_ARGUMENT; + OBLOG_LOG(ERROR, "invalid argment", KR(ret), K(cur_pattern)); + } + else { + ObString &str = pattern.tablegroup_pattern_; + *(str.ptr() + str.length()) = '\0'; + str.set_length(1 + str.length()); + } + } + + if (OB_SUCC(ret)) { + // Save pattern. + if (OB_FAIL(ptrn_array->push_back(pattern))) { + OBLOG_LOG(ERROR, "err push back pattern", KR(ret)); + } + else { + _ISTAT("[ADD_PG_PATTERN] IS_BLACK=%d TENANT='%s' TABLEGROUP='%s'", + is_black, + pattern.tenant_pattern_.ptr(), + pattern.tablegroup_pattern_.ptr()); + } + } + } // while + } + + return ret; +} + +void ObLogTableMatcher::Pattern::reset() +{ + tenant_pattern_.reset(); + database_pattern_.reset(); + table_pattern_.reset(); +} + +void ObLogTableMatcher::PgPattern::reset() +{ + tenant_pattern_.reset(); + tablegroup_pattern_.reset(); +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_table_matcher.h b/src/liboblog/src/ob_log_table_matcher.h new file mode 100644 index 0000000000000000000000000000000000000000..73318cdd03da5ac9e9cfe757419d433cb69357d7 --- /dev/null +++ b/src/liboblog/src/ob_log_table_matcher.h @@ -0,0 +1,230 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_TABLE_MATCHER_H__ +#define OCEANBASE_LIBOBLOG_TABLE_MATCHER_H__ + +#include // fnmatch, FNM_CASEFOLD +#include "share/ob_define.h" +#include "lib/string/ob_string.h" +#include "lib/utility/ob_print_utils.h" +#include "lib/container/ob_array.h" + +namespace oceanbase +{ +namespace liboblog +{ +class IObLogTableMatcher +{ +public: + virtual ~IObLogTableMatcher() {} + +public: + /// Matching functions + /// Because the oracle schema introduces a case-sensitive mechanism, database/table needs to be case-sensitive + //// The fnmatch() function can no longer use FNM_NOESCAPE + /// + /// @param [in] tenant_name Tenant name + /// @param [in] db_name database name + /// @param [in] tb_name table name + /// @param [out] matched return value, indicating whether or not it matches + /// @param [in] fnmatch_flags fnmatch flags + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + virtual int table_match(const char *tenant_name, + const char *db_name, + const char *tb_name, + bool &matched, + const int fntatch_flags) = 0; + + /// check tenant match + /// + /// @param [in] tenant_name tenant name + /// @param [out] matched is matched or not + /// @param [in] fnmatch_flags fnmatch flags + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + virtual int tenant_match(const char *tenant_name, + bool &matched, + const int fnmatch_flags = FNM_CASEFOLD) = 0; + + /// PG match function: matches based on tenatn_name and tablegroup_name + /// + /// + /// @param [in] tenant_name tenant name + /// @param [in] tablegroup_name tablegroup name + /// @param [out] matched is match or not + /// @param [in] fnmatch_flags fnmatch flags + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + virtual int tablegroup_match(const char *tenant_name, + const char *tablegroup_name, + bool &matched, + const int fntatch_flags) = 0; + + /// is serving the cluster + /// + /// @param [out] matched is matched or not + /// + /// @retval OB_SUCCESS Success + /// @retval Other error codes Fail + virtual int cluster_match(bool &matched) = 0; +}; + +/* + * Impl. + * + */ +class ObLogTableMatcher : public IObLogTableMatcher +{ +public: + ObLogTableMatcher(); + virtual ~ObLogTableMatcher(); + +public: + int table_match(const char *tenant_name, + const char *db_name, + const char *tb_name, + bool &matched, + const int fnmatch_flags); + + int tenant_match(const char *tenant_name, + bool &matched, + const int fnmatch_flags = FNM_CASEFOLD); + + int tablegroup_match(const char *tenant_name, + const char *tablegroup_name, + bool &matched, + const int fntatch_flags); + + int cluster_match(bool &matched); + + static int match(const char *pattern1, + const common::ObIArray &pattern2, + bool &matched, + const int fnmatch_flags = FNM_CASEFOLD); + /* + * Init table matcher. + */ + int init(const char *tb_white_list, + const char *tb_black_list, + const char *tg_white_list, + const char *tg_black_list); + + /* + * Destroy. 
+ */ + int destroy(); + +private: + int table_match_pattern_(const bool is_black, + const char* tenant_name, + const char* db_name, + const char* tb_name, + bool& matched, + const int fnmatch_flags); + + int tenant_match_pattern_(const bool is_black, + const char* tenant_name, + bool& matched, + const int fnmatch_flags); + + int tablegroup_match_pattern_(const bool is_black, + const char* tenant_name, + const char* tablegroup_name, + bool& matched, + const int fnmatch_flags); + + int set_pattern_internal_(const char* pattern_str, + const bool is_pg, + const bool is_black); + + // Set table whitelist + int set_pattern_(const char *pattern_str); + // Set table blacklist + int set_black_pattern_(const char *black_pattern_str); + // Build table pattern array. + int build_patterns_(const bool is_black); + + // Set tablegroup whitelist + int set_pg_pattern_(const char *pattern_str); + // Set tablegroup blacklist + int set_black_pg_pattern_(const char *black_pattern_str); + // Build pg pattern array. + int build_pg_patterns_(const bool is_black); + +private: + struct Pattern + { + // Tenant. + common::ObString tenant_pattern_; + // Database. + common::ObString database_pattern_; + // Table. + common::ObString table_pattern_; + + /* + * Reset. Set patterns to empty string. + */ + void reset(); + TO_STRING_KV(K_(tenant_pattern), K_(database_pattern), K_(table_pattern)); + }; + typedef common::ObArray PatternArray; + + struct PgPattern + { + // Tenant. + common::ObString tenant_pattern_; + // Tablegroup. + common::ObString tablegroup_pattern_; + + /* + * Reset. Set patterns to empty string. + */ + void reset(); + TO_STRING_KV(K_(tenant_pattern), K_(tablegroup_pattern)); + }; + typedef common::ObArray PgPatternArray; +private: + // Pattern Array. + PatternArray patterns_; + // Buffer. + char *buf_; + int64_t buf_size_; + + // balcklist array + PatternArray black_patterns_; + // Buffer. + char *black_buf_; + int64_t black_buf_size_; + + // PG Whitelist + // PgPattern Array. + PgPatternArray pg_patterns_; + char *pg_buf_; + int64_t pg_buf_size_; + + // PG Blacklist + // PgPattern Array. + PgPatternArray black_pg_patterns_; + char *black_pg_buf_; + int64_t black_pg_buf_size_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTableMatcher); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_TABLE_MATCHER_H__ */ diff --git a/src/liboblog/src/ob_log_task_pool.h b/src/liboblog/src/ob_log_task_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..e50195707b654befcd73f6d77091a53c470db4eb --- /dev/null +++ b/src/liboblog/src/ob_log_task_pool.h @@ -0,0 +1,463 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_TASK_POOL_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_TASK_POOL_H_ + +#include "share/ob_define.h" +#include "lib/allocator/ob_malloc.h" +#include "lib/allocator/ob_allocator.h" +#include "lib/allocator/ob_concurrent_fifo_allocator.h" // ObConcurrentFIFOAllocator +#include "lib/queue/ob_link_queue.h" // ObLinkQueue +#include "lib/queue/ob_fixed_queue.h" // ObFixedQueue +#include "common/ob_partition_key.h" // ObPartitionKey + +namespace oceanbase +{ +namespace liboblog +{ + +template class ObLogTransTaskPool; + +// TransTaskBase. +// Base class for trans task. +// Call get() on task pool to get a task. +// Call revert() on task to return it to pool. +template +class TransTaskBase : public common::QLink +{ + typedef TransTaskBase MyType; + typedef ObLogTransTaskPool PoolType; +public: + TransTaskBase() : pool_(NULL) {} + virtual ~TransTaskBase() { } +public: + void set_pool(PoolType* pool) { pool_ = pool; } + void revert() + { + if (NULL != pool_) { + pool_->revert(this); + } + } + PoolType *get_pool() { return pool_; } +private: + PoolType *pool_; +}; + +// TransTaskPool. +// Pool of trans task of type T. +// You get one task by calling get(), and return it using revert() +// on task base. Calling revert() on the pool is also ok. +// Use pool_size to set the pre-alloc task number. +template +class ObLogTransTaskPool +{ + typedef T TaskType; + typedef TransTaskBase BaseTaskType; + typedef common::QLink PoolElemType; + typedef common::ObFixedQueue PagePool; + + static const int64_t LARGE_ALLOCATOR_PAGE_SIZE = (1LL << 22); // 4M + static const int64_t LARGE_ALLOCATOR_TOTAL_LIMIT = (1LL << 37); // 127G + static const int64_t LARGE_ALLOCATOR_HOLD_LIMIT = (1LL << 26); // 64M + +public: + ObLogTransTaskPool() : + inited_(false), + prealloc_task_cnt_(0), + task_page_size_(0), + allow_dynamic_alloc_(false), + prealloc_task_pool_(), + prealloc_pool_tasks_(NULL), + alloc_(NULL), + prealloc_page_cnt_(0), + prealloc_pages_(NULL), + prealloc_page_pool_(), + task_large_allocator_(), + total_cnt_(0), + dynamic_alloc_task_cnt_(0), + used_prealloc_task_cnt_(0) + { } + virtual ~ObLogTransTaskPool() { } + + int64_t get_alloc_count() const { return dynamic_alloc_task_cnt_ + used_prealloc_task_cnt_; } + int64_t get_total_count() const { return total_cnt_; } + +public: + // Init pool. + // Should provide allocator, and the size of prealloc task number. 
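// -------------------------------------------------------------------------
// Illustrative aside (not part of this patch): typical lifecycle of the pool,
// with hypothetical sizes and PartTransTask standing in for a concrete task
// type derived from TransTaskBase. Tasks come out of get() and go back via
// task->revert() (or pool.revert(task)).
//
//   common::ObConcurrentFIFOAllocator allocator;  // backing allocator,
//                                                 // assumed initialized elsewhere
//   ObLogTransTaskPool<PartTransTask> pool;
//   int ret = pool.init(&allocator,
//                       1024,       // prealloc_pool_size: pre-built tasks
//                       8 * 1024,   // trans_task_page_size
//                       true,       // allow dynamic allocation on exhaustion
//                       256);       // prealloc_page_count
//   if (common::OB_SUCCESS == ret) {
//     common::ObPartitionKey pkey;  // hypothetical partition
//     PartTransTask *task = pool.get("example", pkey);
//     if (NULL != task) {
//       // ... fill in and dispatch the task ...
//       task->revert();             // hand it back to the pool
//     }
//     pool.destroy();
//   }
// -------------------------------------------------------------------------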
+ int init(common::ObIAllocator *task_alloc, + const int64_t prealloc_pool_size, + const int64_t trans_task_page_size, + const bool allow_dynamic_alloc, + const int64_t prealloc_page_count) + { + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + ret = common::OB_INIT_TWICE; + OBLOG_LOG(WARN, "already init", KR(ret)); + } else if (OB_ISNULL(alloc_ = task_alloc) + || OB_UNLIKELY((prealloc_task_cnt_ = prealloc_pool_size) < 1) + || OB_UNLIKELY((task_page_size_ = trans_task_page_size) <= 0) + || OB_UNLIKELY((prealloc_page_cnt_ = prealloc_page_count) <= 0)) { + ret = common::OB_INVALID_ARGUMENT; + OBLOG_LOG(WARN, "invalid argument", KR(ret), K(task_alloc), K(prealloc_pool_size), + K(trans_task_page_size), K(prealloc_page_count)); + } else if (OB_FAIL(task_large_allocator_.init(LARGE_ALLOCATOR_TOTAL_LIMIT, + LARGE_ALLOCATOR_HOLD_LIMIT, + LARGE_ALLOCATOR_PAGE_SIZE))) { + OBLOG_LOG(ERROR, "init large allocator fail", KR(ret)); + } else if (OB_FAIL(prealloc_page_pool_.init(prealloc_page_count))) { + OBLOG_LOG(ERROR, "init prealloc page pool fail", KR(ret), K(prealloc_page_count)); + } else if (OB_FAIL(prepare_prealloc_tasks_(prealloc_pool_size, trans_task_page_size))) { + OBLOG_LOG(ERROR, "err prepare prealloc tasks", KR(ret), K(prealloc_pool_size), + K(trans_task_page_size)); + } else if (OB_FAIL(prepare_prealloc_pages_(prealloc_page_count, trans_task_page_size))) { + OBLOG_LOG(ERROR, "prepare prealloc pages fail", KR(ret), K(prealloc_page_count), + K(trans_task_page_size)); + } else { + task_large_allocator_.set_label(common::ObModIds::OB_LOG_PART_TRANS_TASK_LARGE); + allow_dynamic_alloc_ = allow_dynamic_alloc; + inited_ = true; + } + return ret; + } + + void destroy() + { + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + ret = common::OB_NOT_INIT; + } else if (OB_FAIL(clean_prealloc_tasks_())) { + CLOG_LOG(ERROR, "err clean prealloc tasks", KR(ret)); + } else { + if (0 < total_cnt_) { + OBLOG_LOG(WARN, "user didn't return all tasks", KR(ret), K(total_cnt_), + K(dynamic_alloc_task_cnt_), K(used_prealloc_task_cnt_), K(prealloc_task_cnt_)); + } + + task_large_allocator_.destroy(); + + // Clear pre-allocated pages + clean_prealloc_pages_(); + + inited_ = false; + prealloc_task_cnt_ = 0; + task_page_size_ = 0; + allow_dynamic_alloc_ = false; + prealloc_pool_tasks_ = NULL; + alloc_ = NULL; + prealloc_page_cnt_ = 0; + prealloc_pages_ = NULL; + total_cnt_ = 0; + dynamic_alloc_task_cnt_ = 0; + used_prealloc_task_cnt_ = 0; + + prealloc_page_pool_.destroy(); + } + } + +public: + // Get a task. + // Return NULL when it runs out of memory. + TaskType* get(const char *info, const common::ObPartitionKey &partition) + { + int ret = common::OB_SUCCESS; + TaskType *ret_task = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + OBLOG_LOG(WARN, "task pool not init", K(inited_)); + ret = common::OB_NOT_INIT; + } else { + PoolElemType *elem = NULL; + while (common::OB_SUCCESS == ret && NULL == elem) { + if (common::OB_SUCCESS == (ret = prealloc_task_pool_.pop(elem))) { + ret_task = static_cast(elem); + (void)ATOMIC_AAF(&used_prealloc_task_cnt_, 1); + } else if (common::OB_EAGAIN == ret) { + ret = common::OB_SUCCESS; + if (allow_dynamic_alloc_) { + if (OB_ISNULL(ret_task = new_task_())) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + OBLOG_LOG(ERROR, "alloc task fail", KR(ret)); + } else { + elem = ret_task; + (void)ATOMIC_AAF(&dynamic_alloc_task_cnt_, 1); + } + } else { + ret = common::OB_SUCCESS; + elem = NULL; + OBLOG_LOG(WARN, "no trans task avaliable, wait and retry", K(allow_dynamic_alloc_), + K(total_cnt_), K(prealloc_task_cnt_), K(used_prealloc_task_cnt_), + K(dynamic_alloc_task_cnt_)); + usleep(500 * 1000); + } + } else { + OBLOG_LOG(ERROR, "fail to pop task", KR(ret)); + } + } + } + + if (common::OB_SUCCESS == ret && OB_NOT_NULL(ret_task)) { + ret_task->set_prealloc_page(get_prealloc_page_()); + ret_task->set_pkey_info(partition, info); + } + return ret_task; + } + + // Return a task. + void revert(BaseTaskType *obj) + { + TaskType *task = static_cast(obj); + + if (OB_ISNULL(task)) { + // pass + } else { + // Recycling of pre-allocated pages + void *page = NULL; + task->revert_prealloc_page(page); + revert_prealloc_page_(page); + page = NULL; + + // Reset memory on recycle + task->reset(); + + if (prealloc_task_cnt_ > 0 + && NULL != prealloc_pool_tasks_ + && prealloc_pool_tasks_ <= task && task < (prealloc_pool_tasks_ + prealloc_task_cnt_)) { + (void)prealloc_task_pool_.push(task); + (void)ATOMIC_AAF(&used_prealloc_task_cnt_, -1); + } else { + delete_task_(task); + (void)ATOMIC_AAF(&dynamic_alloc_task_cnt_, -1); + } + + task = NULL; + } + } + + void print_stat_info() + { + int64_t dynamic_alloc_cnt = ATOMIC_LOAD(&dynamic_alloc_task_cnt_); + int64_t used_prealloc_cnt = ATOMIC_LOAD(&used_prealloc_task_cnt_); + int64_t prealloc_cnt = ATOMIC_LOAD(&prealloc_task_cnt_); + int64_t total_cnt = ATOMIC_LOAD(&total_cnt_); + + int64_t out = dynamic_alloc_cnt + used_prealloc_cnt; + _OBLOG_LOG(INFO, "[STAT] [TRANS_TASK_POOL] OUT=%ld/%ld FIXED=%ld/%ld DYNAMIC=%ld " + "PAGE_POOL=%ld/%ld", + out, total_cnt, used_prealloc_cnt, prealloc_cnt, dynamic_alloc_cnt, + prealloc_page_pool_.get_total(), prealloc_page_cnt_); + } +private: + TaskType* new_task_() + { + TaskType *ret_task = NULL; + int64_t alloc_size = static_cast(sizeof(TaskType)); + if (OB_ISNULL(ret_task = static_cast(alloc_->alloc(alloc_size)))) { + OBLOG_LOG(WARN, "failed to alloc task", K(ret_task), K(alloc_size)); + } else { + new(ret_task)TaskType(); + ret_task->set_pool(this); + ret_task->set_allocator(task_page_size_, task_large_allocator_); + (void)ATOMIC_AAF(&total_cnt_, 1); + } + return ret_task; + } + void delete_task_(TaskType *task) + { + if (NULL != task && NULL != alloc_) { + task->~TaskType(); + alloc_->free(task); + (void)ATOMIC_AAF(&total_cnt_, -1); + task = NULL; + } + } + int prepare_prealloc_tasks_(const int64_t cnt, const int64_t page_size) + { + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(cnt <= 0) || OB_UNLIKELY(page_size <= 0)) { + OBLOG_LOG(WARN, "invalid argument", K(cnt), K(page_size)); + ret = common::OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(NULL != prealloc_pool_tasks_)) { + OBLOG_LOG(WARN, "prealloc task has been allocated", KP(prealloc_pool_tasks_)); + ret = common::OB_INIT_TWICE; + } else { + int64_t size = 
static_cast(sizeof(TaskType) * cnt); + void *buf = common::ob_malloc(size, common::ObModIds::OB_LOG_PART_TRANS_TASK_POOL); + if (OB_ISNULL(prealloc_pool_tasks_ = static_cast(buf))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + OBLOG_LOG(ERROR, "err alloc task pool", KR(ret), K(size)); + } else { + for (int64_t idx = 0; common::OB_SUCCESS == ret && idx < cnt; ++idx) { + TaskType *task = prealloc_pool_tasks_ + idx; + new(task)TaskType(); + task->set_pool(this); + task->set_allocator(page_size, task_large_allocator_); + total_cnt_ += 1; + if (OB_FAIL(prealloc_task_pool_.push(task))) { + OBLOG_LOG(ERROR, "err push prealloc task pool", KR(ret), KP(task)); + } + } + } + } + return ret; + } + int clean_prealloc_tasks_() + { + int ret = common::OB_SUCCESS; + + if (prealloc_task_cnt_ > 0 && NULL != prealloc_pool_tasks_) { + int64_t cnt = 0; + for (int64_t idx = 0; + common::OB_SUCCESS == ret && idx < prealloc_task_cnt_; + ++idx) { + TaskType *task = NULL; + PoolElemType *elem = NULL; + if (OB_FAIL(prealloc_task_pool_.pop(elem))) { + if (common::OB_EAGAIN != ret) { + OBLOG_LOG(WARN, "err pop prealloc task pool", KR(ret), K(idx)); + } + } else if (OB_ISNULL(elem)) { + OBLOG_LOG(WARN, "pop prealloc task fail", KR(ret), K(elem)); + ret = common::OB_ERR_UNEXPECTED; + } else { + cnt += 1; + task = static_cast(elem); + task->~TaskType(); + total_cnt_ -= 1; + } + } + // Allow some tasks not returned. + if (common::OB_EAGAIN == ret) { + ret = common::OB_SUCCESS; + } + if (prealloc_task_cnt_ == cnt) { + common::ob_free(prealloc_pool_tasks_); + prealloc_pool_tasks_ = NULL; + prealloc_task_cnt_ = 0; + } + } + + return ret; + } + + int prepare_prealloc_pages_(const int64_t cnt, const int64_t page_size) + { + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(cnt <= 0) || OB_UNLIKELY(page_size <= 0)) { + OBLOG_LOG(ERROR, "invalid argument", K(cnt), K(page_size)); + ret = common::OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(NULL != prealloc_pages_)) { + OBLOG_LOG(ERROR, "prealloc pages has been allocated", K(prealloc_pages_)); + ret = common::OB_INIT_TWICE; + } else { + int64_t size = page_size * cnt; + prealloc_pages_ = common::ob_malloc(size, + common::ObModIds::OB_LOG_PART_TRANS_TASK_PREALLOC_PAGE); + + if (OB_ISNULL(prealloc_pages_)) { + OBLOG_LOG(ERROR, "allocate prealloc-page fail", K(cnt), K(page_size), K(size)); + ret = common::OB_ALLOCATE_MEMORY_FAILED; + } else { + for (int64_t index = 0; common::OB_SUCCESS == ret && index < cnt; index++) { + char *page = reinterpret_cast(prealloc_pages_) + (index * page_size); + // Initialize the page and ensure that physical pages are allocated + page[0] = '\0'; + + if (OB_FAIL(prealloc_page_pool_.push(page))) { + OBLOG_LOG(ERROR, "push prealloc page into pool fail", KR(ret), K(page), K(index), + K(page_size)); + } + } + } + } + return ret; + } + + void clean_prealloc_pages_() + { + if (NULL != prealloc_pages_) { + if (prealloc_page_pool_.get_total() < prealloc_page_cnt_) { + OBLOG_LOG(WARN, "part trans task prealloc pages are not reverted all", + K_(prealloc_page_cnt), "pool_size", prealloc_page_pool_.get_total()); + } + + // empty the pool + void *page = NULL; + while (common::OB_SUCCESS == prealloc_page_pool_.pop(page)) { + page = NULL; + } + + // 释放预分配页 + common::ob_free(prealloc_pages_); + prealloc_pages_ = NULL; + prealloc_page_cnt_ = 0; + } + } + + void *get_prealloc_page_() + { + int ret = common::OB_SUCCESS; + void *page = NULL; + + if (OB_FAIL(prealloc_page_pool_.pop(page))) { + if (common::OB_ENTRY_NOT_EXIST == ret) { + // No page available + // Normal + ret 
= common::OB_SUCCESS; + } else { + OBLOG_LOG(ERROR, "pop page from pool fail", KR(ret)); + } + } + return page; + } + + void revert_prealloc_page_(void *page) + { + int ret = common::OB_SUCCESS; + if (OB_NOT_NULL(page)) { + if (OB_FAIL(prealloc_page_pool_.push(page))) { + OBLOG_LOG(ERROR, "push prealloc page into pool fail", KR(ret), K(page)); + } + } + } + +private: + bool inited_; + int64_t prealloc_task_cnt_; + int64_t task_page_size_; + bool allow_dynamic_alloc_; + common::ObLinkQueue prealloc_task_pool_; + TaskType *prealloc_pool_tasks_; + common::ObIAllocator *alloc_; + + // Pool of pre-allocated page objects + int64_t prealloc_page_cnt_; + void *prealloc_pages_; + PagePool prealloc_page_pool_; + + // Using FIFO Allocator as a dynamic chunk allocator + common::ObConcurrentFIFOAllocator task_large_allocator_; + + int64_t total_cnt_ CACHE_ALIGNED; + int64_t dynamic_alloc_task_cnt_ CACHE_ALIGNED; // Number of dynamically assigned tasks + int64_t used_prealloc_task_cnt_ CACHE_ALIGNED; // Number of pre-assigned tasks used + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTransTaskPool); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_tenant.cpp b/src/liboblog/src/ob_log_tenant.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0abd322b42979bcabed8ff367da36b44faa52c8d --- /dev/null +++ b/src/liboblog/src/ob_log_tenant.cpp @@ -0,0 +1,820 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_tenant.h" + +#include "lib/allocator/ob_malloc.h" // OB_NEW OB_DELETE +#include "common/ob_partition_key.h" // ObPartitionKey +#include "share/inner_table/ob_inner_table_schema_constants.h" // OB_ALL_DDL_OPERATION_TID + +#include "ob_log_tenant_mgr.h" // ObLogTenantMgr +#include "ob_log_instance.h" // TCTX +#include "ob_log_config.h" // TCONF +#include "ob_log_timezone_info_getter.h" // ObLogTimeZoneInfoGetter + +#include "ob_log_start_schema_matcher.h" // ObLogStartSchemaMatcher + +#define STAT(level, tag_str, args...) OBLOG_LOG(level, "[STAT] [TENANT] " tag_str, ##args) +#define ISTAT(tag_str, args...) STAT(INFO, tag_str, ##args) +#define DSTAT(tag_str, args...) 
STAT(DEBUG, tag_str, ##args) + +using namespace oceanbase::common; +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace liboblog +{ +ObLogTenant::ObLogTenant() : + inited_(false), + tenant_id_(OB_INVALID_TENANT_ID), + start_schema_version_(OB_INVALID_VERSION), + task_queue_(NULL), + part_mgr_(*this), + tenant_state_(), + ddl_progress_(OB_INVALID_TIMESTAMP), + ddl_log_id_(OB_INVALID_ID), + drop_tenant_tstamp_(OB_INVALID_TIMESTAMP), + global_seq_and_schema_version_(), + committer_cur_schema_version_(OB_INVALID_VERSION), + committer_next_trans_schema_version_(OB_INVALID_VERSION), + tz_info_map_version_(OB_INVALID_TIMESTAMP), + tz_info_map_(NULL), + tz_info_wrap_(NULL), + cf_handle_(NULL) +{ + tenant_name_[0] = '\0'; + global_seq_and_schema_version_.lo = 0; + global_seq_and_schema_version_.hi = 0; +} + +ObLogTenant::~ObLogTenant() +{ + reset(); +} + +int ObLogTenant::init(const uint64_t tenant_id, + const char *tenant_name, + const int64_t start_tstamp, + const int64_t start_seq, + const int64_t start_schema_version, + void *cf_handle, + ObLogTenantMgr &tenant_mgr) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogTenant has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) + || OB_UNLIKELY(start_tstamp <= 0) + || OB_UNLIKELY(start_seq < 0) + || OB_UNLIKELY(start_schema_version <= 0) + || OB_ISNULL(tenant_name) + || OB_ISNULL(cf_handle)) { + LOG_ERROR("invalid argument", K(tenant_id), K(tenant_name), K(start_tstamp), K(start_seq), + K(start_schema_version), K(cf_handle)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(task_queue_ = OB_NEW(ObLogTenantTaskQueue, ObModIds::OB_LOG_TENANT_TASK_QUEUE, *this))) { + LOG_ERROR("create task queue fail", K(task_queue_)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(task_queue_->init(start_seq))) { + LOG_ERROR("task_queue_ init fail", KR(ret), K(start_seq)); + } else if (OB_FAIL(part_mgr_.init(tenant_id, start_schema_version, tenant_mgr.enable_oracle_mode_match_case_sensitive_, + tenant_mgr.part_info_map_, tenant_mgr.gindex_cache_, tenant_mgr.table_id_cache_, + tenant_mgr.part_add_cb_array_, tenant_mgr.part_rc_cb_array_))) { + LOG_ERROR("part_mgr_ init fail", KR(ret), K(tenant_id_), K(start_schema_version)); + } else if (OB_FAIL(databuff_printf(tenant_name_, sizeof(tenant_name_), pos, "%s", tenant_name))) { + LOG_ERROR("print tenant name fail", KR(ret), K(pos), K(tenant_id), K(tenant_name)); + } else if (OB_FAIL(init_tz_info_(tenant_id))) { + LOG_ERROR("init tz info failed", KR(ret), K(tenant_id)); + } + + if (OB_SUCC(ret)) { + tenant_id_ = tenant_id; + start_schema_version_ = start_schema_version; + + // init to NORMAL state + tenant_state_.reset(TENANT_STATE_NORMAL); + + // 1. When a transaction with the same timestamp as the start timestamp exists in the data partition and has not been sent, + // the progress is fetched at this point, as the "task to be output timestamp-1" is fetched and the heartbeat may fall back + // 2. 
so initialize the progress to start timestamp-1 + ddl_progress_ = start_tstamp - 1; + ddl_log_id_ = OB_INVALID_ID; + drop_tenant_tstamp_ = OB_INVALID_TIMESTAMP; + + global_seq_and_schema_version_.lo = start_seq; + global_seq_and_schema_version_.hi = start_schema_version; + + committer_cur_schema_version_ = start_schema_version; + committer_next_trans_schema_version_ = start_schema_version; + cf_handle_ = cf_handle; + + inited_ = true; + + LOG_INFO("init tenant succ", K(tenant_id), K(tenant_name), K(start_schema_version), + K(start_tstamp), K(start_seq)); + } + + return ret; +} + +void ObLogTenant::reset() +{ + if (inited_) { + LOG_INFO("destroy tenant", K_(tenant_id), K_(tenant_name), K_(start_schema_version)); + } + + inited_ = false; + uint64_t tenant_id = tenant_id_; + tenant_id_ = OB_INVALID_TENANT_ID; + tenant_name_[0] = '\0'; + + start_schema_version_ = OB_INVALID_VERSION; + + if (NULL != task_queue_) { + task_queue_->reset(); + OB_DELETE(ObLogTenantTaskQueue, unused, task_queue_); + task_queue_ = NULL; + } + + part_mgr_.reset(); + + tenant_state_.reset(); + + tz_info_map_version_ = OB_INVALID_TIMESTAMP; + if (OB_SYS_TENANT_ID != tenant_id) { + if (! OB_ISNULL(tz_info_map_)) { + OB_DELETE(ObTZInfoMap, "ObLogTenantTz", tz_info_map_); + tz_info_map_ = NULL; + } + if (! OB_ISNULL(tz_info_wrap_)) { + OB_DELETE(ObTimeZoneInfoWrap, "ObLogTenantTz", tz_info_wrap_); + tz_info_wrap_ = NULL; + } + } + ddl_progress_ = OB_INVALID_TIMESTAMP; + ddl_log_id_ = OB_INVALID_ID; + drop_tenant_tstamp_ = OB_INVALID_TIMESTAMP; + + global_seq_and_schema_version_.lo = 0; + global_seq_and_schema_version_.hi = 0; + committer_cur_schema_version_ = OB_INVALID_VERSION; + committer_next_trans_schema_version_ = OB_INVALID_VERSION; + cf_handle_ = NULL; +} + +int ObLogTenant::alloc_global_trans_seq_and_schema_version_for_ddl( + const int64_t base_schema_version, + int64_t &new_seq, + int64_t &new_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + int64_t end_time = get_timestamp() + timeout; + + while (OB_SUCC(ret)) { + types::uint128_t old_v; + types::uint128_t new_v; + + LOAD128(old_v, &global_seq_and_schema_version_); + + // Note: DDLs do not take up global serial numbers + // Only the global Schema version number will be affected + new_v.lo = old_v.lo; + + // Use int64_t to compare, use uint64_t to assign values + new_v.hi = + (static_cast(old_v.hi) < base_schema_version) ? + static_cast(base_schema_version) : old_v.hi; + + if (CAS128(&global_seq_and_schema_version_, old_v, new_v)) { + new_seq = static_cast(new_v.lo); + new_schema_version = static_cast(new_v.hi); + + int64_t old_seq = static_cast(old_v.lo); + int64_t old_schema_version = static_cast(old_v.hi); + + LOG_DEBUG("Tenant alloc_global_trans_seq_and_schema_version_for_ddl", K(tenant_id_), + K(new_seq), K(new_schema_version), K(old_seq), K(old_schema_version)); + break; + } + + PAUSE(); + + if (end_time <= get_timestamp()) { + ret = OB_TIMEOUT; + break; + } + } + + return ret; +} + +int ObLogTenant::alloc_global_trans_seq_and_schema_version(const int64_t base_schema_version, + int64_t &new_seq, + int64_t &new_schema_version, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + while (! stop_flag) { + types::uint128_t old_v; + types::uint128_t new_v; + + LOAD128(old_v, &global_seq_and_schema_version_); + + new_v.lo = old_v.lo + 1; + + // Use int64_t to compare, use uint64_t to assign values + new_v.hi = + (static_cast(old_v.hi) < base_schema_version) ? 
+ static_cast(base_schema_version) : old_v.hi; + + if (CAS128(&global_seq_and_schema_version_, old_v, new_v)) { + new_seq = static_cast(new_v.lo) - 1; + new_schema_version = static_cast(new_v.hi); + LOG_DEBUG("ObLogTenant alloc_global_trans_seq_and_schema_version", + K(new_seq), K(new_schema_version)); + + int64_t old_seq = static_cast(old_v.lo); + int64_t old_schema_version = static_cast(old_v.hi); + + LOG_DEBUG("alloc_global_trans_seq_and_schema_version_for_dml", K(tenant_id_), + K(new_seq), K(new_schema_version), K(old_seq), K(old_schema_version)); + break; + } + } + + if (stop_flag) { + ret = OB_IN_STOP_STATE; + } + + return ret; +} + +int ObLogTenant::drop_tenant(bool &tenant_can_be_dropped, const char *call_from) +{ + int ret = OB_SUCCESS; + tenant_can_be_dropped = false; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else { + ISTAT("[DROP_TENANT] BEGIN", K_(tenant_id), K_(tenant_name), + "state", print_state(get_tenant_state()), + "active_part_count", get_active_part_count(), K(call_from)); + + // If the tenant is already OFFLINE, it is no longer necessary to drop + if (TENANT_STATE_OFFLINE == get_tenant_state()) { + ISTAT("[DROP_TENANT] END: tenant is dropped twice", "tenant", *this, K(call_from)); + } else if (OB_FAIL(part_mgr_.drop_all_tables())) { + LOG_ERROR("PartMgr drop_all_tables fail", KR(ret), KPC(this)); + } else { + int64_t old_state = TENANT_STATE_INVALID; + int64_t ref_cnt = 0; + // Status changed to OFFLINE, returning old status and reference count + if (tenant_state_.change_state(TENANT_STATE_OFFLINE, old_state, ref_cnt)) { + tenant_can_be_dropped = (0 == ref_cnt); + } else { + LOG_INFO("tenant has been in offline state", KPC(this)); + } + + ISTAT("[DROP_TENANT] END", K_(tenant_id), K_(tenant_name), + K(tenant_can_be_dropped), + "old_state", print_state(old_state), + "cur_part_count", ref_cnt, + K(call_from)); + } + } + return ret; +} + +// Note: This interface and mark_drop_tenant_start() must be called serially, otherwise there are correctness issues +int ObLogTenant::update_ddl_info(const PartTransTask &task) +{ + int ret = OB_SUCCESS; + const int64_t PRINT_INTERVAL = 10 * 1000 * 1000L; + // Progress information is required for all types of tasks + const int64_t handle_progress = task.get_timestamp(); + + // Invalid prepare log id and schema version for DDL heartbeat task, + // ignored when checking parameters + const uint64_t handle_log_id = task.get_prepare_log_id(); + const int64_t ddl_schema_version = task.get_local_schema_version(); + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! is_serving())) { + // DDL progress information is not updated if the tenant is not in service + if (REACH_TIME_INTERVAL(PRINT_INTERVAL)) { + LOG_INFO("tenant state is not serving, need not update DDL info", KPC(this), + K(handle_progress), K(handle_log_id), K(ddl_schema_version)); + } + } + // Only heartbeat and DDL transaction tasks are allowed + else if (OB_UNLIKELY(! task.is_ddl_trans() && ! 
task.is_ddl_part_heartbeat())) { + LOG_ERROR("task is not DDL trans task, HEARTBEAT, not supported", K(task)); + ret = OB_NOT_SUPPORTED; + } else if (OB_UNLIKELY(handle_progress <= 0)) { + // Progress must be effective + LOG_ERROR("invalid argument", K(handle_progress), K(task)); + ret = OB_INVALID_ARGUMENT; + } + // Update the schema version if the schema version is valid + else if (ddl_schema_version > 0 + && ddl_schema_version > part_mgr_.get_schema_version() + && OB_FAIL(part_mgr_.update_schema_version(ddl_schema_version))) { + LOG_ERROR("part mgr update schema version fail", KR(ret), K(ddl_schema_version), K(task)); + } + // update the progress for all types of tasks + // Note: for DDL heartbeat tasks, handle_log_id is not valid + else if (OB_FAIL(update_ddl_progress_(handle_progress, handle_log_id))) { + LOG_ERROR("update ddl progress fail", KR(ret), K(handle_progress), K(handle_log_id), K(task)); + } else { + // success + } + return ret; +} + +int ObLogTenant::update_ddl_progress_(const int64_t handle_progress, const uint64_t handle_log_id) +{ + int ret = OB_SUCCESS; + const int64_t old_handle_progress = ATOMIC_LOAD(&ddl_progress_); + + // Note: It is important here to ensure that the DDL progress does not fall back, otherwise it will cause the heartbeat progress to fall back and exit with an error. + // The actual __all_ddl_operation of the new tenant will pull in DDL transactions with a timestamp less than or equal to the start-up timestamp, and the update DDL progress should be guaranteed to increment + if (OB_INVALID_TIMESTAMP == ddl_progress_ || handle_progress > ddl_progress_) { + ATOMIC_STORE(&ddl_progress_, handle_progress); + + // Note: It is possible that the handle_log_id passed in is an invalid value. + if (OB_INVALID_ID != handle_log_id) { + ATOMIC_STORE(&ddl_log_id_, handle_log_id); + } + } + + // Check if the progress value is greater than drop_tenant_tstamp_ for the first time + if (OB_INVALID_TIMESTAMP != drop_tenant_tstamp_ + && old_handle_progress < drop_tenant_tstamp_ + && handle_progress >= drop_tenant_tstamp_) { + bool need_drop_tenant = false; + + LOG_INFO("DDL progress is beyond drop_tenant_tstamp while updating ddl progress, " + "start to drop tenant", + K_(tenant_id), K_(drop_tenant_tstamp), K(old_handle_progress), K(handle_progress), + "delta", drop_tenant_tstamp_ - handle_progress, K(handle_log_id), + "state", print_state(get_tenant_state()), + "active_part_count", get_active_part_count()); + + if (OB_FAIL(start_drop_tenant_if_needed_(need_drop_tenant))) { + LOG_ERROR("start_drop_tenant_if_needed_ fail", KR(ret), K(tenant_id_), + K(drop_tenant_tstamp_), K(old_handle_progress), K(handle_progress)); + } + } + return ret; +} + +// This interface is called when processing a deleted tenant DDL for a SYS tenant +// It is currently assumed that DDLs for all tenants are processed serially, if multiple tenants are processed in parallel there are concurrency issues here +int ObLogTenant::mark_drop_tenant_start(const int64_t drop_tenant_start_tstamp) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("tenant has not been initialized", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(drop_tenant_start_tstamp <= 0)) { + LOG_ERROR("invalid argument", K(drop_tenant_start_tstamp)); + ret = OB_INVALID_ARGUMENT; + } + // If it is already offline, it no longer needs to be processed + else if (is_offlined()) { + LOG_INFO("[DROP_TENANT] tenant has been offlined, need not mark drop tenant start", KPC(this), + K(drop_tenant_start_tstamp)); + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP != drop_tenant_tstamp_)) { + LOG_ERROR("invalid drop_tenant_tstamp_ which should be invalid", K(drop_tenant_tstamp_)); + ret = OB_ERR_UNEXPECTED; + } else { + // Mark the timestamp of the deleted tenant + drop_tenant_tstamp_ = drop_tenant_start_tstamp; + bool need_drop_tenant = false; + + if (OB_FAIL(start_drop_tenant_if_needed_(need_drop_tenant))) { + LOG_ERROR("start_drop_tenant_if_needed_ fail", KR(ret), K(tenant_id_), + K(drop_tenant_start_tstamp)); + } + + ISTAT("[DROP_TENANT] mark drop tenant start", KR(ret), K_(tenant_id), K(need_drop_tenant), + K(drop_tenant_start_tstamp), K(ddl_progress_), + "delta", drop_tenant_start_tstamp - ddl_progress_, + "state", print_state(get_tenant_state()), + "active_part_count", get_active_part_count()); + } + return ret; +} + +bool ObLogTenant::need_drop_tenant_() const +{ + // A tenant can only be deleted if the DDL processing progress is greater than the timestamp of the deleted tenant + return (OB_INVALID_TIMESTAMP != drop_tenant_tstamp_ && ddl_progress_ >= drop_tenant_tstamp_); +} + +int ObLogTenant::start_drop_tenant_if_needed_(bool &need_drop_tenant) +{ + int ret = OB_SUCCESS; + need_drop_tenant = need_drop_tenant_(); + + if (need_drop_tenant) { + ISTAT("[DROP_TENANT] need_drop_tenant, begin drop DDL partition", + K_(tenant_id), K_(tenant_name), K_(drop_tenant_tstamp), K_(ddl_progress), + "delta", drop_tenant_tstamp_ - ddl_progress_, + "state", print_state(get_tenant_state()), + "active_part_count", get_active_part_count()); + + // Delete the DDL partition if the tenant can be deleted, and let the DDL partition offline task trigger the deletion of the tenant + if (OB_FAIL(drop_ddl_part_())) { + LOG_ERROR("drop_ddl_part_ fail", KR(ret), K(tenant_id_)); + } + } + return ret; +} + +int ObLogTenant::drop_ddl_part_() +{ + int ret = OB_SUCCESS; + const uint64_t ddl_table_id = combine_id(tenant_id_, OB_ALL_DDL_OPERATION_TID); + const ObPartitionKey ddl_pkey(ddl_table_id, 0, 0); + + if (OB_FAIL(part_mgr_.offline_partition(ddl_pkey))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_INFO("DDL partition has been offlined", KR(ret), K(ddl_pkey), K(tenant_id_)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("part mgr offline_partition fail", KR(ret), K(ddl_pkey)); + } + } + return ret; +} + +const char *ObLogTenant::print_state(const int64_t state) +{ + const char *ret = "UNKNOWN"; + + switch (state) { + case TENANT_STATE_INVALID: { + ret = "INVALID"; + break; + } + case TENANT_STATE_NORMAL: { + ret = "NORMAL"; + break; + } + case TENANT_STATE_OFFLINE: { + ret = "OFFLINE"; + break; + } + default: { + ret = "UNKNOWN"; + break; + } + } + + return ret; +} + +int ObLogTenant::inc_part_count_on_serving(const ObPartitionKey &pkey, bool &is_serving) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenant has not been initialized", K(inited_)); + ret = OB_NOT_INIT; + } else { + is_serving = false; + + // Only the NORMAL state can increase the reference count + // If the tenant has been deleted, the add partition operation cannot be executed again + int64_t target_state = TENANT_STATE_NORMAL; + int64_t new_state = TENANT_STATE_INVALID; + int64_t new_ref = 0; + + // Increase reference count, return latest status and reference count + if (! tenant_state_.inc_ref(target_state, new_state, new_ref)) { + is_serving = false; + } else { + is_serving = true; + } + + ISTAT("[INC_PART_COUNT_ON_SERVING]", + K_(tenant_id), + K_(tenant_name), + K(is_serving), + "cur_state", print_state(new_state), + "cur_part_count", new_ref, + K(pkey)); + } + return ret; +} + +int ObLogTenant::recycle_partition(const ObPartitionKey &pkey, bool &tenant_can_be_dropped) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenant has not been initialized", K(inited_)); + ret = OB_NOT_INIT; + } else { + // First PartMgr reclaims the partition + if (OB_FAIL(part_mgr_.offline_and_recycle_partition(pkey))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Partition does not exist, normal + ret = OB_SUCCESS; + } else { + LOG_ERROR("part mgr offline_and_recycle_partition fail", KR(ret), K(pkey), K(tenant_id_)); + } + } + + // Tenant structure minus reference count + if (OB_SUCCESS == ret) { + int64_t new_state = TENANT_STATE_INVALID; + int64_t new_ref = 0; + + // Return the latest status and reference count value + tenant_state_.dec_ref(new_state, new_ref); + + // The reference count cannot be 0, otherwise there is a bug + if (OB_UNLIKELY(new_ref < 0)) { + LOG_ERROR("tenant reference count is invalid after dec_ref()", K(new_ref), K(new_state), + KPC(this)); + ret = OB_ERR_UNEXPECTED; + } else { + // Can a tenant drop + tenant_can_be_dropped = (TENANT_STATE_OFFLINE == new_state && 0 == new_ref); + } + + ISTAT("[RECYCLE_PARTITION]", + K_(tenant_id), + K_(tenant_name), + "cur_state", print_state(new_state), + "cur_part_count", new_ref, + K(tenant_can_be_dropped), + K(pkey)); + } + } + return ret; +} + +int ObLogTenant::update_committer_next_trans_schema_version(int64_t schema_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenant has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_VERSION == schema_version)) { + LOG_ERROR("invalid argument", K(schema_version)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(schema_version < ATOMIC_LOAD(&committer_next_trans_schema_version_))) { + LOG_ERROR("global schema version reversed, unexpected", K(schema_version), + K(committer_next_trans_schema_version_)); + ret = OB_ERR_UNEXPECTED; + } else { + ATOMIC_STORE(&committer_next_trans_schema_version_, schema_version); + } + + return ret; +} + +void ObLogTenant::print_stat_info() +{ + int64_t serving_part_count = 0, offline_part_count = 0, not_served_part_count = 0; + + if (inited_) { + const uint64_t ddl_table_id = combine_id(tenant_id_, OB_ALL_DDL_OPERATION_TID); + + // First call PartMgr to print the partition information and return the number of partitions served and the number of downstream partitions + part_mgr_.print_part_info(serving_part_count, offline_part_count, not_served_part_count); + + _LOG_INFO("[SERVE_INFO] TENANT=%lu(%s) STATE=%s(%ld) " + "PART_COUNT(SERVE=%ld,OFFLINE=%ld,NOT_SERVE=%ld,ACTIVE=%ld) " + "DDL_PROGRESS=%s DELAY=%s DDL_LOG_ID=%lu " + "QUEUE(DML=%ld) " + "SEQ(GB=%ld,CMT=%ld) " + "SCHEMA(GB=%ld,CUR=%ld) " + "CMT_SCHEMA(CUR=%ld,NEXT=%ld) " + "DROP_TS=%s " + "DDL_TABLE=%lu", + tenant_id_, tenant_name_, print_state(get_tenant_state()), get_tenant_state(), + serving_part_count, offline_part_count, not_served_part_count, get_active_part_count(), + TS_TO_STR(ddl_progress_), TS_TO_DELAY(ddl_progress_), ddl_log_id_, + NULL == task_queue_ ? 0 : task_queue_->get_log_entry_task_count(), + get_global_seq(), NULL == task_queue_ ? 0 : task_queue_->get_next_task_seq(), + get_global_schema_version(), get_schema_version(), + committer_cur_schema_version_, committer_next_trans_schema_version_, + TS_TO_STR(drop_tenant_tstamp_), + ddl_table_id); + } +} + +// If the tenant is not in service, an error is reported, requiring that the tenant must be in service +int ObLogTenant::add_ddl_table( + const int64_t start_serve_tstamp, + const int64_t ddl_table_start_schema_version, + const bool is_create_tenant) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenant has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(! is_serving())) { + LOG_ERROR("tenant is not serving", KPC(this)); + ret = OB_STATE_NOT_MATCH; + } else if (OB_FAIL(part_mgr_.add_ddl_table(start_serve_tstamp, ddl_table_start_schema_version, + is_create_tenant))) { + LOG_ERROR("PartMgr add_ddl_table fail", KR(ret), K(start_serve_tstamp), + K(ddl_table_start_schema_version), K(is_create_tenant)); + } else { + ISTAT("[ADD_DDL_TABLE] update tenant schema version after add ddl table", K_(tenant_id), + "cur_schema_version", part_mgr_.get_schema_version(), + K_(start_schema_version), K(ddl_table_start_schema_version), + K(start_serve_tstamp), K(is_create_tenant)); + + // To update the schema version after adding the DDL partition + // + // Scenario: schema split DDL processing to add DDL partitions to each tenant, this process should incidentally push the tenant schema version to the corresponding schema version of the split DDL, otherwise the schema version saved on each tenant is the latest schema version of the SYS tenant before the split, not the latest version of that tenant. 
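+    //
+    // For example (illustrative): if PartMgr still holds the pre-split SYS schema version V0 and the split
+    // DDL carries the tenant schema version V1 > V0, the branch below advances PartMgr to V1 so that later
+    // DDLs are compared against the tenant-local version.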
+    //
+    // The observer ensures that a DDL operation is written transactionally for each tenant and that they have
+    // the same schema version, ensuring that after the split, each tenant advances to the latest version.
+    if (ddl_table_start_schema_version > part_mgr_.get_schema_version()) {
+      if (OB_FAIL(part_mgr_.update_schema_version(ddl_table_start_schema_version))) {
+        LOG_ERROR("part mgr update schema version fail", KR(ret),
+            K(ddl_table_start_schema_version), KPC(this));
+      }
+    }
+
+    if (OB_SUCCESS == ret) {
+      // The starting schema version should also be updated to the DDL starting schema version, otherwise the DDL partition
+      // will pull in schema operations of an older version when it starts, which would roll the schema version back
+      start_schema_version_ = ddl_table_start_schema_version;
+    }
+  }
+  return ret;
+}
+
+// TODO: Only the all_sequence_value table of normal tenants is supported after version 2.2 for now
+int ObLogTenant::add_inner_tables(const int64_t start_serve_tstamp,
+    const int64_t start_schema_version,
+    const int64_t timeout)
+{
+  int ret = OB_SUCCESS;
+  const bool enable_backup_mode = is_backup_mode();
+
+  if (OB_UNLIKELY(! inited_)) {
+    LOG_ERROR("ObLogTenant has not been initialized");
+    ret = OB_NOT_INIT;
+  } else if (OB_UNLIKELY(! is_serving())) {
+    LOG_ERROR("tenant is not serving", KPC(this));
+    ret = OB_STATE_NOT_MATCH;
+  } else if (OB_UNLIKELY(! enable_backup_mode)) {
+    LOG_ERROR("inner tables can only be added on backup mode", K(enable_backup_mode), K(tenant_id_),
+        K(start_serve_tstamp), K(start_schema_version));
+    ret = OB_NOT_SUPPORTED;
+  } else {
+    ret = part_mgr_.add_inner_tables(start_serve_tstamp, start_schema_version, timeout);
+  }
+
+  return ret;
+}
+
+/// add all tables
+int ObLogTenant::add_all_tables(
+    const int64_t start_serve_tstamp,
+    const int64_t start_schema_version,
+    const int64_t timeout)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(! inited_)) {
+    LOG_ERROR("ObLogTenant has not been initialized");
+    ret = OB_NOT_INIT;
+  } else if (OB_UNLIKELY(!
is_serving())) { + LOG_ERROR("tenant is not serving", KPC(this)); + ret = OB_STATE_NOT_MATCH; + } else { + ret = part_mgr_.add_all_tables(start_serve_tstamp, start_schema_version, timeout); + } + return ret; +} + +//////////////////////////// ObLogTenantGuard ///////////////////////// +void ObLogTenantGuard::revert_tenant() +{ + IObLogTenantMgr *tenant_mgr = TCTX.tenant_mgr_; + + if (OB_NOT_NULL(tenant_) && OB_NOT_NULL(tenant_mgr)) { + int revert_ret = tenant_mgr->revert_tenant(tenant_); + if (OB_SUCCESS != revert_ret) { + LOG_ERROR("revert ObLogTenant fail", K(revert_ret), KPC(tenant_)); + } else { + tenant_ = NULL; + } + } +} + +void ObLogTenant::update_global_data_schema_version(const int64_t data_start_schema_version) +{ + int64_t start_schema_version = global_seq_and_schema_version_.hi; + global_seq_and_schema_version_.hi = std::max(start_schema_version, data_start_schema_version); + + LOG_INFO("set_data_start_schema_version succ", K_(tenant_id), + K(start_schema_version), K(data_start_schema_version), + "global_seq", global_seq_and_schema_version_.lo, + "global_schema_version", global_seq_and_schema_version_.hi); +} + +// This function only works in tenant split mode and is not responsible for checking if the mode is split or not +// This function updates start_schema_version only if data_start_schema_version is set correctly +int ObLogTenant::update_data_start_schema_version_on_split_mode() +{ + int ret = OB_SUCCESS; + bool match = false; + int64_t schema_version = 0; + int64_t old_data_schema_version = global_seq_and_schema_version_.hi; + IObLogStartSchemaMatcher *schema_matcher = TCTX.ss_matcher_; + + if (OB_ISNULL(schema_matcher)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("schema version matcher is NULL", KR(ret), K(schema_matcher)); + } else if (OB_FAIL(schema_matcher->match_data_start_schema_version(tenant_id_, + match, + schema_version))) { + LOG_ERROR("match_data_start_schema_version failed", + KR(ret), K(tenant_id_), K(match), K(schema_version)); + } else if (match) { + global_seq_and_schema_version_.hi = std::max(schema_version, old_data_schema_version); + } else { + // No specified tenant found, original schema version used + } + + if (OB_SUCC(ret) && match) { + LOG_INFO("[UPDATE_START_SCHEMA] update_data_start_schema_version_on_split_mode succ", KR(ret), + K(tenant_id_), K(schema_version), K(global_seq_and_schema_version_.hi), + K(old_data_schema_version)); + } + + return ret; +} + +int ObLogTenant::init_tz_info_(const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + + if (OB_SYS_TENANT_ID == tenant_id) { + tz_info_map_ = &TCTX.tz_info_map_; + tz_info_wrap_ = &TCTX.tz_info_wrap_; + } else { + if (OB_ISNULL(tz_info_map_ = OB_NEW(ObTZInfoMap, "ObLogTenantTz"))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("create tenant timezone info map failed", KR(ret)); + } else if (OB_ISNULL(tz_info_wrap_ = OB_NEW(ObTimeZoneInfoWrap, "ObLogTenantTz"))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("create tenant timezone info wrap failed", KR(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(tz_info_map_->init(ObModIds::OB_HASH_BUCKET_TIME_ZONE_INFO_MAP))) { + LOG_ERROR("fail to init tz_info_map_", K(tenant_id), KR(ret)); + } else if (TCTX.timezone_info_getter_->init_tz_info_wrap(tenant_id, tz_info_map_version_, + *tz_info_map_, *tz_info_wrap_)) { + LOG_ERROR("fail to init tz info wrap", KR(ret), K(tenant_id)); + } else { + // succ + } + + if (OB_FAIL(ret)) { + if (NULL != tz_info_map_) { + OB_DELETE(ObTZInfoMap, "ObLogTenantTz", tz_info_map_); + tz_info_map_ = NULL; + } 
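+    // tz_info_wrap_ is released below in the same way, so a tenant that fails part-way through init
+    // leaves no tenant-local timezone objects behind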
+    if (NULL != tz_info_wrap_) {
+      OB_DELETE(ObTimeZoneInfoWrap, "ObLogTenantTz", tz_info_wrap_);
+      tz_info_wrap_ = NULL;
+    }
+  }
+
+  return ret;
+}
+
+} // namespace liboblog
+} // namespace oceanbase
diff --git a/src/liboblog/src/ob_log_tenant.h b/src/liboblog/src/ob_log_tenant.h
new file mode 100644
index 0000000000000000000000000000000000000000..efc28ca99c149036abab5e4c060f4ba23e03c3aa
--- /dev/null
+++ b/src/liboblog/src/ob_log_tenant.h
@@ -0,0 +1,318 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIBOBLOG_TENANT_H__
+#define OCEANBASE_LIBOBLOG_TENANT_H__
+
+#include "lib/atomic/ob_atomic.h"           // ATOMIC_LOAD
+#include "lib/atomic/atomic128.h"           // ATOMIC_LOAD
+
+#include "ob_log_tenant_task_queue.h"       // ObLogTenantTaskQueue
+#include "ob_log_part_mgr.h"                // ObLogPartMgr
+#include "ob_log_ref_state.h"               // RefState
+#include "lib/timezone/ob_timezone_info.h"  // ObTimeZoneInfo
+
+namespace oceanbase
+{
+namespace common
+{
+struct ObPartitionKey;
+}
+
+namespace liboblog
+{
+
+//////////////////////// TENANT STRUCTURE ////////////////////////
+
+class ObLogTenantMgr;
+
+struct TenantID
+{
+  uint64_t tenant_id_;
+
+  TenantID() : tenant_id_(OB_INVALID_TENANT_ID) {}
+
+  TenantID(const uint64_t tenant_id) : tenant_id_(tenant_id)
+  {}
+
+  int64_t hash() const
+  {
+    return static_cast<int64_t>(tenant_id_);
+  }
+
+  int compare(const TenantID &other) const
+  {
+    int cmp_ret = 0;
+
+    if (tenant_id_ > other.tenant_id_) {
+      cmp_ret = 1;
+    } else if (tenant_id_ < other.tenant_id_) {
+      cmp_ret = -1;
+    } else {
+      cmp_ret = 0;
+    }
+
+    return cmp_ret;
+  }
+
+  bool operator== (const TenantID &other) const { return 0 == compare(other); }
+  bool operator!=(const TenantID &other) const { return !operator==(other); }
+  bool operator<(const TenantID &other) const { return -1 == compare(other); }
+
+  void reset()
+  {
+    tenant_id_ = common::OB_INVALID_TENANT_ID;
+  }
+
+  TO_STRING_KV(K_(tenant_id));
+};
+
+typedef common::LinkHashValue<TenantID> TenantValue;
+class ObLogTenant : public TenantValue
+{
+  static const int64_t DATA_OP_TIMEOUT = 1 * _SEC_;
+public:
+  ObLogTenant();
+  ~ObLogTenant();
+
+public:
+  int init(const uint64_t tenant_id,
+      const char *tenant_name,
+      const int64_t start_tstamp,
+      const int64_t start_seq,
+      const int64_t start_schema_version,
+      void *cf_handle,
+      ObLogTenantMgr &tenant_mgr);
+  void reset();
+  bool is_inited() const { return inited_; }
+
+  // get/set functions
+public:
+  uint64_t get_tenant_id() const { return tenant_id_; }
+  int64_t get_start_schema_version() const { return start_schema_version_; }
+  int64_t get_schema_version() const { return part_mgr_.get_schema_version(); }
+  int64_t get_ddl_progress() const { return ATOMIC_LOAD(&ddl_progress_); }
+  uint64_t get_handle_log_id() const { return ATOMIC_LOAD(&ddl_log_id_); }
+  ObLogTenantTaskQueue *get_task_queue() { return task_queue_; }
+  int64_t get_committer_cur_schema_version() const { return committer_cur_schema_version_; }
+  void update_committer_cur_schema_version(const int64_t ddl_schema_version)
+  {
+    committer_cur_schema_version_ =
std::max(committer_cur_schema_version_, ddl_schema_version); + } + int64_t get_committer_next_trans_schema_version() const { return ATOMIC_LOAD(&committer_next_trans_schema_version_); } + int update_committer_next_trans_schema_version(int64_t schema_version); + IObLogPartMgr &get_part_mgr() { return part_mgr_; } + int64_t get_global_schema_version() const { return global_seq_and_schema_version_.hi; } + int64_t get_global_seq() const { return global_seq_and_schema_version_.lo; } + // get timezone info version + int64_t get_timezone_info_version() const { return ATOMIC_LOAD(&tz_info_map_version_); } + // update timezone info version + void update_timezone_info_version(const int64_t timezone_info_version) + { ATOMIC_STORE(&tz_info_map_version_, timezone_info_version); } + common::ObTimeZoneInfoWrap *get_tz_info_wrap() { return tz_info_wrap_; } + common::ObTZInfoMap *get_tz_info_map() { return tz_info_map_; } + + void *get_cf() { return cf_handle_; } + +public: + // Print statistics + void print_stat_info(); + + // Update DDL-related information + // + // @retval OB_SUCCESS Success + // @retval other_error_code Fail + int update_ddl_info(const PartTransTask &task); + + // Assigning global transaction serial numbers and schema versions to DDL tasks + int alloc_global_trans_seq_and_schema_version_for_ddl( + const int64_t base_schema_version, + int64_t &new_seq, + int64_t &new_schema_version, + const int64_t timeout); + + int alloc_global_trans_seq_and_schema_version(const int64_t base_schema_version, + int64_t &new_seq, + int64_t &new_schema_version, + volatile bool &stop_flag); + + // Delete a tenant + // Supports multiple calls, guaranteeing only one will return tenant_can_be_dropped = true + int drop_tenant(bool &tenant_can_be_dropped, const char *call_from); + + // Increase the number of partitions when a tenant is in service + int inc_part_count_on_serving(const common::ObPartitionKey &pkey, bool &is_serving); + + /// Reclaim partition + /// This task is called when processing a partition down task, if this task is received, + /// it means that all the partition data has been consumed, there will be no more partition + /// data and the tenant can safely subtract the number of partitions served + /// + /// @param [in] pkey Target recycled partition + /// @param [out] tenant_can_be_dropped tenant can be dropped or not + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int recycle_partition(const common::ObPartitionKey &pkey, bool &tenant_can_be_dropped); + + /// mark start of drop tenant + /// + /// @param drop_tenant_start_tstamp The start timestamp for deleting a tenant means that the tenant can be deleted as soon as progress crosses that timestamp + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int mark_drop_tenant_start(const int64_t drop_tenant_start_tstamp); + + /// Whether the tenant has been marked for deletion + bool is_drop_tenant_start_marked() const + { + return OB_INVALID_TIMESTAMP != drop_tenant_tstamp_; + } + + void update_global_data_schema_version(const int64_t schema_version); + + int update_data_start_schema_version_on_split_mode(); + /// Adding a DDL table partition requires that the tenant must be served + /// Report an error if the tenant is not in service + /// + /// + /// @retval OB_STATE_NOT_MATCH tenant not serve + /// + /// @retval Other error codes follow PartMgr error codes + int add_ddl_table( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const bool is_create_tenant); + + /// Add 
all tables and require the tenant to be in service + /// + /// @retval OB_STATE_NOT_MATCH tenant not serve + /// + /// @retval Other error codes follow PartMgr error codes + int add_all_tables( + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + + int add_inner_tables(const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + +public: + enum + { + TENANT_STATE_INVALID = 0, + TENANT_STATE_NORMAL = 1, + TENANT_STATE_OFFLINE = 2, + }; + + static const char *print_state(const int64_t state); + int64_t get_tenant_state() const { return tenant_state_.state_; } + int64_t get_active_part_count() const { return tenant_state_.ref_cnt_; } + bool is_offlined() const { return TENANT_STATE_OFFLINE == get_tenant_state(); } + bool is_serving() const { return TENANT_STATE_NORMAL == get_tenant_state(); } + +private: + int update_ddl_progress_(const int64_t handle_progress, + const uint64_t handle_log_id = OB_INVALID_ID); + int start_drop_tenant_if_needed_(bool &need_drop_tenant); + bool need_drop_tenant_() const; + int drop_ddl_part_(); + // 1. If the low version of OB upgrades to 226, if the low version imports a time zone table, then the post script will split the time zone related table under the tenant + // 2. If the low version does not import the time zone table, do nothing + int init_tz_info_(const uint64_t tenant_id); + +public: + TO_STRING_KV( + K_(tenant_id), + K_(tenant_name), + "state", print_state(get_tenant_state()), + "active_part_count", get_active_part_count(), + "ddl_progress", TS_TO_STR(ddl_progress_), + "drop_tenant_tstamp", TS_TO_STR(drop_tenant_tstamp_), + K_(ddl_log_id), + "global_seq", get_global_seq(), + "global_schema_version", get_global_schema_version(), + K_(start_schema_version), + "cur_schema_version", part_mgr_.get_schema_version(), + K_(committer_cur_schema_version), + K_(committer_next_trans_schema_version), + KPC_(task_queue), + K_(tz_info_map_version)); + +private: + bool inited_; + uint64_t tenant_id_; + char tenant_name_[common::OB_MAX_TENANT_NAME_LENGTH + 1]; + int64_t start_schema_version_; + // task queue + ObLogTenantTaskQueue *task_queue_; + + // partition manager + ObLogPartMgr part_mgr_; + + // status of tenant + // => state variable + ref count + // + // The reference count represents the number of active partitions, if the number of active partitions is 0, it means there is no data dependency on the tenant structure + RefState tenant_state_ CACHE_ALIGNED; + + // Progress of the last DDL processing + int64_t ddl_progress_ CACHE_ALIGNED; + uint64_t ddl_log_id_; + + // Timestamp for deleting the tenant + // Indicates that the tenant can be safely deleted as soon as the progress crosses that timestamp, and there will be no further DDL afterwards + int64_t drop_tenant_tstamp_; + + // sequencer + // Low 64 bits: global serial number; High 64 bits: global Schema version number + types::uint128_t global_seq_and_schema_version_ CACHE_ALIGNED; + + // Committer + int64_t committer_cur_schema_version_ CACHE_ALIGNED; // The current advanced schema version + + // Committer is currently a tenant parallel commit model: + // Transaction data and DDL data need to be matched for consumption, where the global_schema_version of the current transaction is recorded, which is used by the DDL to determine if it needs to be consumed. 
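+  // This version only moves forward: update_committer_next_trans_schema_version() treats a smaller
+  // incoming value as "schema version reversed" and fails with OB_ERR_UNEXPECTED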
+  int64_t committer_next_trans_schema_version_ CACHE_ALIGNED;
+
+  // 2_2_6 branch start: Oracle time zone related data types: internal table dependency split to tenant
+  int64_t tz_info_map_version_;
+  common::ObTZInfoMap *tz_info_map_;
+  common::ObTimeZoneInfoWrap *tz_info_wrap_;
+
+  void *cf_handle_;
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObLogTenant);
+};
+
+//////////////////////////// ObLogTenantGuard /////////////////////////
+class ObLogTenantGuard final
+{
+public:
+  ObLogTenantGuard() : tenant_(NULL) {}
+  ~ObLogTenantGuard() { revert_tenant(); }
+public:
+  ObLogTenant *get_tenant() { return tenant_; }
+  void set_tenant(ObLogTenant *tenant) { tenant_ = tenant; }
+
+  TO_STRING_KV(KPC_(tenant));
+private:
+  void revert_tenant();
+private:
+  ObLogTenant *tenant_;
+  DISALLOW_COPY_AND_ASSIGN(ObLogTenantGuard);
+};
+}
+}
+#endif
diff --git a/src/liboblog/src/ob_log_tenant_mgr.cpp b/src/liboblog/src/ob_log_tenant_mgr.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..722f911b73eab8c93da453b49a39be4655fc70bb
--- /dev/null
+++ b/src/liboblog/src/ob_log_tenant_mgr.cpp
@@ -0,0 +1,1364 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX OBLOG
+
+#include <algorithm>                    // std::min
+#include "ob_log_tenant_mgr.h"          // ObLogTenantMgr
+#include "ob_log_instance.h"            // TCTX
+#include "ob_log_schema_getter.h"       // IObLogSchemaGetter, ObLogSchemaGuard
+#include "ob_log_table_matcher.h"       // IObLogTableMatcher
+#include "ob_log_trans_stat_mgr.h"      // IObLogTransStatMgr
+#include "ob_log_common.h"              // DEFAULT_START_SEQUENCE_NUM
+#include "ob_log_config.h"              // TCONF
+#include "ob_log_store_service.h"
+
+#define _STAT(level, fmt, args...) _OBLOG_LOG(level, "[STAT] [TenantMgr] " fmt, ##args)
+#define STAT(level, fmt, args...) OBLOG_LOG(level, "[STAT] [TenantMgr] " fmt, ##args)
+#define ISTAT(fmt, args...) STAT(INFO, fmt, ##args)
+#define _ISTAT(fmt, args...)
_STAT(INFO, fmt, ##args) + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +ObLogTenantMgr::ObLogTenantMgr() : + inited_(false), + tenant_hash_map_(), + part_info_map_(), + gindex_cache_(), + table_id_cache_(), + part_add_cb_array_(), + part_rc_cb_array_(), + tenant_id_set_(), + enable_oracle_mode_match_case_sensitive_(false) +{ +} + +ObLogTenantMgr::~ObLogTenantMgr() +{ + destroy(); +} + +int ObLogTenantMgr::init(const bool enable_oracle_mode_match_case_sensitive) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogTenantMgr has not been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_FAIL(tenant_id_set_.create(DEFAULT_TENANT_SET_SIZE))) { + LOG_ERROR("tenant_id_set_ create fail", KR(ret)); + } else if (OB_FAIL(tenant_hash_map_.init(ObModIds::OB_LOG_TENANT_MAP))) { + LOG_ERROR("tenant_hash_map init fail", KR(ret)); + } else if (OB_FAIL(part_info_map_.init(CACHED_PART_INFO_COUNT, + PART_INFO_BLOCK_SIZE, + ObModIds::OB_LOG_PART_INFO))) { + LOG_ERROR("init part info map fail", KR(ret)); + } else if (OB_FAIL(gindex_cache_.init(ObModIds::OB_LOG_GLOBAL_NORMAL_INDEX_CACHE))) { + LOG_ERROR("global index cache init fail", KR(ret)); + } else if (OB_FAIL(table_id_cache_.init(ObModIds::OB_LOG_TABLE_ID_CACHE))) { + LOG_ERROR("table id cache init fail", KR(ret)); + } else { + inited_ = true; + enable_oracle_mode_match_case_sensitive_ = enable_oracle_mode_match_case_sensitive; + + LOG_INFO("ObLogTenantMgr init succ", K(enable_oracle_mode_match_case_sensitive_)); + } + + if (OB_FAIL(ret)) { + destroy(); + } + return ret; +} + +void ObLogTenantMgr::destroy() +{ + inited_ = false; + + tenant_hash_map_.destroy(); + part_info_map_.destroy(); + gindex_cache_.destroy(); + table_id_cache_.destroy(); + part_add_cb_array_.destroy(); + part_rc_cb_array_.destroy(); + tenant_id_set_.destroy(); + enable_oracle_mode_match_case_sensitive_ = false; + + LOG_INFO("ObLogTenantMgr destroy succ"); +} + +int ObLogTenantMgr::filter_tenant(const char *tenant_name, bool &chosen) +{ + int ret = OB_SUCCESS; + IObLogTableMatcher *tb_matcher = TCTX.tb_matcher_; + + if (OB_ISNULL(tenant_name)) { + LOG_ERROR("tenant name is null", K(tenant_name)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(tb_matcher)) { + LOG_ERROR("invalid tb_matcher", K(tb_matcher)); + ret = OB_ERR_UNEXPECTED; + } else { + chosen = false; + + // Matching tenants only + if (OB_FAIL(tb_matcher->tenant_match(tenant_name, chosen))) { + LOG_ERROR("match tenant fail", KR(ret), K(tenant_name), K(chosen)); + } else { + // succ + } + } + + return ret; +} + +int ObLogTenantMgr::add_served_tenant_for_stat_(const char *tenant_name, + const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + IObLogTransStatMgr *trans_stat_mgr = TCTX.trans_stat_mgr_; + + if (OB_ISNULL(trans_stat_mgr)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(trans_stat_mgr->add_served_tenant(tenant_name, tenant_id))) { + LOG_ERROR("trans_stat_mgr add served tenant fail", KR(ret), K(tenant_id), K(tenant_name)); + } else { + // do nothing + } + + return ret; +} + +int ObLogTenantMgr::add_served_tenant_into_set_(const char *tenant_name, + const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(tenant_id_set_.set_refactored(tenant_id))) { + LOG_ERROR("add tenant_id into tenant_id_set fail", KR(ret), K(tenant_name), K(tenant_id)); + } else { + LOG_INFO("add tenant_id into tenant_id_set succ", K(tenant_name), K(tenant_id)); + } + + return ret; +} + +// Get the tenant's 
starting-service-schema-version +// +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_TIMEOUT timeout +// @retval other error code fail +int ObLogTenantMgr::get_tenant_start_schema_version_(const uint64_t tenant_id, + const bool is_new_created_tenant, /* is new created tenant */ + const bool is_new_tenant_by_restore, /* is new created tenant by restore */ + const int64_t start_serve_tstamp, /* start serve timestamp for this tenant */ + const int64_t sys_schema_version, /* corresponding schema version of sys tenant */ + int64_t &tenant_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + if (OB_ISNULL(schema_getter)) { + ret = OB_INVALID_ARGUMENT; + } else if (OB_SYS_TENANT_ID == tenant_id) { + // For sys tenants, the sys tenant schema version is returned directly + // This only happens at bootstrap + tenant_schema_version = sys_schema_version; + ISTAT("[ADD_TENANT] sys tenant start schema version is already ready", K(tenant_id), + K(is_new_created_tenant), K(is_new_tenant_by_restore), K(start_serve_tstamp), K(sys_schema_version), + K(tenant_schema_version)); + } else if (is_new_tenant_by_restore) { + // check is_new_tenant_by_restore is_new_tenant_by_restore = true is subset of is_new_created_tenant=true + // The sys_schema_version at this point is actually the schema_version of the tenant carried in the backup-restore ddl_stmt_str parsed by ddl_handler + tenant_schema_version = sys_schema_version; + + ISTAT("[ADD_TENANT] get tenant schema_version of CREATE_TENANT_END_DDL for tenant generate dynamicly by restore", + K(tenant_id), K(is_new_created_tenant), K(is_new_tenant_by_restore), + K(start_serve_tstamp), K(sys_schema_version)); + } else if (is_new_created_tenant) { + // If it is a new tenant, need to take the first schema version of the tenant + // because all DDL operations are synchronised to this tenant + if (OB_FAIL(get_first_schema_version_of_tenant_(tenant_id, sys_schema_version, *schema_getter, + tenant_schema_version, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_first_schema_version_of_tenant_ fail", KR(ret), K(tenant_id), + K(sys_schema_version)); + } + } + + ISTAT("[ADD_TENANT] get tenant first schema version as start schema version", + KR(ret), K(tenant_id), K(is_new_created_tenant), K(sys_schema_version), + K(tenant_schema_version), "is_schema_split_mode", TCTX.is_schema_split_mode_); + } else { + // Add a tenant that already exists and query for a schema version less than or equal to that timestamp based on the starting service timestamp + if (OB_FAIL(schema_getter->get_schema_version_by_timestamp(tenant_id, start_serve_tstamp, + tenant_schema_version, timeout))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_WARN("get_schema_version_by_timestamp fail cause by tenant has been dropped, ignore", KR(ret), K(tenant_id), K(start_serve_tstamp)); + } else { + LOG_ERROR("get_schema_version_by_timestamp fail", KR(ret), K(tenant_id), K(start_serve_tstamp)); + } + } + + ISTAT("[ADD_TENANT] get start schema version by start timestamp", + KR(ret), K(tenant_id), K(is_new_created_tenant), K(start_serve_tstamp), + K(tenant_schema_version)); + } + return ret; +} + +int ObLogTenantMgr::add_ddl_table_if_needed_(const uint64_t tenant_id, + ObLogTenant &tenant, + const int64_t start_serve_tstamp, + const int64_t tenant_start_schema_version, + const bool is_new_created_tenant) +{ + int ret = OB_SUCCESS; + bool is_schema_split_mode = 
TCTX.is_schema_split_mode_; + // Is it necessary to add a ddl partition + // 1. SYS tenants must add a DDL partition + // 2. In split mode, normal tenants need to add a DDL partition + bool need_add_ddl_table = ((OB_SYS_TENANT_ID == tenant_id) || is_schema_split_mode); + + if (need_add_ddl_table) { + if (OB_FAIL(tenant.add_ddl_table(start_serve_tstamp, + tenant_start_schema_version, + is_new_created_tenant))) { + LOG_WARN("tenant add ddl table fail",KR(ret), K(tenant_id), K(tenant), K(is_schema_split_mode), + K(need_add_ddl_table)); + } + } + + ISTAT("[ADD_DDL_TABLE]", K(tenant_id), K(need_add_ddl_table), K(is_schema_split_mode), + K(start_serve_tstamp), K(tenant_start_schema_version), K(is_new_created_tenant), KR(ret)); + return ret; +} + +// TODO: Only normal tenant all_sequence_value tables are supported after version 2.2 for now +int ObLogTenantMgr::add_inner_tables_on_backup_mode_(const uint64_t tenant_id, + ObLogTenant &tenant, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + const bool is_schema_split_mode = TCTX.is_schema_split_mode_; + const bool enable_backup_mode = is_backup_mode(); + // 1) normal tenant + // 2) schema split mode(ob version greater than 2.2) + // 3) backup_mode + bool need_add_inner_table = ((OB_SYS_TENANT_ID != tenant_id) + && is_schema_split_mode + && enable_backup_mode); + + if (need_add_inner_table) { + if (OB_FAIL(tenant.add_inner_tables(start_serve_tstamp, start_schema_version, timeout))) { + LOG_ERROR("tenant add inner table on backup mode fail", KR(ret), K(tenant_id), + K(tenant), K(is_schema_split_mode), K(enable_backup_mode), K(need_add_inner_table)); + } else { + LOG_INFO("[ADD_INNER_TABLE] tenant add inner table on backup mode succ", KR(ret), K(tenant_id), + K(tenant), K(is_schema_split_mode), K(enable_backup_mode), K(need_add_inner_table)); + } + } else { + LOG_INFO("not on backup mode, do not add inner tables", K(tenant_id), + K(tenant), K(is_schema_split_mode), K(enable_backup_mode), K(need_add_inner_table)); + } + + return ret; +} + +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant has been dropped +// @retval OB_TIMEOUT timeout +// @retval other error code fail +int ObLogTenantMgr::do_add_tenant_(const uint64_t tenant_id, + const char *tenant_name, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const bool is_tenant_served, + const int64_t start_serve_tstamp, + const int64_t sys_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + TenantID tid(tenant_id); + ObLogTenant *tenant = NULL; + int64_t tenant_start_schema_version = 0; + int64_t start_seq = DEFAULT_START_SEQUENCE_NUM; + IObStoreService *store_service = TCTX.store_service_; + void *column_family_handle = NULL; + + if (OB_ISNULL(store_service)) { + LOG_ERROR("store_service is NULL"); + ret = OB_ERR_UNEXPECTED; + } + // Get the starting schema version of the tenant + else if (OB_FAIL(get_tenant_start_schema_version_(tenant_id, is_new_created_tenant, is_new_tenant_by_restore, + start_serve_tstamp, sys_schema_version, tenant_start_schema_version, timeout))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_WARN("get_tenant_start_schema_version_ fail cause tenant has been dropped", KR(ret), K(tenant_id), K(tenant_name), + K(is_new_created_tenant), K(is_new_tenant_by_restore), K(start_serve_tstamp), K(sys_schema_version)); + } else if (OB_TIMEOUT != ret) { + LOG_ERROR("get_tenant_start_schema_version_ fail", KR(ret), K(tenant_id), 
K(tenant_name), + K(is_new_created_tenant), K(is_new_tenant_by_restore), K(start_serve_tstamp), K(sys_schema_version)); + } + } + else if (OB_ENTRY_EXIST == (ret = tenant_hash_map_.contains_key(tid))) { + LOG_ERROR("cannot add duplicated tenant", KR(ret), K(tenant_id)); + } else if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("tenant hash map contains key failed", KR(ret), K(tenant_id)); + } else { + ret = OB_SUCCESS; + // alloc a tenant struct + if (OB_FAIL(tenant_hash_map_.alloc_value(tenant))) { + LOG_ERROR("alloc log tenant failed", KR(ret), K(tenant_id), K(tenant)); + } else if (OB_ISNULL(tenant)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("tenant is NULL", KR(ret), K(tenant_id), K(tenant)); + } + else if (OB_FAIL(store_service->create_column_family(std::to_string(tenant_id) + ":" + std::string(tenant_name), + column_family_handle))) { + LOG_ERROR("create_column_family fail", KR(ret), K(tenant_id), K(tenant_name), K(column_family_handle)); + } + // init tenant + else if (OB_FAIL(tenant->init(tenant_id, tenant_name, start_serve_tstamp, start_seq, + tenant_start_schema_version, column_family_handle, *this))) { + LOG_ERROR("tenant init fail", KR(ret), K(tenant_id), K(tenant_name), K(start_serve_tstamp), + K(start_seq), K(tenant_start_schema_version)); + } + // Ensure that a valid tenant is inserted and that the consumer will not see an invalid tenant + else if (OB_FAIL(tenant_hash_map_.insert_and_get(tid, tenant))) { + LOG_ERROR("tenant_hash_map_ insert and get failed", KR(ret), K(tenant_id)); + } else { + // make sure to revert here, otherwise there will be a memory/ref leak + revert_tenant_(tenant); + // The tenant structure cannot be referenced again afterwards and may be deleted at any time + tenant = NULL; + } + + if (OB_FAIL(ret)) { + if (NULL != tenant) { + (void)tenant_hash_map_.del(tid); + tenant_hash_map_.free_value(tenant); + tenant = NULL; + } + } + // start tenant service + else if (OB_FAIL(start_tenant_service_(tenant_id, is_new_created_tenant, is_new_tenant_by_restore, start_serve_tstamp, + tenant_start_schema_version, timeout))) { + LOG_ERROR("start tenant service fail", KR(ret), K(tenant_id), K(is_new_created_tenant), + K(start_serve_tstamp), K(tenant_start_schema_version)); + } + } + + if (OB_SUCCESS == ret && is_tenant_served) { + if (OB_FAIL(add_served_tenant_for_stat_(tenant_name, tenant_id))) { + LOG_ERROR("trans stat mgr add serverd tenant fail", KR(ret), K(tenant_id), K(tenant_name)); + } else if (OB_FAIL(add_served_tenant_into_set_(tenant_name, tenant_id))) { + LOG_ERROR("add tenant_id into tenant_id_set fail", KR(ret), K(tenant_id), K(tenant_name)); + } else { + // do nothing + } + } + + if (OB_SUCCESS == ret) { + ISTAT("[ADD_TENANT]", K(tenant_id), K(tenant_name), K(is_new_created_tenant), K(is_new_tenant_by_restore), + K(is_tenant_served), K(start_serve_tstamp), K(sys_schema_version), + "total_tenant_count", tenant_hash_map_.count()); + } + + return ret; +} + +int ObLogTenantMgr::start_tenant_service_(const uint64_t tenant_id, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const int64_t start_serve_tstamp, + const int64_t tenant_start_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + // normal_new_created_tenant means tenant found by ADD_TENANT action AND NOT CREATE by RESTORE + const bool is_normal_new_created_tenant = is_new_created_tenant && !is_new_tenant_by_restore; + // Real-time access to tenant structure, tenants should not be non-existent + if 
(OB_FAIL(get_tenant_guard(tenant_id, guard))) { + LOG_ERROR("get tenant fail", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(tenant)); + ret = OB_ERR_UNEXPECTED; + } + // add DDL table(__all_ddl_operation) + else if (OB_FAIL(add_ddl_table_if_needed_(tenant_id, *tenant, start_serve_tstamp, + tenant_start_schema_version, is_normal_new_created_tenant))) { + LOG_ERROR("add_ddl_table_if_needed_ fail", KR(ret), K(tenant_id), K(start_serve_tstamp), + K(tenant_start_schema_version), K(is_new_created_tenant), K(is_normal_new_created_tenant)); + } + // add inner tables + else if (OB_FAIL(add_inner_tables_on_backup_mode_(tenant_id, + *tenant, + start_serve_tstamp, + tenant_start_schema_version, + timeout))) { + LOG_ERROR("add_inner_tables_on_backup_mode_ fail", KR(ret), K(tenant_id), K(start_serve_tstamp), + K(tenant_start_schema_version)); + } + // add all user tables of tenant in such cases: + // 1. tenant exist when oblog start + // 2. or tenant create by DDL AND by restore + // which means, won't add user table of tenant of a normal new tenant(which should not have any user table) + else if (! is_normal_new_created_tenant) { + if (OB_FAIL(tenant->add_all_tables(start_serve_tstamp, + tenant_start_schema_version, + timeout))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // FIXME: When a schema error (OB_TENANT_HAS_BEEN_DROPPED) is encountered, the table under the tenant is ignored and success is returned + // Since the deletion of the tenant DDL will definitely be encountered later, there is no need to roll back the successful operation above + LOG_WARN("schema error, tenant may be dropped. need not add_all_tables", KR(ret), + K(tenant_id), K(tenant_start_schema_version), + K(start_serve_tstamp), K(is_new_created_tenant)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("add_all_tables fail", KR(ret), K(tenant_id), K(tenant_start_schema_version), + K(start_serve_tstamp), K(is_new_created_tenant)); + } + } + } + return ret; +} + +// add tenant for oblog +// +// @retval OB_SUCCESS success +// @retval OB_TIMEOUT timeout +// @retval OB_TENANT_HAS_BEEN_DROPPED tenant/database/... not exist, caller should ignore +// @retval other error code error +int ObLogTenantMgr::add_tenant(const uint64_t tenant_id, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const int64_t start_serve_tstamp, + const int64_t sys_schema_version, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout, + bool &add_tenant_succ) +{ + int ret = OB_SUCCESS; + add_tenant_succ = false; + TenantSchemaInfo tenant_schema_info; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenantMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) || OB_UNLIKELY(sys_schema_version <= 0) || OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid arguments", K(tenant_id), K(sys_schema_version), K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } + // Use system tenant identity to get schema of normal tenant + // by lazy mode + else if (OB_FAIL(schema_getter->get_lazy_schema_guard(OB_SYS_TENANT_ID, sys_schema_version, + timeout, schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard of SYS tenant fail", KR(ret), K(sys_schema_version)); + } + } else if (OB_FAIL(schema_guard.get_tenant_schema_info(tenant_id, tenant_schema_info, timeout))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + // Note: Because the above gets the Guard of the SYS tenant, using the exact schema version of the SYS tenant. + // Therefore, it is expected that the schema for that tenant must be visible, and if it is not, there is a bug. + LOG_ERROR("get_tenant_schema_info fail: tenant has been dropped when add tenant, should not happen", + KR(ret), K(tenant_id), K(sys_schema_version), K(is_new_created_tenant)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_TIMEOUT != ret) { + LOG_ERROR("get tenant schema fail", KR(ret), K(tenant_id), K(sys_schema_version)); + } + } else if (tenant_schema_info.is_restore_) { + // 1. won't add tenant in restore status + // 2. for new tenant found by ADD_TENENT_END DDL: + // 2.1. normal new created tenant, start log id will set to 1 to simplify the ADD_PARTITION process, and won't add user tables + // 2.2. if tenant new created by restore, should query server for start_serve_log_id, and should add user tables. + LOG_INFO("won't add restore-state tenant", K(tenant_id), K(tenant_schema_info), K(sys_schema_version)); + } else { + bool is_tenant_served = false; + // whether the tenant should be added or not, if the tenant is in service, it must be added. + // Tenants that are not in service(mean not config in oblog config file) may also need to be added, e.g. SYS tenant + bool need_add_tenant = false; + tenant_name = tenant_schema_info.name_; + + // Filtering tenants based on whitelists + if (OB_FAIL(filter_tenant(tenant_name, is_tenant_served))) { + LOG_ERROR("filter_tenant fail", KR(ret), K(tenant_id), K(tenant_name)); + } else if (! is_tenant_served) { + // tenant is not serve + LOG_INFO("tenant is not served", K(tenant_id), K(tenant_name)); + if (OB_SYS_TENANT_ID == tenant_id) { + // must add sys tenant(cause __all_ddl_operation in sys tenant is needed) + need_add_tenant = true; + } + } else { + // must add served tenant + need_add_tenant = true; + } + + if (OB_FAIL(ret)) { + // fail + } else if (! 
need_add_tenant) { + // don't need add tenant + add_tenant_succ = false; + // do real add tenant + } else if (OB_FAIL(do_add_tenant_(tenant_id, tenant_name, is_new_created_tenant, is_new_tenant_by_restore, + is_tenant_served, start_serve_tstamp, sys_schema_version, timeout))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_WARN("will not add tenant has been dropped", KR(ret), K(tenant_id), K(tenant_name), + K(is_new_created_tenant), K(is_new_tenant_by_restore), K(is_tenant_served), K(start_serve_tstamp), + K(sys_schema_version), K(timeout)); + } else if (OB_TIMEOUT != ret) { + LOG_ERROR("do add tenant fail", KR(ret), K(tenant_id), K(tenant_name), + K(is_new_created_tenant), K(is_new_tenant_by_restore), K(is_tenant_served), K(start_serve_tstamp), + K(sys_schema_version), K(timeout)); + } + } else { + // add tenant success + add_tenant_succ = true; + } + } + + return ret; +} + +// fetch first schema version of tenant +// +// @retval OB_SUCCESS success +// @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped +// @retval OB_TIMEOUT timeout +// @retval other error code fail +int ObLogTenantMgr::get_first_schema_version_of_tenant_(const uint64_t tenant_id, + const int64_t sys_schema_version, + IObLogSchemaGetter &schema_getter, + int64_t &first_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool is_schema_split_mode = TCTX.is_schema_split_mode_; + first_schema_version = OB_INVALID_VERSION; + + if (! is_schema_split_mode) { + // use schema version of ADD_TENANT DDL(by sys tenant) if is NOT schema split mode + first_schema_version = sys_schema_version; + } + // query first tenant schema version if tenant is created by schema split mode + else if (OB_FAIL(schema_getter.get_first_trans_end_schema_version(tenant_id, + first_schema_version, + timeout))) { + // OB_TENANT_HAS_BEEN_DROPPED return caller + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_first_trans_end_schema_version fail", KR(ret), K(tenant_id), + K(first_schema_version)); + } + } else if (OB_UNLIKELY(first_schema_version <= 0)) { + LOG_ERROR("tenant first schema versioin is invalid", K(tenant_id), K(first_schema_version)); + ret = OB_ERR_UNEXPECTED; + } else { + // success + } + + return ret; +} + +int ObLogTenantMgr::drop_tenant(const uint64_t tenant_id, const char *call_from) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + bool tenant_can_be_dropped = false; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_INFO("tenant not exist, need not drop tenant", K(tenant_id)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("get tenant fail", KR(ret), K(tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } + // Multiple drop_tenant() to be processed properly + else if (OB_FAIL(tenant->drop_tenant(tenant_can_be_dropped, call_from))) { + LOG_ERROR("part mgr drop tenant fail", KR(ret), K(tenant_id)); + } else if (OB_FAIL(drop_served_tenant_for_stat_(tenant_id))) { + LOG_ERROR("trans stat mgr del server tenant fail", KR(ret), K(tenant_id)); + } else if (OB_FAIL(drop_served_tenant_from_set_(tenant_id))) { + LOG_ERROR("drop tenant_id from tenant_id_set fail", KR(ret), K(tenant_id)); + } else { + // do nothing + } + + // Delete the tenant structure if the tenant can be deleted + if (OB_SUCCESS == ret && tenant_can_be_dropped) { + ISTAT("[DROP_TENANT] [REMOVE_TENANT] ready to remove tenant from tenant map after drop tenant", + KPC(tenant), K(call_from)); + + // Although it will be called many times externally, it will only be called once here + if (OB_FAIL(remove_tenant_(tenant_id, tenant))) { + LOG_ERROR("remove tenant fail", KR(ret), K(tenant_id), K(tenant_can_be_dropped)); + } + } + return ret; +} + +int ObLogTenantMgr::drop_tenant_start(const uint64_t tenant_id, + const int64_t drop_tenant_start_tstamp) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_INFO("tenant not exist, need not handle drop tenant start DDL", K(tenant_id), + K(drop_tenant_start_tstamp)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("get tenant fail", KR(ret), K(tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant->mark_drop_tenant_start(drop_tenant_start_tstamp))) { + LOG_ERROR("mark drop tenant start fail", KR(ret), KPC(tenant), K(drop_tenant_start_tstamp)); + } else { + LOG_INFO("succeed to mark drop tenant start while handling drop tenant start DDL", KPC(tenant), + K(drop_tenant_start_tstamp)); + } + return ret; +} + +int ObLogTenantMgr::drop_tenant_end(const uint64_t tenant_id, + const int64_t drop_tenant_end_tstamp) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_INFO("tenant not exist, need not handle drop tenant end DDL", K(tenant_id), + K(drop_tenant_end_tstamp)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("get tenant fail", KR(ret), K(tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else if (tenant->is_drop_tenant_start_marked()) { + LOG_INFO("drop tenant start has been marked, need not handle drop tenent end DDL", K(tenant_id), + K(drop_tenant_end_tstamp)); + } else if (OB_FAIL(tenant->mark_drop_tenant_start(drop_tenant_end_tstamp))) { + LOG_ERROR("mark drop tenant start fail", KR(ret), KPC(tenant), K(drop_tenant_end_tstamp)); + } else { + LOG_INFO("succeed to mark drop tenant start while handling drop tenant end DDL", KPC(tenant), + K(drop_tenant_end_tstamp)); + } + return ret; +} + +int ObLogTenantMgr::alter_tenant_name(const uint64_t tenant_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t timeout, + const char *&tenant_name, + bool &tenant_is_chosen) +{ + int ret = OB_SUCCESS; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + ObLogSchemaGuard old_schema_guard; + TenantSchemaInfo old_tenant_schema_info; + ObLogSchemaGuard new_schema_guard; + TenantSchemaInfo new_tenant_schema_info; + tenant_is_chosen = true; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(schema_version_before_alter <= 0) + || OB_UNLIKELY(schema_version_after_alter <= 0) + || OB_ISNULL(schema_getter)) { + LOG_ERROR("invalid arguments", K(schema_version_before_alter), K(schema_version_after_alter), K(schema_getter)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(schema_getter->get_lazy_schema_guard(OB_SYS_TENANT_ID, schema_version_before_alter, + timeout, old_schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard of SYS tenant fail", KR(ret), K(schema_version_before_alter)); + } + } else if (OB_FAIL(schema_getter->get_lazy_schema_guard(OB_SYS_TENANT_ID, schema_version_after_alter, + timeout, new_schema_guard))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get_lazy_schema_guard of SYS tenant fail", KR(ret), K(schema_version_after_alter)); + } + } else if (OB_FAIL(old_schema_guard.get_tenant_schema_info(tenant_id, old_tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get tenant schema fail", KR(ret), K(tenant_id), K(schema_version_before_alter)); + } + } else if (OB_FAIL(new_schema_guard.get_tenant_schema_info(tenant_id, new_tenant_schema_info, timeout))) { + if (OB_TIMEOUT != ret) { + LOG_ERROR("get tenant schema fail", KR(ret), K(tenant_id), K(schema_version_after_alter)); + } + } else { + const char *old_tenant_name = old_tenant_schema_info.name_; + const char *new_tenant_name = new_tenant_schema_info.name_; + const bool skip_rename_tenant_ddl = (0 != TCONF.skip_rename_tenant_ddl); + bool is_tenant_served = false; + tenant_name = new_tenant_name; + + if (OB_ISNULL(old_tenant_name) || OB_ISNULL(new_tenant_name)) { + LOG_ERROR("old_tenant_name or new_tenant_name is not valid", K(old_tenant_name), K(new_tenant_name)); + ret = OB_ERR_UNEXPECTED; + // Filtering tenants based on whitelists + } else if (OB_FAIL(filter_tenant(old_tenant_name, is_tenant_served))) { + LOG_ERROR("filter_tenant 
fail", KR(ret), K(tenant_id), K(old_tenant_name)); + } else if (! is_tenant_served) { + // If the current rename tenant does not match the current whitelist, then no processing is required + tenant_is_chosen = false; + } else { + // If you are synchronising the whole cluster, there is no need to do this + IObLogTableMatcher *tb_matcher = TCTX.tb_matcher_; + bool matched = false; + + if (OB_ISNULL(tb_matcher)) { + LOG_ERROR("tb_matcher is NULL", K(tb_matcher)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tb_matcher->cluster_match(matched))) { + LOG_ERROR("tb_matcher cluster_match fail", KR(ret), K(matched)); + } else if (matched) { + LOG_INFO("[RENAME_TENANT] is supported when cluster match", K(old_tenant_name), K(new_tenant_name)); + } else { + if (skip_rename_tenant_ddl) { + LOG_INFO("[RENAME_TENANT] skip check", K(old_tenant_name), K(new_tenant_name), K(skip_rename_tenant_ddl)); + tenant_is_chosen = true; + ret = OB_SUCCESS; + } else { + LOG_ERROR("[RENAME_TENANT] is not supported", K(old_tenant_name), K(new_tenant_name)); + ret = OB_NOT_SUPPORTED; + } + } + } + } + + return ret; +} + +int ObLogTenantMgr::remove_tenant_(const uint64_t tenant_id, ObLogTenant *tenant) +{ + int ret = OB_SUCCESS; + TenantID tid(tenant_id); + IObStoreService *store_service = TCTX.store_service_; + void *cf = NULL; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(store_service)) { + LOG_ERROR("store_service is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(tenant)) { + LOG_ERROR("tenant is NULL", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(cf = tenant->get_cf())) { + LOG_ERROR("cf is NULL", K(tid), KPC(tenant)); + ret= OB_ERR_UNEXPECTED; + } else if (OB_FAIL(store_service->drop_column_family(cf))) { + LOG_ERROR("store_service drop_column_family fail", K(tid), KPC(tenant)); + } else if (OB_FAIL(tenant_hash_map_.del(tid))) { + LOG_ERROR("tenant_hash_map_ del failed", KR(ret), K(tenant_id)); + } else { + //do nothing + } + + return ret; +} + +int ObLogTenantMgr::drop_served_tenant_for_stat_(const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + IObLogTransStatMgr *trans_stat_mgr = TCTX.trans_stat_mgr_; + + if (OB_ISNULL(trans_stat_mgr)) { + LOG_ERROR("trans_stat is null", K(trans_stat_mgr)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(trans_stat_mgr->drop_served_tenant(tenant_id))) { + LOG_ERROR("trans_stat_mgr del served tenant fail", KR(ret), + K(tenant_id)); + } else { + // do nothing + } + + return ret; +} + +int ObLogTenantMgr::drop_served_tenant_from_set_(const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(tenant_id_set_.erase_refactored(tenant_id))) { + if (OB_HASH_NOT_EXIST == ret) { + LOG_INFO("tenant_id has been dropped from tenant_id_set", K(tenant_id)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("del tenant_id from tenant_id_set fail", KR(ret), K(tenant_id)); + } + } else { + ISTAT("[DROP_TENANT] drop tenant_id from tenant_id_set succ", K(tenant_id)); + } + + return ret; +} + +int ObLogTenantMgr::get_tenant(const uint64_t tenant_id, ObLogTenant *&tenant) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + LOG_ERROR("invalid argument", K(tenant_id)); + ret = OB_INVALID_ARGUMENT; + } else { + TenantID tid(tenant_id); + ObLogTenant *tmp_tenant = NULL; + + if (OB_FAIL(tenant_hash_map_.get(tid, tmp_tenant))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("tenant_hash_map_ get fail", KR(ret), K(tid), K(tmp_tenant)); + } + } else if (OB_ISNULL(tmp_tenant)) { + LOG_ERROR("tenant is null", K(tenant_id), K(tmp_tenant)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + tenant = tmp_tenant; + } + } + return ret; +} + +int ObLogTenantMgr::get_tenant_guard(const uint64_t tenant_id, ObLogTenantGuard &guard) +{ + int ret = OB_SUCCESS; + ObLogTenant *tenant = NULL; + if (OB_FAIL(get_tenant(tenant_id, tenant))) { + // Failed, or non-existent + } else { + guard.set_tenant(tenant); + } + return ret; +} + +void ObLogTenantMgr::revert_tenant_(ObLogTenant *tenant) +{ + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + } else if (NULL != tenant) { + tenant_hash_map_.revert(tenant); + } +} + +int ObLogTenantMgr::revert_tenant(ObLogTenant *tenant) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tenant)) { + LOG_ERROR("tenant is NULL", K(tenant)); + ret = OB_INVALID_ARGUMENT; + } else { + tenant_hash_map_.revert(tenant); + } + return ret; +} + +int ObLogTenantMgr::get_ddl_progress(uint64_t &tenant_id, + int64_t &ddl_min_progress, + uint64_t &ddl_min_handle_log_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else { + TenantDDLProgessGetter getter; + + if (OB_FAIL(tenant_hash_map_.for_each(getter))) { + LOG_ERROR("tenant_hash_map_ for each fail", KR(ret)); + } else { + tenant_id = getter.tenant_id_; + ddl_min_progress = getter.ddl_min_progress_; + ddl_min_handle_log_id = getter.ddl_min_handle_log_id_; + } + } + + return ret; +} + +ObLogTenantMgr::TenantDDLProgessGetter::TenantDDLProgessGetter() : + tenant_id_(OB_INVALID_TENANT_ID), + ddl_min_progress_(OB_INVALID_TIMESTAMP), + ddl_min_handle_log_id_(OB_INVALID_ID) +{ +} + +bool ObLogTenantMgr::TenantDDLProgessGetter::operator()(const TenantID &tid, + ObLogTenant *tenant) +{ + // Ignore offlined tenants + if (NULL != tenant && tenant->is_serving()) { + int64_t ddl_progress = tenant->get_ddl_progress(); + uint64_t ddl_handle_log_id = tenant->get_handle_log_id(); + + if (OB_INVALID_TIMESTAMP == ddl_min_progress_) { + ddl_min_progress_ = ddl_progress; + tenant_id_ = tid.tenant_id_; + } else { + if (ddl_progress < ddl_min_progress_) { + ddl_min_progress_ = ddl_progress; + tenant_id_ = tid.tenant_id_; + } + } + + if (OB_INVALID_ID == ddl_min_handle_log_id_) { + ddl_min_handle_log_id_ = ddl_handle_log_id; + } else { + ddl_min_handle_log_id_ = std::min(ddl_handle_log_id, ddl_min_handle_log_id_); + } + } + return true; +} + +int ObLogTenantMgr::register_part_add_callback(PartAddCallback *callback) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(callback)) { + LOG_ERROR("invalid argument", K(callback)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(part_add_cb_array_.push_back(reinterpret_cast(callback)))) { + LOG_ERROR("push_back into part_add_cb_array fail", KR(ret), K(callback)); + } else { + // succ + } + return ret; +} + +int ObLogTenantMgr::register_part_recycle_callback(PartRecycleCallback *callback) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(callback)) { + LOG_ERROR("invalid argument", K(callback)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(part_rc_cb_array_.push_back(reinterpret_cast(callback)))) { + LOG_ERROR("push_back into part_rc_cb_array fail", KR(ret), K(callback)); + } else { + // succ + } + return ret; +} + +int ObLogTenantMgr::add_all_tenants(const int64_t start_tstamp, + const int64_t sys_schema_version, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + common::ObArray tenant_ids; + ObLogSchemaGuard sys_schema_guard; + IObLogSchemaGetter *schema_getter = TCTX.schema_getter_; + + if (OB_UNLIKELY(! inited_)) { + ret = OB_NOT_INIT; + LOG_ERROR("ObLogTenantMgr has not inited"); + } else if (OB_UNLIKELY(start_tstamp <= 0) || OB_UNLIKELY(sys_schema_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid arguments", K(start_tstamp), K(sys_schema_version)); + } else if (OB_ISNULL(schema_getter)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("schema_getter is invalid", K(schema_getter), KR(ret)); + } + // Get the schema guard for the SYS tenant + // Since get_available_tenant_ids() cannot use lazy mode + else if (OB_FAIL(schema_getter->get_fallback_schema_guard(OB_SYS_TENANT_ID, sys_schema_version, + timeout, sys_schema_guard))) { + LOG_ERROR("get_fallback_schema_guard of SYS tenant fail", KR(ret), K(sys_schema_version), K(timeout)); + } + // get available tenant id list + else if (OB_FAIL(sys_schema_guard.get_available_tenant_ids(tenant_ids, timeout))) { + LOG_ERROR("get_available_tenant_ids fail", KR(ret), K(tenant_ids), K(timeout)); + } else { + int64_t chosen_tenant_count = 0; + bool is_new_created_tenant = false; + bool is_new_tenant_by_restore = false; + const int64_t total_tenant_count = tenant_ids.count(); + ISTAT("[ADD_ALL_TENANTS] BEGIN", K(sys_schema_version), K(start_tstamp), K(total_tenant_count), + K(tenant_ids)); + + // add all tenants + for (int64_t index = 0; OB_SUCCESS == ret && index < total_tenant_count; index++) { + const uint64_t tenant_id = tenant_ids.at(index); + bool add_tenant_succ = false; + ObLogSchemaGuard schema_guard; + const char *tenant_name = NULL; + if (OB_FAIL(add_tenant(tenant_id, is_new_created_tenant, is_new_tenant_by_restore, start_tstamp, + sys_schema_version, schema_guard, tenant_name, timeout, add_tenant_succ))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret) { + LOG_INFO("tenant has dropped, ignore it", KR(ret), K(tenant_id), K(sys_schema_version), + K(index), K(start_tstamp)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("add tenant fail", KR(ret), K(tenant_id), K(start_tstamp),K(sys_schema_version), + K(index)); + } + } + + if (OB_SUCCESS == ret) { + if (add_tenant_succ) { + chosen_tenant_count++; + } + + ISTAT("[ADD_ALL_TENANTS] ", K(index), K(tenant_id), K(tenant_name), K(add_tenant_succ), + K(chosen_tenant_count), K(total_tenant_count)); + } + } + + ISTAT("[ADD_ALL_TENANTS] DONE", K(chosen_tenant_count), K(total_tenant_count), + K(sys_schema_version), 
K(start_tstamp), K(tenant_ids), KR(ret)); + } + return ret; +} + +int ObLogTenantMgr::filter_ddl_stmt(const uint64_t tenant_id, bool &chosen) +{ + int ret = OB_SUCCESS; + chosen = false; + + if (OB_FAIL(tenant_id_set_.exist_refactored(tenant_id))) { + if (OB_HASH_EXIST == ret) { + chosen = true; + ret = OB_SUCCESS; + } else if (OB_HASH_NOT_EXIST == ret) { + chosen = false; + ret = OB_SUCCESS; + } else { + LOG_ERROR("tenant_id_set exist_refactored fail", KR(ret), K(tenant_id)); + } + } + + return ret; +} + +int ObLogTenantMgr::get_all_tenant_ids(std::vector &tenant_ids) +{ + int ret = OB_SUCCESS; + tenant_ids.clear(); + + for (typename common::hash::ObHashSet::const_iterator iter = tenant_id_set_.begin(); + OB_SUCC(ret) && iter != tenant_id_set_.end(); ++iter) { + const uint64_t tenant_id = iter->first; + (void)tenant_ids.push_back(tenant_id); + _ISTAT("[GET_TENANT] TENANT=%ld", tenant_id); + } + + return ret; +} + +// the following function needs to be compatible, pre-226 versions only had time zone tables for system tenants, so only one map needs to be maintained. +// After 226 the time zone table is split into tenant level and a tz_info_map needs to be maintained for each tenant +// Obj2Str uses this interface to get the tz_info_wrap of a particular tenant for obj to string conversion +int ObLogTenantMgr::get_tenant_tz_wrap(const uint64_t tenant_id, ObTimeZoneInfoWrap *&tz_info_wrap) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + const uint64_t tz_tenant_id = GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2260 ? tenant_id : OB_SYS_TENANT_ID; + + if (OB_SYS_TENANT_ID == tz_tenant_id) { + tz_info_wrap = &TCTX.tz_info_wrap_; + } else { + ObLogTenantGuard tenant_guard; + ObLogTenant *tenant = NULL; + if (OB_FAIL(get_tenant_guard(tz_tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_ERROR("tenant not exist when get tz_wrap", KR(ret), K(tenant_id), K(tz_tenant_id)); + } else { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id), K(tz_tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("invalid tenant", KR(ret), K(tenant), K(tenant_id), K(tz_tenant_id)); + } else { + tz_info_wrap = tenant->get_tz_info_wrap(); + } + } + + return ret; +} + +int ObLogTenantMgr::get_tenant_tz_map(const uint64_t tenant_id, + ObTZInfoMap *&tz_info_map) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + const uint64_t tz_tenant_id = GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2260 ? 
tenant_id : OB_SYS_TENANT_ID; + + if (OB_SYS_TENANT_ID == tz_tenant_id) { + tz_info_map = &TCTX.tz_info_map_; + } else { + ObLogTenantGuard tenant_guard; + ObLogTenant *tenant = NULL; + if (OB_FAIL(get_tenant_guard(tz_tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_ERROR("tenant not exist when get tz_wrap", KR(ret), K(tenant_id), K(tz_tenant_id)); + } else { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id), K(tz_tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("invalid tenant", KR(ret), K(tenant), K(tenant_id), K(tz_tenant_id)); + } else { + tz_info_map = tenant->get_tz_info_map(); + } + } + return ret; +} + +bool ObLogTenantMgr::TenantPrinter::operator()(const TenantID &tid, ObLogTenant *tenant) +{ + if (NULL != tenant) { + tenant->print_stat_info(); + + if (tenant->is_offlined()) { + offline_tenant_count_++; + } else { + serving_tenant_count_++; + } + (void)tenant_ids_.push_back(tid.tenant_id_); + (void)cf_handles_.push_back(tenant->get_cf()); + } + return true; +} + +void ObLogTenantMgr::print_stat_info() +{ + TenantPrinter printer; + (void)tenant_hash_map_.for_each(printer); + _LOG_INFO("[STAT] [SERVE_INFO] TENANT_COUNT=%ld(SERVE=%ld,OFFLINE=%ld)", + printer.offline_tenant_count_ + printer.serving_tenant_count_, + printer.serving_tenant_count_, + printer.offline_tenant_count_); + part_info_map_.print_state("[STAT] [SERVE_INFO] TOTAL_PART_INFO:"); + + IObStoreService *store_service = TCTX.store_service_; + + if (NULL != store_service) { + store_service->get_mem_usage(printer.tenant_ids_, printer.cf_handles_); + } +} + +int ObLogTenantMgr::handle_schema_split_finish( + const uint64_t ddl_tenant_id, + const int64_t split_schema_version, + const int64_t start_serve_tstamp, + const int64_t timeout) +{ + UNUSED(timeout); + int ret = OB_SUCCESS; + TenantAddDDLTableFunc func(start_serve_tstamp, split_schema_version); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("PartMgr has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_SYS_TENANT_ID != ddl_tenant_id) { + // If the schema split DDL is for a common tenant, it is not processed + _ISTAT("[DDL] [SCHEMA_SPLIT_FINISH] ignore schema split DDL of NON-SYS tenant, " + "DDL_TENANT_ID=%lu TENANT_COUNT=%ld SPLIT_SCHEMA_VERSION=%ld START_SERVER_TSTAMP=%ld", + ddl_tenant_id, tenant_hash_map_.count(), split_schema_version, start_serve_tstamp); + } else if (OB_FAIL(tenant_hash_map_.for_each(func))) { + LOG_ERROR("add ddl table for all tenant fail", KR(ret), K(func.err_), K(start_serve_tstamp), + K(split_schema_version)); + } else if (OB_UNLIKELY(OB_SUCCESS != func.err_)) { + LOG_ERROR("add ddl table for all tenant fail", KR(func.err_), K(ddl_tenant_id), + K(split_schema_version), K(start_serve_tstamp)); + ret = func.err_; + } else { + _ISTAT("[DDL] [SCHEMA_SPLIT_FINISH] DDL_TENANT_ID=%lu " + "TENANT_COUNT=(TOTAL=%ld,SUCC=%ld,OFFLINE=%ld) SPLIT_SCHEMA_VERSION=%ld START_SERVER_TSTAMP=%ld", + ddl_tenant_id, tenant_hash_map_.count(), func.succ_tenant_count_, + func.offline_tenant_count_, split_schema_version, start_serve_tstamp); + } + + return ret; +} + +// Add DDL tables for all tenants, except SYS +bool ObLogTenantMgr::TenantAddDDLTableFunc::operator()(const TenantID &tid, ObLogTenant *tenant) +{ + int ret = OB_SUCCESS; + bool bret = true; + bool is_create_tenant = false; + if (OB_ISNULL(tenant)) { + LOG_ERROR("invalid NULL tenant", K(tenant), K(tid)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_SYS_TENANT_ID == tid.tenant_id_) { // SYS tenants don't need to be added + LOG_INFO("sys tenant need not add ddl table", K(tid), K_(ddl_table_start_serve_tstamp), + K_(ddl_table_start_schema_version), KPC(tenant)); + } else if (! tenant->is_serving()) { + // No need to add a DDL partition if the tenant is no longer in service + LOG_WARN("tenant is not serving, need not add ddl table", KPC(tenant)); + } else if (OB_FAIL(tenant->add_ddl_table(ddl_table_start_serve_tstamp_, + ddl_table_start_schema_version_, is_create_tenant))) { + LOG_ERROR("add ddl table fail", KR(ret), K(tid), K(ddl_table_start_schema_version_), + K(ddl_table_start_serve_tstamp_), K(is_create_tenant), KPC(tenant)); + } else { + // Normal tenant successfully adds DDL table + } + + if (OB_SUCCESS != ret) { + err_ = ret; + bret = false; + } + return bret; +} + +int ObLogTenantMgr::recycle_partition(const common::ObPartitionKey &pkey) +{ + int ret = OB_SUCCESS; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + bool tenant_can_be_dropped = false; + uint64_t tenant_id = pkey.get_tenant_id(); + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Tenants must wait until all partitions have been reclaimed before they are deleted, no tenant will not exist + LOG_ERROR("tenant not exist when recycle partition, which should not happen", KR(ret), + K(tenant_id), K(pkey)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_ERROR("get_tenant_guard fail", KR(ret), K(tenant_id), K(pkey)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(tenant), K(tenant_id), K(pkey)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant->recycle_partition(pkey, tenant_can_be_dropped))) { + LOG_ERROR("tenant recycle partition fail", KR(ret), K(tenant_id), K(pkey), KPC(tenant)); + } else if (tenant_can_be_dropped) { + ISTAT("[DROP_TENANT] [REMOVE_TENANT] remove tenant from tenant map after recycle partition", + KPC(tenant), K(pkey)); + + // If the tenant needs to be deleted, delete the tenant here + if (OB_FAIL(remove_tenant_(tenant_id, tenant))) { + LOG_ERROR("remove tenant fail", KR(ret), K(tenant_id), K(pkey)); + } + } + + ISTAT("[RECYCLE_PARTITION]", KR(ret), K(tenant_id), K(tenant_can_be_dropped), K(pkey), KPC(tenant)); + + return ret; +} + +int ObLogTenantMgr::set_data_start_schema_version_on_split_mode() +{ + int ret = OB_SUCCESS; + TenantUpdateStartSchemaFunc func; + + if (OB_UNLIKELY(! inited_)) { + ret = OB_NOT_INIT; + LOG_ERROR("ObLogTenantMgr has not inited", KR(ret)); + } else if (OB_FAIL(tenant_hash_map_.for_each(func))) { + LOG_ERROR("set data start schema version for all tenant fail", KR(ret)); + } else { + // succ + } + + return ret; +} + +bool ObLogTenantMgr::TenantUpdateStartSchemaFunc::operator()(const TenantID &tid, ObLogTenant *tenant) +{ + bool bret = true; + int ret = OB_SUCCESS; + + if (OB_ISNULL(tenant)) { + ret = OB_ERR_UNEXPECTED; + bret = false; + LOG_ERROR("tenant is NULL", KR(ret), K(bret), K(tid), K(tenant)); + } else if (OB_FAIL(tenant->update_data_start_schema_version_on_split_mode())) { + bret = false; + LOG_ERROR("update_data_start_schema_version_on_split_mode failed", + KR(ret), K(bret), K(tid), K(tenant)); + } else { + // succ + } + + return bret; +} + +int ObLogTenantMgr::set_data_start_schema_version_for_all_tenant(const int64_t version) +{ + int ret = OB_SUCCESS; + SetDataStartSchemaVersionFunc func(version); + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(tenant_hash_map_.for_each(func))) { + LOG_ERROR("set data start schema version for all tenant fail", KR(ret), K(version)); + } else { + // success + } + return ret; +} + +bool ObLogTenantMgr::SetDataStartSchemaVersionFunc::operator()(const TenantID &tid, ObLogTenant *tenant) +{ + UNUSED(tid); + if (NULL != tenant) { + tenant->update_global_data_schema_version(data_start_schema_version_); + } + return true; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_tenant_mgr.h b/src/liboblog/src/ob_log_tenant_mgr.h new file mode 100644 index 0000000000000000000000000000000000000000..9ec2fa9810912a01060db79291922a9a190acc93 --- /dev/null +++ b/src/liboblog/src/ob_log_tenant_mgr.h @@ -0,0 +1,378 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_TENANT_MGR_H__ +#define OCEANBASE_LIBOBLOG_TENANT_MGR_H__ + +#include "lib/hash/ob_hashset.h" // ObHashSet +#include "lib/container/ob_se_array.h" // ObSEArray +#include "lib/string/ob_string.h" // ObString + +#include "ob_log_tenant.h" // ObLogTenant, ObLogTenantGuard +#include "ob_log_part_callback.h" // PartAddCallback, PartRecycleCallback, PartCBArray +#include "ob_log_part_info.h" // PartInfoMap +#include "ob_log_table_id_cache.h" // GIndexCache, TableIDCache + +namespace oceanbase +{ +namespace common +{ +class ObPartitionKey; +} + +namespace liboblog +{ +class IObLogSchemaGetter; +class IObLogTimeZoneInfoGetter; + +class IObLogTenantMgr +{ +public: + IObLogTenantMgr() {} + virtual ~IObLogTenantMgr() {} + +public: + // add all tenants + // + // @retval OB_SUCCESS success + // @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + // @retval OB_TIMEOUT timeout + // @retval other error code fail + virtual int add_all_tenants(const int64_t start_tstamp, + const int64_t sys_schema_version, + const int64_t timeout) = 0; + + // add tenant + // @param old_sys_schema_version sys_schema_version before ADD_TENANT DDL operate success + // + // @retval OB_SUCCESS success + // @retval OB_TENANT_HAS_BEEN_DROPPED tenent has been dropped + // @retval OB_TIMEOUT timeout + // @retval other error code fail + virtual int add_tenant(const uint64_t tenant_id, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const int64_t start_serve_tstamp, + const int64_t sys_schema_version, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout, + bool &add_tenant_succ) = 0; + + /// Perform a tenant deletion operation, only if the tenant deletion conditions are met + /// + /// @param tenant_id TenantID + /// @param call_from caller info + /// + /// @retval OB_SUCCESS Success + /// @retval other error code Fail + virtual int drop_tenant(const uint64_t tenant_id, const char *call_from) = 0; + + /// handle DEL_TENANT_START DDL + virtual int drop_tenant_start(const uint64_t tenant_id, const int64_t drop_tenant_start_tstamp) = 0; + + /// handle DEL_TENANT_END DDL + virtual int drop_tenant_end(const uint64_t tenant_id, const int64_t drop_tenant_end_tstamp) = 0; + + virtual int alter_tenant_name(const uint64_t tenant_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t timeout, + const char *&tenant_name, + bool &tenant_is_chosen) = 0; + + /// Sync only ddl statements from whitelisted tenants, filter ddl statements from other tenants + /// + /// @param [in] tenant_id TenantID + /// @param [out] chosen Returned matching results + /// + /// @retval OB_SUCCESS Success + /// @retval other error code Fail + virtual int filter_ddl_stmt(const uint64_t tenant_id, bool &chosen) = 0; + + /// get all Tenant ID + /// TODO: Removing std::vector dependencies + /// + /// @param [out] tenant_ids returned tenant ids + /// + /// @retval OB_SUCCESS Success + /// @retval other error code Fail + virtual int get_all_tenant_ids(std::vector &tenant_ids) = 0; + virtual bool is_inited() = 0; + + /// Get the specified Tenant ID tz_info_wrap, called by 
ObObj2strHelper; an error is reported if the tenant does not exist + /// + /// @param [in] tenant_id TenantID + /// @param [out] tz_info_wrap Timezone info + /// + /// @retval OB_SUCCESS Success + /// @retval other error code Fail + virtual int get_tenant_tz_wrap(const uint64_t tenant_id, common::ObTimeZoneInfoWrap *&tz_info_wrap) = 0; + + /// Get the tz_info_map of the specified tenant, called by ObLogTimeZoneInfoGetter + /// + /// @param [in] tenant_id TenantID + /// @param [out] tz_info_map Timezone info map + /// + /// @retval OB_SUCCESS Success + /// @retval other error code Fail + virtual int get_tenant_tz_map(const uint64_t tenant_id, + common::ObTZInfoMap *&tz_info_map) = 0; + + /// Handle schema split end DDL + virtual int handle_schema_split_finish( + const uint64_t ddl_tenant_id, + const int64_t new_schema_version, + const int64_t start_serve_tstamp, + const int64_t timeout) = 0; + + // @retval OB_SUCCESS Success + // @retval OB_ENTRY_NOT_EXIST Tenant not exist + // @retval other error code Fail + virtual int get_tenant_guard(const uint64_t tenant_id, ObLogTenantGuard &guard) = 0; + + // Get the corresponding ObLogTenant based on tenant id + // + // @retval OB_SUCCESS Success + // @retval OB_ENTRY_NOT_EXIST Tenant not exist + // @retval other error code Fail + virtual int get_tenant(const uint64_t tenant_id, ObLogTenant *&tenant) = 0; + // revert tenant + virtual int revert_tenant(ObLogTenant *tenant) = 0; + + virtual int get_ddl_progress(uint64_t &tenant_id, + int64_t &ddl_min_progress, + uint64_t &ddl_min_handle_log_id) = 0; + + virtual void print_stat_info() = 0; + + virtual int register_part_add_callback(PartAddCallback *callback) = 0; + virtual int register_part_recycle_callback(PartRecycleCallback *callback) = 0; + + // Recycle the partition structure + // This interface is called when the partition OFFLINE task is processed; receiving the OFFLINE task indicates that all data for the partition has been processed. 
+ virtual int recycle_partition(const common::ObPartitionKey &pkey) = 0; + + // Set the same starting schema version for all tenants + virtual int set_data_start_schema_version_for_all_tenant(const int64_t version) = 0; + + // set the start schema version for a specific tenant in split mode + virtual int set_data_start_schema_version_on_split_mode() = 0; + +}; + +typedef common::ObLinkHashMap TenantHashMap; +class ObLogTenantMgr : public IObLogTenantMgr +{ + friend ObLogTenant; +public: + ObLogTenantMgr(); + virtual ~ObLogTenantMgr(); + +public: + int init(const bool enable_oracle_mode_match_case_sensitive); + void destroy(); + + int register_part_add_callback(PartAddCallback *callback); + int register_part_recycle_callback(PartRecycleCallback *callback); + + int add_all_tenants(const int64_t start_tstamp, + const int64_t sys_schema_version, + const int64_t timeout); + + // add tenant + int add_tenant(const uint64_t tenant_id, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const int64_t start_serve_tstamp, + const int64_t sys_schema_version, + ObLogSchemaGuard &schema_guard, + const char *&tenant_name, + const int64_t timeout, + bool &add_tenant_succ); + // drop tenant + int drop_tenant(const uint64_t tenant_id, const char *call_from); + int drop_tenant_start(const uint64_t tenant_id, const int64_t drop_tenant_start_tstamp); + int drop_tenant_end(const uint64_t tenant_id, const int64_t drop_tenant_end_tstamp); + // alter tenant name + int alter_tenant_name(const uint64_t tenant_id, + const int64_t schema_version_before_alter, + const int64_t schema_version_after_alter, + const int64_t timeout, + const char *&tenant_name, + bool &tenant_is_chosen); + + int recycle_partition(const common::ObPartitionKey &pkey); + + int filter_ddl_stmt(const uint64_t tenant_id, bool &chosen); + int filter_tenant(const char *tenant_name, bool &chosen); + int handle_schema_split_finish( + const uint64_t ddl_tenant_id, + const int64_t new_schema_version, + const int64_t start_serve_tstamp, + const int64_t timeout); + int get_all_tenant_ids(std::vector &tenant_ids); + + virtual int get_tenant_tz_wrap(const uint64_t tenant_id, + common::ObTimeZoneInfoWrap *&tz_info_wrap); + virtual int get_tenant_tz_map(const uint64_t tenant_id, + common::ObTZInfoMap *&tz_info_map); + + // Get the corresponding ObLogTenant based on tenant id + int get_tenant_guard(const uint64_t tenant_id, ObLogTenantGuard &guard); + int get_tenant(const uint64_t tenant_id, ObLogTenant *&tenant); + int revert_tenant(ObLogTenant *tenant); + + int get_ddl_progress(uint64_t &tenant_id, + int64_t &ddl_min_progress, + uint64_t &ddl_min_handle_log_id); + virtual bool is_inited() { return inited_; } + void print_stat_info(); + + template int for_each_tenant(Func &func) + { + return tenant_hash_map_.for_each(func); + } + // Set the same starting schema version for all tenants + int set_data_start_schema_version_for_all_tenant(const int64_t version); + + int set_data_start_schema_version_on_split_mode(); +private: + static const int64_t DATA_OP_TIMEOUT = 1 * _SEC_; + static const int64_t DEFAULT_TENANT_SET_SIZE = 64; + static const int64_t CACHED_PART_INFO_COUNT = 1 << 10; + static const int64_t PART_INFO_BLOCK_SIZE = common::OB_MALLOC_NORMAL_BLOCK_SIZE; + + typedef common::hash::ObHashSet TenantIDSet; + + struct TenantUpdateStartSchemaFunc + { + TenantUpdateStartSchemaFunc() { }; + bool operator()(const TenantID &tid, ObLogTenant *tenant); + }; + + struct TenantDDLProgessGetter + { + TenantDDLProgessGetter(); + bool 
operator()(const TenantID &tid, ObLogTenant *tenant); + + uint64_t tenant_id_; + int64_t ddl_min_progress_; + uint64_t ddl_min_handle_log_id_; + }; + + struct TenantPrinter + { + int64_t serving_tenant_count_; + int64_t offline_tenant_count_; + std::vector tenant_ids_; + std::vector cf_handles_; + + TenantPrinter() : serving_tenant_count_(0), offline_tenant_count_(0), cf_handles_() {} + bool operator()(const TenantID &tid, ObLogTenant *tenant); + }; + + struct TenantAddDDLTableFunc + { + int err_; + int64_t succ_tenant_count_; + int64_t offline_tenant_count_; + int64_t ddl_table_start_serve_tstamp_; + int64_t ddl_table_start_schema_version_; + + TenantAddDDLTableFunc(const int64_t start_serve_tstamp, const int64_t start_schema_version) : + err_(OB_SUCCESS), + succ_tenant_count_(0), + offline_tenant_count_(0), + ddl_table_start_serve_tstamp_(start_serve_tstamp), + ddl_table_start_schema_version_(start_schema_version) + {} + bool operator()(const TenantID &tid, ObLogTenant *tenant); + }; + + struct SetDataStartSchemaVersionFunc + { + int64_t data_start_schema_version_; + + explicit SetDataStartSchemaVersionFunc(const int64_t data_start_schema_version) : + data_start_schema_version_(data_start_schema_version) + {} + bool operator()(const TenantID &tid, ObLogTenant *tenant); + }; + +private: + void revert_tenant_(ObLogTenant *tenant); + int add_served_tenant_for_stat_(const char *tenant_name, const uint64_t tenant_id); + int add_served_tenant_into_set_(const char *tenant_name, const uint64_t tenant_id); + int get_tenant_start_schema_version_(const uint64_t tenant_id, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const int64_t start_serve_tstamp, + const int64_t sys_schema_version, + int64_t &tenant_schema_version, + const int64_t timeout); + int get_first_schema_version_of_tenant_(const uint64_t tenant_id, + const int64_t sys_schema_version, + IObLogSchemaGetter &schema_getter, + int64_t &first_schema_version, + const int64_t timeout); + int add_ddl_table_if_needed_(const uint64_t tenant_id, + ObLogTenant &tenant, + const int64_t start_serve_tstamp, + const int64_t tenant_start_schema_version, + const bool is_new_created_tenant); + int add_inner_tables_on_backup_mode_(const uint64_t tenant_id, + ObLogTenant &tenant, + const int64_t start_serve_tstamp, + const int64_t start_schema_version, + const int64_t timeout); + int do_add_tenant_(const uint64_t tenant_id, + const char *tenant_name, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const bool is_tenant_served, + const int64_t start_serve_tstamp, + const int64_t sys_schema_version, + const int64_t timeout); + int drop_served_tenant_for_stat_(const uint64_t tenant_id); + int drop_served_tenant_from_set_(const uint64_t tenant_id); + int remove_tenant_(const uint64_t tenant_id, ObLogTenant *tenant); + int start_tenant_service_(const uint64_t tenant_id, + const bool is_new_created_tenant, + const bool is_new_tenant_by_restore, + const int64_t start_serve_tstamp, + const int64_t tenant_start_schema_version, + const int64_t timeout); +private: + bool inited_; + + TenantHashMap tenant_hash_map_; + + // Structures shared by all tenants + PartInfoMap part_info_map_; // partition info map + GIndexCache gindex_cache_; // cache of global index + TableIDCache table_id_cache_; + + // callback + PartCBArray part_add_cb_array_; + PartCBArray part_rc_cb_array_; + + TenantIDSet tenant_id_set_; + + bool enable_oracle_mode_match_case_sensitive_; +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTenantMgr); +}; 
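Per-tenant operations on ObLogTenantMgr are expressed as small functor structs applied through for_each_tenant(), which forwards to tenant_hash_map_.for_each(); TenantPrinter, TenantAddDDLTableFunc and SetDataStartSchemaVersionFunc above all follow the same shape, a bool operator()(const TenantID &, ObLogTenant *) that returns true to keep iterating. The sketch below shows a minimal functor of that shape. It is illustrative only and not part of this patch: the names ServingTenantCounter and count_serving_tenants are made up for the example, while TenantID, ObLogTenant::is_serving(), UNUSED() and for_each_tenant() come from the declarations above.

// Illustrative sketch only (not part of the patch): a minimal per-tenant functor
// in the same style as TenantPrinter, counting tenants that are still serving.
struct ServingTenantCounter
{
  int64_t serving_count_;

  ServingTenantCounter() : serving_count_(0) {}

  // Return true to continue iteration over the tenant hash map.
  bool operator()(const TenantID &tid, ObLogTenant *tenant)
  {
    UNUSED(tid);
    if (NULL != tenant && tenant->is_serving()) {
      serving_count_++;
    }
    return true;
  }
};

// Hypothetical usage: apply the functor to every tenant tracked by the manager.
int64_t count_serving_tenants(ObLogTenantMgr &tenant_mgr)
{
  ServingTenantCounter counter;
  (void)tenant_mgr.for_each_tenant(counter);
  return counter.serving_count_;
}

The existing functors in this patch return false only when they hit an error and record the error code in a member (see TenantAddDDLTableFunc::err_), which the caller inspects after for_each() returns; a read-only functor like the one above can simply always return true.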
+ +} +} +#endif diff --git a/src/liboblog/src/ob_log_tenant_task_queue.cpp b/src/liboblog/src/ob_log_tenant_task_queue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..48f767ef9266ac799e07962a77bf5cf031d1c75d --- /dev/null +++ b/src/liboblog/src/ob_log_tenant_task_queue.cpp @@ -0,0 +1,103 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_tenant_task_queue.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +ObLogTenantTaskQueue::ObLogTenantTaskQueue(ObLogTenant &host) : + inited_(false), + host_(host), + lease_(), + log_entry_task_queue_() +{ +} + +ObLogTenantTaskQueue::~ObLogTenantTaskQueue() +{ + reset(); +} + +int ObLogTenantTaskQueue::init(const int64_t start_seq) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("ObLogTenantTaskQueue has been initialized", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(start_seq < 0)) { + LOG_ERROR("invalid arguments", K(start_seq)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(log_entry_task_queue_.init(start_seq))) { + LOG_ERROR("init row_data_committer queue fail", KR(ret), K(start_seq)); + } else { + lease_.reset(); + inited_ = true; + } + + return ret; +} + +void ObLogTenantTaskQueue::reset() +{ + inited_ = false; + lease_.reset(); + + if (log_entry_task_queue_.is_inited()) { + (void)log_entry_task_queue_.destroy(); + } +} + +int ObLogTenantTaskQueue::push_log_entry_task(ObLogEntryTask *task) +{ + int ret = OB_SUCCESS; + PartTransTask *part_trans_task = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTenantTaskQueue has not been initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid task", KPC(task)); + ret = OB_INVALID_ARGUMENT; + } else { + const uint64_t tenant_id = task->get_tenant_id(); + const int64_t seq = 1; + part_trans_task = static_cast(task->get_host()); + + if (OB_FAIL(log_entry_task_queue_.set(seq, task))) { + LOG_ERROR("push task into queue fail", KR(ret), + K(tenant_id), + K(seq), "task", *task, + KPC(part_trans_task), + "begin_sn", log_entry_task_queue_.begin_sn(), + "end_sn", log_entry_task_queue_.end_sn()); + } else { + LOG_DEBUG("[ROW_DATA] push task into queue succ", + K(tenant_id), + K(seq), "task", *task, + KPC(part_trans_task), + "begin_sn", log_entry_task_queue_.begin_sn(), + "end_sn", log_entry_task_queue_.end_sn()); + } + } + + return ret; +} + +} +} diff --git a/src/liboblog/src/ob_log_tenant_task_queue.h b/src/liboblog/src/ob_log_tenant_task_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..5d49d0fe20a5fd1358ae001fab60af15097b6077 --- /dev/null +++ b/src/liboblog/src/ob_log_tenant_task_queue.h @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_TENANT_TASK_QUEUE_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_TENANT_TASK_QUEUE_H__ + +#include "lib/container/ob_ext_ring_buffer.h" // ObExtendibleRingBuffer +#include "lib/thread/ob_thread_lease.h" // ObThreadLease +#include "ob_log_part_trans_task.h" // ObLogEntryTask + +namespace oceanbase +{ +namespace liboblog +{ +//////////////////////////// ObLogTenantTaskQueue ///////////////////////// + +typedef common::ObExtendibleRingBuffer LogEntryTaskQueue; + +class ObLogTenant; +class ObLogTenantTaskQueue final +{ +public: + explicit ObLogTenantTaskQueue(ObLogTenant &host); + ~ObLogTenantTaskQueue(); + +public: + int init(const int64_t start_seq); + void reset(); + + ObLogTenant &get_host() { return host_; } + + bool acquire_lease() { return lease_.acquire(); } + bool revoke_lease() {return lease_.revoke();} + + int push_log_entry_task(ObLogEntryTask *task); + + LogEntryTaskQueue &get_log_entry_task_queue() { return log_entry_task_queue_; } + + int64_t get_next_task_seq() const { return log_entry_task_queue_.begin_sn(); } + int64_t get_log_entry_task_count() const + { + return log_entry_task_queue_.end_sn() - log_entry_task_queue_.begin_sn(); + } + + TO_STRING_KV("log_entry_task_count", get_log_entry_task_count(), + "next_task_seq", get_next_task_seq()); + +private: + bool inited_; + ObLogTenant &host_; + + common::ObThreadLease lease_; // Responsible for the state transition of the queue + LogEntryTaskQueue log_entry_task_queue_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTenantTaskQueue); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_timer.cpp b/src/liboblog/src/ob_log_timer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..46f230bf45073a598c37fec2b398791ccc76b08f --- /dev/null +++ b/src/liboblog/src/ob_log_timer.cpp @@ -0,0 +1,309 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_timer.h" + +#include "ob_log_instance.h" // IObLogErrHandler +#include "ob_log_config.h" // ObLogConfig + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +int64_t ObLogFixedTimer::g_wait_time = ObLogConfig::default_timer_task_wait_time_msec * _MSEC_; + +ObLogFixedTimer::ObLogFixedTimer() : + inited_(false), + tid_(0), + err_handler_(NULL), + task_queue_(), + task_cond_(), + allocator_(), + stop_flag_(true) +{} + +ObLogFixedTimer::~ObLogFixedTimer() +{ + destroy(); +} + +int ObLogFixedTimer::init(IObLogErrHandler &err_handler, const int64_t max_task_count) +{ + int ret = OB_SUCCESS; + int64_t queue_task_size = sizeof(QTask); + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(max_task_count <= 0)) { + LOG_ERROR("invalid argument", K(max_task_count)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(task_queue_.init(max_task_count, global_default_allocator, + ObModIds::OB_LOG_TIMER))) { + LOG_ERROR("init task queue fail", KR(ret), K(max_task_count)); + } else if (OB_FAIL(allocator_.init(queue_task_size, ObModIds::OB_LOG_TIMER))) { + LOG_ERROR("init allocator fail", KR(ret), K(queue_task_size)); + } else { + tid_ = 0; + err_handler_ = &err_handler; + stop_flag_ = true; + inited_ = true; + + LOG_INFO("init oblog timer succ", K(max_task_count)); + } + + return ret; +} + +void ObLogFixedTimer::destroy() +{ + stop(); + + destroy_all_tasks_(); + + inited_ = false; + stop_flag_ = true; + tid_ = 0; + err_handler_ = NULL; + task_queue_.destroy(); + allocator_.destroy(); + + LOG_INFO("destroy oblog timer succ"); +} + +int ObLogFixedTimer::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + stop_flag_ = false; + + int pthread_ret = pthread_create(&tid_, NULL, thread_func_, this); + + if (OB_UNLIKELY(0 != pthread_ret)) { + LOG_ERROR("create timer thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("start oblog timer succ"); + } + } + + return ret; +} + +void ObLogFixedTimer::stop() +{ + if (inited_) { + stop_flag_ = true; + + if (0 != tid_) { + int pthread_ret = pthread_join(tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("pthread_join fail", K(tid_), K(pthread_ret), KERRNOMSG(pthread_ret)); + } + + tid_ = 0; + + LOG_INFO("stop oblog timer succ"); + } + } +} + +void ObLogFixedTimer::mark_stop_flag() +{ + stop_flag_ = true; +} + +void *ObLogFixedTimer::thread_func_(void *args) +{ + ObLogFixedTimer *self = static_cast(args); + + if (NULL != self) { + self->run(); + } + + return NULL; +} + +void ObLogFixedTimer::run() +{ + int ret = OB_SUCCESS; + + LOG_INFO("oblog timer thread start"); + + while (OB_SUCCESS == ret && ! 
stop_flag_) { + QTask *queue_task = NULL; + + if (OB_FAIL(next_queue_task_(queue_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("next_queue_task_ fail", KR(ret), K(queue_task)); + } + } else if (OB_ISNULL(queue_task)) { + LOG_ERROR("invalid queue task", K(queue_task)); + ret = OB_ERR_UNEXPECTED; + } else { + queue_task->task_.process_timer_task(); + free_queue_task_(queue_task); + queue_task = NULL; + + if (REACH_TIME_INTERVAL(STAT_INTERVAL)) { + _LOG_INFO("[STAT] [TIMER] TASK_COUNT=%ld", task_queue_.get_total()); + } + } + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { + err_handler_->handle_error(ret, "oblog timer thread exits, err=%d", ret); + } + + LOG_INFO("oblog timer thread exits", KR(ret), K_(stop_flag)); +} + +int ObLogFixedTimer::schedule(ObLogTimerTask *task) +{ + int ret = OB_SUCCESS; + QTask *queue_task = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LOG_ERROR("invalid argument", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(queue_task = alloc_queue_task_(*task))) { + LOG_ERROR("allocate queue task fail", K(task)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(push_queue_task_(*queue_task))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push_queue_task_ fail", KR(ret), K(queue_task)); + } + } + + if (OB_SUCCESS != ret && NULL != queue_task) { + free_queue_task_(queue_task); + queue_task = NULL; + } + + return ret; +} + +void ObLogFixedTimer::configure(const ObLogConfig &config) +{ + int64_t timer_task_wait_time_msec = config.timer_task_wait_time_msec; + + ATOMIC_STORE(&g_wait_time, timer_task_wait_time_msec * _MSEC_); + LOG_INFO("[CONFIG]", K(timer_task_wait_time_msec)); +} + +ObLogFixedTimer::QTask *ObLogFixedTimer::alloc_queue_task_(ObLogTimerTask &timer_task) +{ + QTask *queue_task = NULL; + void *ptr = allocator_.alloc(); + + if (NULL != ptr) { + queue_task = new (ptr) QTask(timer_task); + } + return queue_task; +} + +void ObLogFixedTimer::free_queue_task_(QTask *task) +{ + if (NULL != task) { + task->~QTask(); + allocator_.free(task); + task = NULL; + } +} + +int ObLogFixedTimer::push_queue_task_(QTask &task) +{ + int ret = OB_SUCCESS; + + while (OB_SIZE_OVERFLOW == (ret = task_queue_.push(&task)) && ! stop_flag_) { + task_cond_.timedwait(COND_WAIT_TIME); + } + + if (OB_SUCCESS != ret && stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS == ret) { + task_cond_.signal(); + } + + return ret; +} + +int ObLogFixedTimer::next_queue_task_(QTask *&task) +{ + int ret = OB_SUCCESS; + task = NULL; + + while (OB_ENTRY_NOT_EXIST == (ret = task_queue_.pop(task)) && ! 
stop_flag_) { + task_cond_.timedwait(COND_WAIT_TIME); + } + + if (OB_SUCCESS != ret && stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS == ret) { + task_cond_.signal(); + + if (OB_ISNULL(task)) { + LOG_ERROR("invalid task popped from queue", K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t cur_time = get_timestamp(); + int64_t delay = task->out_timestamp_ - cur_time; + + // Assuming the delay is not too long, otherwise the thread would stuck here + if (delay > 0) { + usec_sleep(delay); + } + } + } + + return ret; +} + +void ObLogFixedTimer::destroy_all_tasks_() +{ + int ret = OB_SUCCESS; + QTask *task = NULL; + + while (OB_SUCC(task_queue_.pop(task))) { + if (OB_NOT_NULL(task)) { + free_queue_task_(task); + task = NULL; + } + } +} + +///////////////////////////////////// QTask /////////////////////////////////////// + +ObLogFixedTimer::QTask::QTask(ObLogTimerTask &task) : task_(task) +{ + int64_t wait_time = ATOMIC_LOAD(&ObLogFixedTimer::g_wait_time); + out_timestamp_ = get_timestamp() + wait_time; +} + +} +} + diff --git a/src/liboblog/src/ob_log_timer.h b/src/liboblog/src/ob_log_timer.h new file mode 100644 index 0000000000000000000000000000000000000000..7259e40065c7d826e2a1aec25d69d2430a81739b --- /dev/null +++ b/src/liboblog/src/ob_log_timer.h @@ -0,0 +1,104 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_TIMER_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_TIMER_H__ + +#include "lib/queue/ob_fixed_queue.h" // ObFixedQueue +#include "common/ob_queue_thread.h" // ObCond +#include "lib/allocator/ob_small_allocator.h" // ObSmallAllocator +#include "ob_log_utils.h" // _SEC_ + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogTimerTask +{ +public: + virtual ~ObLogTimerTask() {} + +public: + virtual void process_timer_task() = 0; +}; + +class IObLogErrHandler; +class ObLogConfig; + +// Timer class +class ObLogFixedTimer +{ + static const int64_t STAT_INTERVAL = 30 * _SEC_; + static const int64_t COND_WAIT_TIME = 1 * _SEC_; + +public: + // Timer task waiting time + static int64_t g_wait_time; + +public: + ObLogFixedTimer(); + virtual ~ObLogFixedTimer(); + +public: + int init(IObLogErrHandler &err_handler, const int64_t max_task_count); + void destroy(); + + int start(); + void stop(); + void mark_stop_flag(); + +public: + int schedule(ObLogTimerTask *task); + +public: + static void configure(const ObLogConfig &config); + +public: + void run(); + +private: + static void *thread_func_(void *args); + struct QTask + { + int64_t out_timestamp_; // timestamp of out + ObLogTimerTask &task_; // Actual timer tasks + + explicit QTask(ObLogTimerTask &task); + }; + + typedef common::ObFixedQueue TaskQueue; + +private: + void destroy_all_tasks_(); + QTask *alloc_queue_task_(ObLogTimerTask &timer_task); + int push_queue_task_(QTask &task); + void free_queue_task_(QTask *task); + int next_queue_task_(QTask *&task); + +private: + bool inited_; + pthread_t tid_; // Timer thread ID + IObLogErrHandler *err_handler_; // err handler + TaskQueue task_queue_; // task queue + common::ObCond task_cond_; + common::ObSmallAllocator allocator_; + + volatile bool stop_flag_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogFixedTimer); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_timezone_info_getter.cpp b/src/liboblog/src/ob_log_timezone_info_getter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26a2c1e3c5a1055e57ff75b642b3c9f1b1dd427d --- /dev/null +++ b/src/liboblog/src/ob_log_timezone_info_getter.cpp @@ -0,0 +1,448 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_timezone_info_getter.h" + +#include "ob_log_systable_helper.h" // ObLogSysTableHelper +#include "ob_log_instance.h" // IObLogErrHandler +#include "share/ob_time_zone_info_manager.h" // FETCH_TZ_INFO_SQL + +namespace oceanbase +{ +namespace liboblog +{ +using namespace oceanbase::common; +using namespace oceanbase::common::sqlclient; + +ObLogTimeZoneInfoGetter::ObLogTimeZoneInfoGetter() : inited_(false), + tz_tid_(0), + tz_cond_(), + stop_flag_(true), + mysql_proxy_(NULL), + systable_helper_(NULL), + err_handler_(NULL), + lock_(), + tenant_mgr_(NULL), + timezone_str_(NULL) +{ +} + +ObLogTimeZoneInfoGetter::~ObLogTimeZoneInfoGetter() +{ + destroy(); +} + +int ObLogTimeZoneInfoGetter::init(const char *timezone_str, + common::ObMySQLProxy &mysql_proxy, + IObLogSysTableHelper &systable_helper, + IObLogTenantMgr &tenant_mgr, + IObLogErrHandler &err_handler) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("schema getter has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(timezone_str)) { + LOG_ERROR("invalid argument", K(timezone_str)); + ret = OB_INVALID_ARGUMENT; + } else { + tz_tid_ = 0; + stop_flag_ = false; + timezone_str_ = timezone_str; + mysql_proxy_ = &mysql_proxy; + systable_helper_ = &systable_helper; + tenant_mgr_ = &tenant_mgr; + err_handler_ = &err_handler; + inited_ = true; + + LOG_INFO("init timezone info getter succ"); + } + return ret; +} + +void ObLogTimeZoneInfoGetter::destroy() +{ + stop(); + + inited_ = false; + tz_tid_ = 0; + + timezone_str_ = NULL; + mysql_proxy_ = NULL; + systable_helper_ = NULL; + tenant_mgr_ = NULL; + err_handler_ = NULL; +} + +int ObLogTimeZoneInfoGetter::start() +{ + int ret = OB_SUCCESS; + int pthread_ret = 0; + + if (OB_UNLIKELY(0 != tz_tid_)) { + LOG_ERROR("timezone info thread has been started", K(tz_tid_)); + ret = OB_NOT_SUPPORTED; + } else if (0 != (pthread_ret = pthread_create(&tz_tid_, NULL, tz_thread_func_, this))) { + LOG_ERROR("start timezone info thread fail", K(pthread_ret), KERRNOMSG(pthread_ret)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("start timezone info thread succ"); + } + + return ret; +} + +void ObLogTimeZoneInfoGetter::stop() +{ + stop_flag_ = true; + + if (0 != tz_tid_) { + tz_cond_.signal(); + + int pthread_ret = pthread_join(tz_tid_, NULL); + if (0 != pthread_ret) { + LOG_ERROR("join timezone info thread fail", K(tz_tid_), K(pthread_ret), + KERRNOMSG(pthread_ret)); + } else { + LOG_INFO("stop timezone info thread succ"); + } + + tz_tid_ = 0; + } +} + +void *ObLogTimeZoneInfoGetter::tz_thread_func_(void *args) +{ + if (NULL != args) { + ObLogTimeZoneInfoGetter *tz_info_getter = static_cast(args); + tz_info_getter->tz_routine(); + } + + return NULL; +} + +void ObLogTimeZoneInfoGetter::tz_routine() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("instance has not been initialized"); + ret = OB_NOT_INIT; + } else { + while (! 
stop_flag_ && OB_SUCCESS == ret && tenant_mgr_->is_inited()) { + if (OB_FAIL(query_timezone_info_version_and_update_())) { + LOG_ERROR("query_timezone_info_version_and_update_ fail", KR(ret)); + } + + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + } + + tz_cond_.timedwait(QUERY_TIMEZONE_INFO_VERSION_INTERVAL); + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + + if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret) { + if (NULL != err_handler_) { + err_handler_->handle_error(ret, "timezone info thread exits, err=%d", ret); + } + stop_flag_ = true; + } + } + + LOG_INFO("timezone info thread exits", KR(ret), K_(stop_flag)); +} + +int ObLogTimeZoneInfoGetter::query_timezone_info_version_and_update_() +{ + int ret = OB_SUCCESS; + + // Version change, active refresh + if (OB_FAIL(refresh_timezone_info_())) { + if (OB_NEED_RETRY == ret) { + LOG_WARN("timezone_info_getter_ refresh_timezone_info_ fail", KR(ret)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("timezone_info_getter_ refresh_timezone_info_ fail", KR(ret)); + } + } else { + LOG_INFO("timezone_info_getter_ refresh_timezone_info_ succ"); + } + + return ret; +} + +int ObLogTimeZoneInfoGetter::refresh_timezone_info_() +{ + int ret = OB_SUCCESS; + // Requires locking to prevent multi-threaded access: formatter and ObLogTimeZoneInfoGetter query threads themselves + ObSpinLockGuard guard(lock_); + const bool fetch_timezone_info_by_tennat = need_fetch_timezone_info_by_tennat_(); + + if (! fetch_timezone_info_by_tennat) { + // Global use of a time zone table + if (OB_FAIL(refresh_tenant_timezone_info_based_on_version_(OB_SYS_TENANT_ID))) { + LOG_WARN("refresh_sys_tenant_timezone_info_based_on_version_ fail", KR(ret)); + } + } else { + // refresh by tenant + if (OB_FAIL(refresh_all_tenant_timezone_info_())) { + LOG_WARN("fail to refresh all tenant timezone info", KR(ret)); + } + } + + return ret; +} + +bool ObLogTimeZoneInfoGetter::need_fetch_timezone_info_by_tennat_() const +{ + return GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2260; +} + +int ObLogTimeZoneInfoGetter::refresh_tenant_timezone_info_based_on_version_(const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + int64_t tz_info_version = OB_INVALID_TIMESTAMP; + ObLogTenantGuard guard; + ObLogTenant *tenant = NULL; + + if (OB_ISNULL(tenant_mgr_)) { + LOG_ERROR("tenant_mgr_ is NULL"); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(query_timezone_info_version_(tenant_id, tz_info_version))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Not present, normal, tenant has not imported time zone table + LOG_INFO("query_timezone_info_version_, timezone_info_version not exist", K(tenant_id)); + ret = OB_SUCCESS; + } else { + LOG_WARN("query_timezone_info_version_ fail", KR(ret), K(tz_info_version)); + } + } else if (OB_FAIL(tenant_mgr_->get_tenant_guard(tenant_id, guard))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // No need to deal with tenant non-existence, deletion + LOG_INFO("tenant not exist, do nothing", K(tenant_id)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("get tenant fail", KR(ret), K(tenant_id)); + } + } else if (OB_ISNULL(tenant = guard.get_tenant())) { + LOG_ERROR("invalid tenant", K(tenant_id), K(tenant)); + ret = OB_ERR_UNEXPECTED; + } else if (tz_info_version == tenant->get_timezone_info_version()) { + // do nothing + LOG_INFO("timezone_info_version is unchanged, don't need to update timezone info", K(tenant_id), + "current_tz_info_version", tenant->get_timezone_info_version(), K(tz_info_version)); + } else { + // Version change, active refresh + if 
(OB_FAIL(refresh_tenant_timezone_info_(tenant_id, tenant->get_tz_info_map()))) { + LOG_ERROR("refresh_tenant_timezone_info_ fail", KR(ret), K(tenant_id)); + } else { + // update version + tenant->update_timezone_info_version(tz_info_version); + } + } + + return ret; +} + +int ObLogTimeZoneInfoGetter::refresh_tenant_timezone_info_(const uint64_t tenant_id, + common::ObTZInfoMap *tz_info_map) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(tz_info_map)) { + LOG_WARN("get tenant timezone info map fail", KR(ret), K(tenant_id), K(tz_info_map)); + ret = OB_ERR_UNEXPECTED; + } else { + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + sqlclient::ObMySQLResult *result = nullptr; + if (OB_ISNULL(mysql_proxy_)) { + LOG_ERROR("mysql_proxy_ is null", K(mysql_proxy_)); + ret = OB_ERR_UNEXPECTED; + } else if (! need_fetch_timezone_info_by_tennat_()) { + if (OB_FAIL(mysql_proxy_->read(res, ObTimeZoneInfoManager::FETCH_TZ_INFO_SQL))) { + LOG_WARN("fail to execute sql", KR(ret)); + ret = OB_NEED_RETRY; + } + } else { + if (OB_FAIL(mysql_proxy_->read(res, tenant_id, ObTimeZoneInfoManager::FETCH_TENANT_TZ_INFO_SQL))) { + LOG_WARN("fail to execute sql", KR(ret)); + ret = OB_NEED_RETRY; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(result = res.get_result())) { + LOG_WARN("fail to get result", K(result)); + ret = OB_NEED_RETRY; + } else if (OB_FAIL(ObTimeZoneInfoManager::fill_tz_info_map(*result, *tz_info_map))) { + LOG_ERROR("fill_tz_info_map fail", KR(ret), K(tenant_id)); + } + } + } + + return ret; +} + +// 226 does a tenant split of the time zone table and needs to maintain a tz_info_map for each tenant +int ObLogTimeZoneInfoGetter::refresh_all_tenant_timezone_info_() +{ + int ret = OB_SUCCESS; + std::vector all_tenant_ids; + + if (OB_FAIL(tenant_mgr_->get_all_tenant_ids(all_tenant_ids))) { + LOG_WARN("fail to get all tenant ids", KR(ret)); + } else if (OB_ISNULL(mysql_proxy_)) { + LOG_ERROR("mysql_proxy_ is null", K(mysql_proxy_)); + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < all_tenant_ids.size(); idx++) { + const uint64_t tenant_id = all_tenant_ids[idx]; + + if (OB_FAIL(refresh_tenant_timezone_info_based_on_version_(tenant_id))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("refresh_tenant_timezone_info_based_on_version_ fail", KR(ret), K(tenant_id)); + } else { + // tenant not exist, reset ret + ret = OB_SUCCESS; + } + } + } // for + } + + return ret; +} + +int ObLogTimeZoneInfoGetter::init_tz_info_wrap(const uint64_t tenant_id, + int64_t &tz_info_version, + ObTZInfoMap &tz_info_map, + ObTimeZoneInfoWrap &tz_info_wrap) +{ + int ret = OB_SUCCESS; + // 1. query the initial timezone_info_version + // 2. refresh timezone_info until successful + // 3. 
initialize tz_info_wrap_ + tz_info_version = OB_INVALID_TIMESTAMP; + + if (OB_ISNULL(timezone_str_)) { + LOG_ERROR("timezone_str is null", K(timezone_str_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(query_timezone_info_version_(tenant_id, tz_info_version))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Not present, normal, tenant has not imported time zone table + LOG_INFO("query_timezone_info_version_, timezone_info_version not exist", K(tenant_id)); + ret = OB_SUCCESS; + } else { + LOG_ERROR("query_timezone_info_version_ fail", KR(ret), K(tenant_id), K(tz_info_version)); + } + } else if (OB_FAIL(fetch_tenant_timezone_info_util_succ(tenant_id, &tz_info_map))) { + LOG_ERROR("fetch_tenant_timezone_info_util_succ fail", KR(ret), K(tenant_id)); + } else { + // succ + } + + + if (OB_SUCC(ret)) { + if (OB_FAIL(tz_info_wrap.init_time_zone(ObString(timezone_str_), tz_info_version, tz_info_map))) { + LOG_ERROR("tz_info_wrap init_time_zone fail", KR(ret), K(tenant_id), "timezone", timezone_str_, + K(tz_info_version), K(tz_info_wrap)); + } else { + LOG_INFO("tz_info_wrap init_time_zone succ", K(tenant_id), "timezone", timezone_str_, + K(tz_info_version), K(tz_info_wrap)); + } + } + + return ret; +} + + +int ObLogTimeZoneInfoGetter::query_timezone_info_version_(const uint64_t tenant_id, + int64_t &timezone_info_version) +{ + int ret = OB_SUCCESS; + bool done = false; + + if (OB_ISNULL(systable_helper_)) { + LOG_ERROR("systable_helper_ is null", K(systable_helper_)); + ret = OB_ERR_UNEXPECTED; + } else { + while (! done && OB_SUCCESS == ret) { + if (OB_FAIL(systable_helper_->query_timezone_info_version(tenant_id, timezone_info_version))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("systable_helper_ query_timezone_info_version fail", KR(ret), K(tenant_id), + K(timezone_info_version)); + } + } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == timezone_info_version)) { + LOG_ERROR("timezone_info_version is not valid", K(tenant_id), K(timezone_info_version)); + ret = OB_ERR_UNEXPECTED; + } else { + done = true; + } + + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + usleep(100L * 1000L); + } + } + } + + return ret; +} + +int ObLogTimeZoneInfoGetter::fetch_tenant_timezone_info_util_succ(const uint64_t tenant_id, + ObTZInfoMap *tz_info_map) +{ + int ret = OB_SUCCESS; + bool done = false; + + while (! 
done && OB_SUCCESS == ret) { + if (OB_FAIL(refresh_tenant_timezone_info_(tenant_id, tz_info_map))) { + LOG_WARN("refresh_tenant_timezone_info_ fail", KR(ret), K(tenant_id)); + } else { + done = true; + } + + if (OB_NEED_RETRY == ret) { + ret = OB_SUCCESS; + usleep(100L * 1000L); + } + } + + return ret; +} + +int ObLogTimeZoneInfoGetter::get_tenant_timezone_map(const uint64_t tenant_id, + ObTZMapWrap &tz_mgr_wrap) +{ + int ret = OB_SUCCESS; + IObLogTenantMgr *log_tenant_mgr = nullptr; + ObTZInfoMap *tz_info_map = nullptr; + + if (OB_ISNULL(log_tenant_mgr = TCTX.tenant_mgr_)) { + ret = OB_NOT_INIT; + LOG_WARN("log tenant mgr not init", K(ret)); + } else if (OB_FAIL(log_tenant_mgr->get_tenant_tz_map(tenant_id, tz_info_map))) { + LOG_WARN("log tenant mgr get tenant tz map failed", KR(ret), K(tenant_id)); + } else { + tz_mgr_wrap.set_tz_map(tz_info_map); + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_timezone_info_getter.h b/src/liboblog/src/ob_log_timezone_info_getter.h new file mode 100644 index 0000000000000000000000000000000000000000..b2833cd7b6662c99ebed69e5e2c2c4582492705a --- /dev/null +++ b/src/liboblog/src/ob_log_timezone_info_getter.h @@ -0,0 +1,149 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_TIMEZONE_INFO_GETTER_H__ +#define OCEANBASE_LIBOBLOG_TIMEZONE_INFO_GETTER_H__ + +#include "lib/mysqlclient/ob_mysql_proxy.h" // ObMySQLProxy +#include "lib/timezone/ob_timezone_info.h" // ObTZInfoMap +#include "lib/lock/ob_spin_lock.h" // ObSpinLock +#include "common/ob_queue_thread.h" // ObCond +#include "ob_log_tenant_mgr.h" // ObLogTenantMgr +#include "ob_log_instance.h" //TCTX + +namespace oceanbase +{ +namespace omt +{ +class ObTenantTimezoneGuard; +} +namespace liboblog +{ +///////////////////////////////////// IObLogTimeZoneInfoGetter ///////////////////////////////// +class IObLogTimeZoneInfoGetter +{ +public: + virtual ~IObLogTimeZoneInfoGetter() {} + +public: + virtual int start() = 0; + virtual void stop() = 0; + virtual void mark_stop_flag() = 0; + + // Init by ObLogTenant initialisation + virtual int init_tz_info_wrap(const uint64_t tenant_id, + int64_t &tz_info_map_version, + common::ObTZInfoMap &tz_info_map, + common::ObTimeZoneInfoWrap &tz_info_wrap) = 0; + + /// Refresh timezone info until successful (try refreshing several times) + virtual int fetch_tenant_timezone_info_util_succ(const uint64_t tenant_id, + common::ObTZInfoMap *tz_info_map) = 0; +}; + + +///////////////////////////////////// ObLogTimeZoneInfoGetter ///////////////////////////////// + +class IObLogErrHandler; +class IObLogSysTableHelper; +class IObLogTenantMgr; + +class ObLogTimeZoneInfoGetter : public IObLogTimeZoneInfoGetter +{ + static const int64_t SLEEP_TIME_ON_SCHEMA_FAIL = 500 * 1000; + static const int64_t QUERY_TIMEZONE_INFO_VERSION_INTERVAL = 100 * 1000 * 1000; + +public: + ObLogTimeZoneInfoGetter(); + virtual ~ObLogTimeZoneInfoGetter(); + +public: + int init(const char *timezone_str, + common::ObMySQLProxy &mysql_proxy, + 
IObLogSysTableHelper &systable_helper, + IObLogTenantMgr &tenant_mgr, + IObLogErrHandler &err_handler); + void destroy(); + +public: + virtual int start(); + virtual void stop(); + virtual void mark_stop_flag() { ATOMIC_STORE(&stop_flag_, true); } + + // Init by ObLogTenant initialisation + virtual int init_tz_info_wrap(const uint64_t tenant_id, + int64_t &tz_info_map_version, + common::ObTZInfoMap &tz_info_map, + common::ObTimeZoneInfoWrap &tz_info_wrap); + + virtual int fetch_tenant_timezone_info_util_succ(const uint64_t tenant_id, + common::ObTZInfoMap *tz_info_map); + + // for init interface OTTZ_MGR.tenant_tz_map_getter_ + static int get_tenant_timezone_map(const uint64_t tenant_id, + common::ObTZMapWrap &tz_map_wrap); + +private: + static void *tz_thread_func_(void *args); + void tz_routine(); + + // 1. local maintenance of timezone info version + // 2. Periodically query all_zone table - time_zone_info_version: + // update timezone info when changes occur + // otherwise not updated (updating timezone info involves multiple table joins) + int query_timezone_info_version_and_update_(); + + bool need_fetch_timezone_info_by_tennat_() const; + + // 1. Check the version first, if the version has not changed, then do not refresh + // 2. Refresh only if the version has changed + // + // @retval OB_SUCCESS Success + // @retval OB_ENTRY_NOT_EXIST tenant not exist + // @retval other_error_code Fail + int refresh_tenant_timezone_info_based_on_version_(const uint64_t tenant_id); + + // 1. For versions below 226, there is one global copy of the timezone internal table and only one timezone_verison + // 2. From version 226, the timezone internal table is split into tenants, each with a timezone_verison; if timezone_version is not available, the tenant has not imported a timezone table + // + // @retval OB_SUCCESS Success + // @retval OB_ENTRY_NOT_EXIST tenant not exist + // @retval other_error_code Fail + int query_timezone_info_version_(const uint64_t tenant_id, + int64_t &timezone_info_version); + + // refresh timezone info + int refresh_timezone_info_(); + int refresh_tenant_timezone_info_(const uint64_t tenant_id, common::ObTZInfoMap *tz_info_map); + int refresh_all_tenant_timezone_info_(); + +private: + bool inited_; + pthread_t tz_tid_; + common::ObCond tz_cond_; + volatile bool stop_flag_ CACHE_ALIGNED; + + common::ObMySQLProxy *mysql_proxy_; + + IObLogSysTableHelper *systable_helper_; + IObLogErrHandler *err_handler_; + + common::ObSpinLock lock_; + IObLogTenantMgr *tenant_mgr_; + // save for init tz_info_wrap + const char *timezone_str_; +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTimeZoneInfoGetter); +}; +} // namespace liboblog +} // namespace oceanbase +#endif diff --git a/src/liboblog/src/ob_log_trace_id.cpp b/src/liboblog/src/ob_log_trace_id.cpp new file mode 100644 index 0000000000000000000000000000000000000000..75bb634767c21f12af4f7efb665cdd0d80df95ea --- /dev/null +++ b/src/liboblog/src/ob_log_trace_id.cpp @@ -0,0 +1,27 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "lib/net/ob_addr.h" +#include "lib/profile/ob_trace_id.h" + +namespace oceanbase +{ +namespace liboblog +{ +common::ObAddr& get_self_addr() +{ + static common::ObAddr s_self_addr; + return s_self_addr; +} + +} +} diff --git a/src/liboblog/src/ob_log_trace_id.h b/src/liboblog/src/ob_log_trace_id.h new file mode 100644 index 0000000000000000000000000000000000000000..bb91f323d9745b493c2a5fc8c48ccfec497b1125 --- /dev/null +++ b/src/liboblog/src/ob_log_trace_id.h @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_SRC_OB_LOG_TRACE_ID_ +#define OCEANBASE_LIBOBLOG_SRC_OB_LOG_TRACE_ID_ + +#include "lib/net/ob_addr.h" +#include "lib/profile/ob_trace_id.h" + +namespace oceanbase +{ +namespace liboblog +{ +common::ObAddr& get_self_addr(); + +// trace id: Used to identify rpc requests between liboblog-observer +inline void init_trace_id() +{ + common::ObCurTraceId::init(get_self_addr()); +} + +inline void clear_trace_id() +{ + common::ObCurTraceId::reset(); +} + +inline void set_trace_id(const common::ObCurTraceId::TraceId &trace_id) +{ + common::ObCurTraceId::set(trace_id); +} + +class ObLogTraceIdGuard +{ +public: + ObLogTraceIdGuard() + { + init_trace_id(); + } + + explicit ObLogTraceIdGuard(const common::ObCurTraceId::TraceId &trace_id) + { + set_trace_id(trace_id); + } + + ~ObLogTraceIdGuard() + { + clear_trace_id(); + } +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTraceIdGuard); +}; + +} +} +#endif // OCEANBASE_LIBOBLOG_SRC_OB_LOG_TRACE_ID_ diff --git a/src/liboblog/src/ob_log_trans_ctx.cpp b/src/liboblog/src/ob_log_trans_ctx.cpp new file mode 100644 index 0000000000000000000000000000000000000000..63e07370dcc17eb27a5620e17e257ff7b73a3e0f --- /dev/null +++ b/src/liboblog/src/ob_log_trans_ctx.cpp @@ -0,0 +1,782 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_trans_ctx.h" + +#include "lib/allocator/ob_mod_define.h" // ObModIds + +#include "ob_log_part_mgr.h" // IObLogPartMgr +#include "ob_log_trans_ctx_mgr.h" // IObLogTransCtxMgr + +#define _TCTX_STAT(level, tag, args...) _OBLOG_LOG(level, "[STAT] [TRANS_CTX] " tag, ##args) +#define _TCTX_ISTAT(tag, args...) _TCTX_STAT(INFO, tag, ##args) +#define _TCTX_DSTAT(tag, args...) 
_TCTX_STAT(DEBUG, tag, ##args) + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace liboblog +{ +TransCtxSortElement::TransCtxSortElement() : + trans_ctx_host_(NULL), + trans_id_(), + global_trans_version_(OB_INVALID_VERSION) +{ +} + +TransCtxSortElement::~TransCtxSortElement() +{ + reset(); +} + +void TransCtxSortElement::reset() +{ + trans_ctx_host_ = NULL; + trans_id_.reset(); + global_trans_version_ = OB_INVALID_VERSION; +} + +int TransCtxSortElement::init(TransCtx &trans_ctx_host, + const transaction::ObTransID &trans_id, + const int64_t global_trans_version) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! trans_id.is_valid()) + || OB_UNLIKELY(OB_INVALID_VERSION == global_trans_version)) { + LOG_ERROR("invalid argument", K(trans_id), K(global_trans_version)); + } else { + trans_ctx_host_ = &trans_ctx_host; + trans_id_ = trans_id; + global_trans_version_ = global_trans_version; + } + + return ret; +} + +bool TransCtxSortElement::TransCtxCmp::operator() (const TransCtxSortElement &tx1, const TransCtxSortElement &tx2) const +{ + bool bool_ret = false; + + if (tx1.global_trans_version_ != tx2.global_trans_version_) { + bool_ret = tx1.global_trans_version_ > tx2.global_trans_version_; + } else { + bool_ret = tx1.trans_id_.compare(tx2.trans_id_); + } + + return bool_ret; +} + +TransCtx::TransCtx() : + host_(NULL), + state_(TRANS_CTX_STATE_INVALID), + trans_id_(), + trans_id_str_(), + major_version_str_(), + trx_sort_elem_(), + seq_(0), + participants_(NULL), + participant_count_(0), + ready_participant_objs_(NULL), + ready_participant_count_(0), + br_committer_queue_cond_(), + total_br_count_(0), + committed_br_count_(0), + valid_part_trans_task_count_(0), + revertable_participant_count_(0), + allocator_(ObModIds::OB_LOG_TRANS_CTX, PAGE_SIZE), + lock_() +{} + +TransCtx::~TransCtx() +{ +} + +void TransCtx::reset() +{ + destroy_participant_array_(); + + host_ = NULL; + state_ = TRANS_CTX_STATE_INVALID; + trans_id_.reset(); + trans_id_str_.reset(); + major_version_str_.reset(); + trx_sort_elem_.reset(); + seq_ = 0; + + participants_ = NULL; + participant_count_ = 0; + ready_participant_objs_ = NULL; + ready_participant_count_ = 0; + + total_br_count_ = 0; + committed_br_count_ = 0; + valid_part_trans_task_count_ = 0; + revertable_participant_count_ = 0; + + allocator_.reset(); +} + +int TransCtx::set_trans_id(const transaction::ObTransID &trans_id) +{ + int ret = OB_SUCCESS; + if (! trans_id.is_valid()) { + ret = OB_INVALID_ARGUMENT; + } else { + trans_id_ = trans_id; + } + + return ret; +} + +int TransCtx::get_state() const +{ + return state_; +} + +int TransCtx::set_state(const int target_state) +{ + int ret = OB_SUCCESS; + if (! 
is_state_valid(target_state)) {
+    ret = OB_INVALID_ARGUMENT;
+  } else {
+    state_ = static_cast<int8_t>(target_state);
+  }
+
+  return ret;
+}
+
+int TransCtx::set_ready_participant_count(const int64_t count)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(count < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+  } else {
+    ready_participant_count_ = count;
+  }
+
+  return ret;
+}
+
+int64_t TransCtx::get_revertable_participant_count() const
+{
+  return revertable_participant_count_;
+}
+
+int TransCtx::set_ready_participant_objs(PartTransTask *part_trans_task)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(part_trans_task)) {
+    ret = OB_INVALID_ARGUMENT;
+  } else {
+    ready_participant_objs_ = part_trans_task;
+  }
+
+  return ret;
+}
+
+int TransCtx::init(IObLogTransCtxMgr *host)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_ISNULL(host)) {
+    LOG_ERROR("IObLogTransCtxMgr is NULL");
+    ret = OB_INVALID_ARGUMENT;
+  } else {
+    host_ = host;
+  }
+
+  return ret;
+}
+
+int TransCtx::prepare(PartTransTask &part_trans_task,
+    IObLogPartMgr &part_mgr,
+    const bool print_participant_not_serve_info,
+    volatile bool &stop_flag,
+    bool &need_discard)
+{
+  int ret = OB_SUCCESS;
+  const ObTransID &trans_id = part_trans_task.get_trans_id();
+
+  // Requires locking conditions
+  ObSpinLockGuard guard(lock_);
+  need_discard = false;
+
+  if (state_ < TRANS_CTX_STATE_INVALID) {
+    // Context is deprecated and can no longer be used
+    ret = OB_INVALID_ERROR;
+  }
+  // PREPARE is required only if the status is INVALID
+  else if (TRANS_CTX_STATE_INVALID == state_) {
+    // The partitioned transaction responsible for Prepare must be served, otherwise the transaction context is deprecated
+    if (OB_FAIL(prepare_(part_trans_task, part_mgr, print_participant_not_serve_info,
+        stop_flag))) {
+      if (OB_INVALID_ERROR == ret) {
+        // The partition transaction is not served and the transaction context is discarded
+        need_discard = true;
+        state_ = TRANS_CTX_STATE_DISCARDED;
+        _TCTX_ISTAT("[DISCARD] TRANS_ID=%s PART=%s TRANS_CTX=%p", to_cstring(trans_id),
+            to_cstring(part_trans_task.get_partition()), this);
+        // reset return value
+        ret = OB_SUCCESS;
+      } else if (OB_IN_STOP_STATE != ret) {
+        LOG_ERROR("prepare trans_ctx fail", KR(ret), K(part_trans_task));
+      }
+    }
+  } else {
+    // Other cases indicate that it has already been prepared; return success
+  }
+
+  return ret;
+}
+
+int TransCtx::add_participant(PartTransTask &part_trans_task,
+    bool &is_part_trans_served,
+    bool &is_all_participants_ready)
+{
+  int ret = OB_SUCCESS;
+  const ObTransID &trans_id = part_trans_task.get_trans_id();
+
+  // Requires locking conditions
+  ObSpinLockGuard guard(lock_);
+
+  is_part_trans_served = true;
+  is_all_participants_ready = false;
+
+  if (state_ <= TRANS_CTX_STATE_INVALID) {
+    LOG_ERROR("state not match which is not PREPARED or higher", "state", print_state());
+    ret = OB_STATE_NOT_MATCH;
+  } else if (state_ >= TRANS_CTX_STATE_PARTICIPANT_READY) {
+    // Participants have been added, no later arrivals will be served
+    is_part_trans_served = false;
+    _TCTX_DSTAT("[PART_NOT_SERVE] [DELAY_COMING] TRANS_ID=%s PART=%s "
+        "LOG_ID=%ld LOG_TSTAMP=%ld TRANS_CTX_STATE=%s",
+        to_cstring(trans_id), to_cstring(part_trans_task.get_partition()),
+        part_trans_task.get_prepare_log_id(), part_trans_task.get_timestamp(),
+        print_state());
+  }
+  // Attempt to add participants to the participant READY list
+  else if (OB_FAIL(add_ready_participant_(part_trans_task, is_part_trans_served, is_all_participants_ready))) {
+    LOG_ERROR("add_ready_participant_ fail", KR(ret),
K(part_trans_task), K(is_part_trans_served), + K(is_all_participants_ready)); + } + + return ret; +} + +int TransCtx::prepare_(PartTransTask &host, + IObLogPartMgr &part_mgr, + const bool print_participant_not_serve_info, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(TRANS_CTX_STATE_INVALID != state_)) { + ret = OB_STATE_NOT_MATCH; + } else if (OB_UNLIKELY(NULL != participants_) || OB_UNLIKELY(participant_count_ > 0)) { + LOG_ERROR("participants info are not empty", K(participants_), K(participant_count_)); + ret = OB_ERR_UNEXPECTED; + } else { + const ObTransID &trans_id = host.get_trans_id(); + const ObPartitionKey &host_pkey = host.get_partition(); + int64_t host_prepare_log_timestmap = host.get_timestamp(); + int64_t host_prepare_log_id = host.get_prepare_log_id(); + const ObPartitionLogInfo *part_array = host.get_participants(); + int64_t part_count = host.get_participant_count(); + const int64_t global_trans_version = host.get_global_trans_version(); + // default serve + bool is_serving_host_part = true; + + if (OB_UNLIKELY(part_count <= 0) || OB_ISNULL(part_array)) { + LOG_ERROR("invalid participant array", K(part_count), K(part_array), K(host)); + ret = OB_ERR_UNEXPECTED; + } + // first determine whether to service the host partition transaction + // if not, simply discard the transaction context. + // This ensures that a valid transaction context must have been created by a valid participant, and thus ensure that the transaction context must be processed + else if (OB_FAIL(inc_part_trans_count_on_serving_(is_serving_host_part, + part_mgr, + trans_id, + host_pkey, + host_prepare_log_id, + host_prepare_log_timestmap, + print_participant_not_serve_info, + stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("inc_part_trans_count_on_serving_ fail", KR(ret), K(host_pkey), + K(host_prepare_log_id), K(host_prepare_log_timestmap)); + } + } else if (! is_serving_host_part) { + // host partition not serve + ret = OB_INVALID_ERROR; + } + // Initialize the participant array on the premise of the host partition service + else if (OB_FAIL(init_participant_array_(part_count))) { + LOG_ERROR("init participant array fail", KR(ret), K(part_count)); + } else if (OB_FAIL(trx_sort_elem_.init(*this, trans_id, global_trans_version))) { + LOG_ERROR("trx_sort_elem_ init fail", KR(ret), K(trans_id), K(global_trans_version)); + } else { + trans_id_ = trans_id; + + // Prepare the list of participants in the case of a host partitioned transaction service, the list of participants must contain host partitioned transactions + // Check if each participant is served and add the served participants to the participant list + for (int64_t index = 0; OB_SUCC(ret) && index < part_count; index++) { + const ObPartitionLogInfo &pinfo = part_array[index]; + + if (! pinfo.is_valid()) { + LOG_ERROR("participant info is invalid", K(pinfo), K(index), K(part_array)); + ret = OB_INVALID_DATA; + } else { + bool is_host_pkey = (host_pkey == pinfo.get_partition()); + + // host partition must serve + bool is_serving = is_host_pkey; + + if (! 
is_serving && OB_FAIL(inc_part_trans_count_on_serving_(is_serving, + part_mgr, + trans_id, + pinfo.get_partition(), + pinfo.get_log_id(), + pinfo.get_log_timestamp(), + print_participant_not_serve_info, + stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("inc_part_trans_count_on_serving_ fail", KR(ret), K(pinfo)); + } + } else if (is_serving) { + // Add participant information for the service + new (participants_ + participant_count_) TransPartInfo(pinfo.get_partition()); + participant_count_++; + } + + if (! is_serving) { + if (print_participant_not_serve_info) { + _TCTX_ISTAT("[PREPARE_PART] [PART_NOT_SERVE] TRANS_ID=%s PART=%s LOG_ID=%ld " + "LOG_TSTAMP=%ld PARTICIPANT_INDEX=%ld/%ld", + to_cstring(trans_id), to_cstring(pinfo.get_partition()), pinfo.get_log_id(), + pinfo.get_log_timestamp(), index + 1, part_count); + } else if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL)) { + _TCTX_ISTAT("[PREPARE_PART] [PART_NOT_SERVE] TRANS_ID=%s PART=%s LOG_ID=%ld " + "LOG_TSTAMP=%ld PARTICIPANT_INDEX=%ld/%ld", + to_cstring(trans_id), to_cstring(pinfo.get_partition()), pinfo.get_log_id(), + pinfo.get_log_timestamp(), index + 1, part_count); + } else { + // do nothing + } + } else { + _TCTX_DSTAT("[PREPARE_PART] TRANS_ID=%s PART=%s LOG_ID=%ld LOG_TSTAMP=%ld " + "PARTICIPANT_INDEX=%ld/%ld IS_SERVING=%d", + to_cstring(trans_id), to_cstring(pinfo.get_partition()), pinfo.get_log_id(), + pinfo.get_log_timestamp(), index + 1, part_count, is_serving); + } + } + } // for + + // If the configuration requires participants to be sorted, the sorting operation is performed + // The purpose is to ensure that the list of participants iterated out is the same in a restart cluster scenario + if (OB_SUCC(ret) + && OB_UNLIKELY(NULL != host_ && host_->need_sort_participant()) + && participant_count_ > 0) { + std::sort(participants_, participants_ + participant_count_, TransPartInfoCompare()); + } + + _TCTX_DSTAT("[PREPARE] TRANS_ID=%s SERVED_PARTICIPANTS=%ld/%ld", + to_cstring(trans_id), participant_count_, part_count); + } + + // None of the partition participants are served, additional case + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(participant_count_ <= 0)) { + LOG_ERROR("get participants count unexpected failed", K(participant_count_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(init_trans_id_str_(trans_id))) { + LOG_ERROR("init_trans_id_str_ fail", KR(ret), K(trans_id)); + } else { + switch_state_(TRANS_CTX_STATE_PREPARED); + } + } + } + + return ret; +} + +// Initialize trans_id_str per transaction context to avoid Committer to_cstring ObTransID impacting commit performance +int TransCtx::init_trans_id_str_(const transaction::ObTransID &trans_id) +{ + int ret = OB_SUCCESS; + // Use the maximum value for the buf length of the ObTransID, 1024 is large enough + static const int64_t TRANS_ID_BUF_LENGTH = 1024; + char trans_id_buf[TRANS_ID_BUF_LENGTH]; + int64_t pos = 0; + + if (OB_FAIL(common::databuff_printf(trans_id_buf, TRANS_ID_BUF_LENGTH, pos, "%s", to_cstring(trans_id)))) { + LOG_ERROR("databuff_printf fail", K(ret), K(trans_id), K(trans_id_buf), K(TRANS_ID_BUF_LENGTH), K(pos)); + } else if (OB_UNLIKELY(pos <= 0 || pos >= TRANS_ID_BUF_LENGTH)) { + LOG_ERROR("local buf pos is not valid", K(pos), K(TRANS_ID_BUF_LENGTH), K(trans_id)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t buf_len = pos + 1; + char *buf = static_cast(allocator_.alloc(buf_len)); + + if (OB_ISNULL(buf)) { + LOG_ERROR("allocator_ alloc for trans id str fail", K(buf), K(buf_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + 
MEMCPY(buf, trans_id_buf, pos);
+      buf[pos] = '\0';
+
+      trans_id_str_.assign(buf, static_cast<int32_t>(buf_len));
+    }
+  }
+
+  return ret;
+}
+
+void TransCtx::switch_state_(const int target_state)
+{
+  int ret = OB_SUCCESS;
+
+  state_ = static_cast<int8_t>(target_state);
+
+  if (NULL != host_) {
+    if (OB_FAIL(host_->update_stat_info(target_state))) {
+      LOG_ERROR("update stat info failed", KR(ret), K(target_state));
+    }
+  }
+}
+
+int TransCtx::add_ready_participant_(PartTransTask &part_trans_task,
+    bool &is_part_trans_served,
+    bool &is_all_participants_ready)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY(TRANS_CTX_STATE_PREPARED != state_)) {
+    ret = OB_STATE_NOT_MATCH;
+  } else {
+    is_all_participants_ready = false;
+
+    // Check whether the partition is in the participant list; if it exists, record it in the participant array
+    bool existed = false;
+    for (int64_t index = 0; OB_SUCC(ret) && ! existed && index < participant_count_; index++) {
+      if (part_trans_task.get_partition() == participants_[index].pkey_) {
+        existed = true;
+        participants_[index].obj_ = &part_trans_task;
+      }
+    }
+
+    if (OB_SUCC(ret)) {
+      // If the partitioned transaction is not in the list of participants, the partitioned transaction is no longer in service
+      if (! existed) {
+        is_part_trans_served = false;
+      }
+      // If the partition transaction is in the participant list, add it to the READY list
+      else {
+        ready_participant_count_++;
+        is_all_participants_ready = (ready_participant_count_ == participant_count_);
+
+        // If all participants are gathered, link them together in order to construct a list of participant objects
+        if (is_all_participants_ready && OB_FAIL(build_ready_participants_list_())) {
+          LOG_ERROR("build_ready_participants_list_ fail", KR(ret), K(is_all_participants_ready),
+              K(ready_participant_count_), K(participant_count_));
+        } else {
+          // All participants are READY, advance the state
+          if (is_all_participants_ready) {
+            switch_state_(TRANS_CTX_STATE_PARTICIPANT_READY);
+          }
+
+          _TCTX_DSTAT("[ADD_PART] TRANS_ID=%s READY_PARTS=%ld/%ld READY=%d",
+              to_cstring(trans_id_), ready_participant_count_,
+              participant_count_, is_all_participants_ready);
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+int TransCtx::build_ready_participants_list_()
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(participants_)) {
+    LOG_ERROR("invalid participants", K(participants_));
+    ret = OB_ERR_UNEXPECTED;
+  } else {
+    // Linked list of participants
+    for (int64_t index = 0; OB_SUCCESS == ret && index < participant_count_; index++) {
+      if (OB_ISNULL(participants_[index].obj_)) {
+        LOG_ERROR("participant object is NULL", K(participants_[index]), K(index),
+            K(participant_count_), K(state_), K(trans_id_));
+        ret = OB_ERR_UNEXPECTED;
+      } else if (index < participant_count_ - 1) {
+        participants_[index].obj_->set_next_task(participants_[index + 1].obj_);
+      } else {
+        participants_[index].obj_->set_next_task(NULL);
+      }
+    }
+
+    // set ready obj list
+    if (OB_SUCCESS == ret) {
+      ready_participant_objs_ = participants_[0].obj_;
+    }
+  }
+  return ret;
+}
+
+int TransCtx::inc_part_trans_count_on_serving_(bool &is_serving,
+    IObLogPartMgr &part_mgr,
+    const ObTransID &trans_id,
+    const ObPartitionKey &key,
+    const uint64_t prepare_log_id,
+    const int64_t prepare_log_timestamp,
+    const bool print_participant_not_serve_info,
+    volatile bool &stop_flag)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(!trans_id.is_valid())
+      || OB_UNLIKELY(!key.is_valid())
+      || OB_UNLIKELY(prepare_log_timestamp <= 0)) {
+    ret = OB_INVALID_ARGUMENT;
+  } else {
RETRY_FUNC(stop_flag, (part_mgr), inc_part_trans_count_on_serving, + is_serving, + key, + prepare_log_id, + prepare_log_timestamp, + print_participant_not_serve_info, + DATA_OP_TIMEOUT); + + if (OB_SUCC(ret)) { + _TCTX_DSTAT("[INC_TRANS_COUNT] IS_SERVING=%d TRANS_ID=%s PART=%s LOG_ID=%ld LOG_TSTAMP=%ld", + is_serving, to_cstring(trans_id), to_cstring(key), prepare_log_id, prepare_log_timestamp); + } + } + + return ret; +} + +int TransCtx::sequence(const int64_t seq) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + if (OB_UNLIKELY(seq < 0)) { + LOG_ERROR("invalid argument", K(seq)); + ret = OB_INVALID_ARGUMENT; + } else if (TRANS_CTX_STATE_PARTICIPANT_READY != state_) { + LOG_ERROR("state not match which is not DEP_PARSED", "state", print_state()); + ret = OB_STATE_NOT_MATCH; + } else { + seq_ = seq; + switch_state_(TRANS_CTX_STATE_SEQUENCED); + + // Update the global sequence number and Schema version for each participant + PartTransTask *task = ready_participant_objs_; + while (NULL != task) { + task->set_global_trans_seq(seq); + task = task->next_task(); + } + + _TCTX_DSTAT("[SEQUENCE] COMMIT_VERSION=%ld TRANS_ID=%s SEQ=%ld", + trx_sort_elem_.get_global_trans_version(), + to_cstring(trans_id_), + seq_); + } + + return ret; +} + +int TransCtx::wait_data_ready(const int64_t timeout, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + int64_t end_time = get_timestamp() + timeout; + + if (OB_UNLIKELY(TRANS_CTX_STATE_SEQUENCED != state_)) { + LOG_ERROR("state is not match which is not SEQUENCED", "state", print_state()); + ret = OB_STATE_NOT_MATCH; + } else { + PartTransTask *part = ready_participant_objs_; + while (OB_SUCC(ret) && NULL != part) { + RETRY_FUNC(stop_flag, (*part), wait_data_ready, DATA_OP_TIMEOUT); + + int64_t left_time = end_time - get_timestamp(); + if (left_time <= 0) { + //_TCTX_ISTAT() + OBLOG_LOG(INFO, "wait_data_ready timeout", KPC(this), KPC(part)); + } + + if (OB_SUCC(ret)) { + part = part->next_task(); + } + } // while + } + + if (OB_SUCC(ret)) { + switch_state_(TRANS_CTX_STATE_DATA_READY); + } + + return ret; +} + +int TransCtx::commit() +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + if (OB_ISNULL(ready_participant_objs_)) { + LOG_ERROR("ready participant objs is null", + KPC(ready_participant_objs_), KPC(this)); + ret = OB_ERR_UNEXPECTED; + } else { + const bool is_ddl_trans = ready_participant_objs_->is_ddl_trans(); + + if (TRANS_CTX_STATE_DATA_READY != state_) { + LOG_ERROR("state not match which is not DATA_READY", "state", print_state(), K(is_ddl_trans)); + ret = OB_STATE_NOT_MATCH; + } else { + _TCTX_DSTAT("[COMMIT] TRANS_ID=%s/%ld PARTICIPANTS=%ld SEQ=%ld ", + to_cstring(trans_id_), trx_sort_elem_.get_global_trans_version(), + ready_participant_count_, seq_); + + switch_state_(TRANS_CTX_STATE_COMMITTED); + } + } + + return ret; +} + +int TransCtx::inc_revertable_participant_count(bool &all_participant_revertable) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + if (OB_UNLIKELY(TRANS_CTX_STATE_COMMITTED != state_)) { + LOG_ERROR("state is not match which is not COMMITTED", "state", print_state()); + ret = OB_STATE_NOT_MATCH; + } else { + int64_t result_count = ATOMIC_AAF(&revertable_participant_count_, 1); + all_participant_revertable = (result_count == ready_participant_count_); + + if (result_count > ready_participant_count_) { + LOG_ERROR("revertable participant count is larger than ready participant count", + "revertable_participant_count", result_count, + K(ready_participant_count_), + K(*this)); + ret = 
OB_ERR_UNEXPECTED; + } + } + + return ret; +} + +int TransCtx::revert_participants() +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + + if (OB_UNLIKELY(TRANS_CTX_STATE_COMMITTED != state_)) { + LOG_ERROR("TransCtx has not been sequenced, can not revert participants", + "state", print_state()); + ret = OB_STATE_NOT_MATCH; + } else if (OB_UNLIKELY(revertable_participant_count_ != ready_participant_count_)) { + LOG_ERROR("revertable_participant_count does not equal to participant count", + K(revertable_participant_count_), K(ready_participant_count_)); + ret = OB_STATE_NOT_MATCH; + } else { + _TCTX_DSTAT("[REVERT_PARTICIPANTS] TRANS_ID=%s PARTICIPANTS=%ld SEQ=%ld", + to_cstring(trans_id_), participant_count_, seq_); + + // Note: All participants are recalled by external modules + ready_participant_objs_ = NULL; + ready_participant_count_ = 0; + revertable_participant_count_ = 0; + + switch_state_(TRANS_CTX_STATE_PARTICIPANT_REVERTED); + } + + return ret; +} + +int TransCtx::lock() +{ + return lock_.lock(); +} + +int TransCtx::unlock() +{ + return lock_.unlock(); +} + +int TransCtx::get_tenant_id(uint64_t &tenant_id) const +{ + int ret = OB_SUCCESS; + ObSpinLockGuard guard(lock_); + if (OB_ISNULL(ready_participant_objs_)) { + LOG_ERROR("ready participant objs is null, can not decide tenant id", + KPC(ready_participant_objs_), KPC(this)); + ret = OB_ERR_UNEXPECTED; + } else { + // The tenant ID of the first participant is used as the tenant ID of the distributed transaction + // TODO: support for cross-tenant transactions + tenant_id = ready_participant_objs_->get_tenant_id(); + } + return ret; +} + +int TransCtx::init_participant_array_(const int64_t part_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(part_count <= 0)) { + LOG_ERROR("invalid argument", K(part_count)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(NULL != participants_)) { + LOG_ERROR("participant array has been initialized", K(participants_), K(participant_count_)); + ret = OB_INIT_TWICE; + } else { + int64_t parts_alloc_size = part_count * sizeof(participants_[0]); + + participants_ = static_cast(allocator_.alloc(parts_alloc_size)); + + if (OB_ISNULL(participants_)) { + LOG_ERROR("allocate memory for participant array fail", K(part_count), K(parts_alloc_size)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + // Number of valid participants is 0 + participant_count_ = 0; + } + } + + return ret; +} + +void TransCtx::destroy_participant_array_() +{ + if (NULL != participants_ && participant_count_ > 0) { + for (int64_t index = 0; index < participant_count_; index++) { + participants_[index].~TransPartInfo(); + } + + allocator_.free(participants_); + participants_ = NULL; + participant_count_ = 0; + } +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_trans_ctx.h b/src/liboblog/src/ob_log_trans_ctx.h new file mode 100644 index 0000000000000000000000000000000000000000..c7b4fc82bdb77b30e674b8239f0b058f1fd27bd1 --- /dev/null +++ b/src/liboblog/src/ob_log_trans_ctx.h @@ -0,0 +1,348 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_TRANS_CTX_ +#define OCEANBASE_LIBOBLOG_TRANS_CTX_ + +#include "share/ob_define.h" // OB_* +#include "lib/allocator/ob_allocator.h" // ObIAllocator +#include "lib/hash/ob_ext_iter_hashset.h" // ObExtIterHashSet +#include "lib/container/ob_se_array.h" // ObSEArray +#include "lib/allocator/page_arena.h" // ObArenaAllocator +#include "lib/lock/ob_spin_lock.h" // ObSpinLock +#include "common/ob_partition_key.h" // ObPartitionKey +#include "storage/transaction/ob_trans_define.h" // ObTransID + +#include "ob_log_part_trans_task.h" // PartTransTask, IStmtTask, DmlStmtTask + +namespace oceanbase +{ +namespace liboblog +{ + +class IObLogPartMgr; +class IObLogTransCtxMgr; +class TransCtx; + +// Participant information +struct TransPartInfo +{ + PartTransTask *obj_; + common::ObPartitionKey pkey_; + + explicit TransPartInfo(const common::ObPartitionKey &pkey) : obj_(NULL), pkey_(pkey) + {} + + bool operator < (const TransPartInfo &other) const + { + return pkey_ < other.pkey_; + } + + TO_STRING_KV(KPC_(obj), K_(pkey)); +}; + +struct TransPartInfoCompare +{ + bool operator() (const TransPartInfo &p1, const TransPartInfo &p2) const + { + return p1 < p2; + } +}; + +// Allows assignment and copy constructs for use in priority queues +class TransCtxSortElement +{ +public: + TransCtxSortElement(); + ~TransCtxSortElement(); + +public: + int init(TransCtx &trans_ctx_host, + const transaction::ObTransID &trans_id, + const int64_t global_trans_version); + void reset(); + + TransCtx *get_trans_ctx_host() { return trans_ctx_host_; } + const transaction::ObTransID &get_trans_id() const { return trans_id_; } + int64_t get_global_trans_version() const { return global_trans_version_; } + + struct TransCtxCmp + { + bool operator() (const TransCtxSortElement &tx1, const TransCtxSortElement &tx2) const; + }; + + TO_STRING_KV( + K_(trans_id), + K_(global_trans_version)); + +private: + TransCtx *trans_ctx_host_; + transaction::ObTransID trans_id_; + int64_t global_trans_version_; // Global transaction version +}; + +class TransCtx +{ +public: + enum + { + TRANS_CTX_STATE_DISCARDED = -1, // Deprecated and useless, the transaction context will be deleted + TRANS_CTX_STATE_INVALID = 0, // invalid + TRANS_CTX_STATE_PREPARED = 1, // ready + TRANS_CTX_STATE_PARTICIPANT_READY = 2, // participant aggregation ready + TRANS_CTX_STATE_SEQUENCED = 3, // sequence done + TRANS_CTX_STATE_DATA_READY = 4, // data is ready when complete formated in memory-mode or stored in storage-mode + TRANS_CTX_STATE_COMMITTED = 5, // complete commit, waiting in user queue + TRANS_CTX_STATE_PARTICIPANT_REVERTED = 6, // all participant objects have been reclaimed + TRANS_CTX_STATE_MAX + }; + + enum { DATA_OP_TIMEOUT = 10 * 1000 * 1000 }; + +public: + static const int64_t PAGE_SIZE = common::OB_MALLOC_NORMAL_BLOCK_SIZE; + static const int64_t DEFAULT_DEP_SET_SIZE = 64; + static const int64_t PRINT_LOG_INTERVAL = 10 * _SEC_; + +public: + TransCtx(); + virtual ~TransCtx(); + +public: + void reset(); + int init(IObLogTransCtxMgr *host); + + /// Prepare the list of participants and initialize the transaction context: ensure that the state is no longer INVALID upon return + /// 1. 
If in INVALID state, initialize it and prepare the list of participants + /// a) If the current participant is in service, the initialisation is successful and the state changes to PREPARED + /// b) If the current participant is not serviced, the initialisation fails and the state changes to DISCARDED and is deprecated + /// 2. return INVALID_ERROR if in DISCARDED state + /// 3. If in any other state, return success directly + /// + /// @param [in] part_trans_task Target participant + /// @param [in] part_mgr Partition transaction manager to determine if the participant is served + /// @param [in] stop_flag Exit status flag + /// @param [out] need_discard Return value: marks whether the transaction context needs to be deprecated + /// + /// @retval OB_SUCCESS Success + /// @retval OB_INVALID_ERROR This transaction context is deprecated + /// @retval OB_IN_STOP_STATE exist + /// @retval other_error_code Fail + int prepare(PartTransTask &part_trans_task, + IObLogPartMgr &part_mgr, + const bool print_partition_not_serve_info, + volatile bool &stop_flag, + bool &need_discard); + + /// Adds participants, determines if they are in service + /// Requires status of at least PREPARED, not INVALID or DISCARDED + /// + /// @param [in] part_trans_task Target participant + /// @param [out] is_part_trans_served Return value: marks whether the partition transaction is being served + /// @param [out] is_all_participants_ready Return value: marks if all participants are READY + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int add_participant(PartTransTask &part_trans_task, + bool &is_part_trans_served, + bool &is_all_participants_ready); + + /// Sequence the trans + /// Set the sequence number and set the status to SEQUENCED + /// Requires that all participants are gathered, i.e. the status is PARTICIPANT_READY + /// + /// @param seq sequence + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int sequence(const int64_t seq); + + /// Wait all participant which has been data ready + /// Requires that sequencing has been completed, i.e., the status is SEQUENCED + /// If all participants have been data ready, the state is advanced to TRANS_CTX_STATE_DATA_READY + /// + /// @retval OB_SUCCESS succ + /// @retval Other return values fail + int wait_data_ready(const int64_t timeout, + volatile bool &stop_flag); + + /// mark status to COMMITTED + /// Requires all participants to have completed formatting, i.e. a status of SEQUENCED + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int commit(); + + /// Increase the number of participants that can be reclaimed + /// Request status of committed: COMMITTED + /// + /// @param [out] all_participant_revertable Are all participants recoverable + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int inc_revertable_participant_count(bool &all_participant_revertable); + + /// Reclaim all participants + /// Requires a status of COMMITTED and that all participants can be reclaimed, i.e. 
the number of reclaimed participants is equal to the number of all participants + /// + /// @retval OB_SUCCESS Success + /// @retval other_error_code Fail + int revert_participants(); + +public: + static bool is_state_valid(const int state) { return state >= TRANS_CTX_STATE_DISCARDED && state < TRANS_CTX_STATE_MAX; } + bool is_participants_ready() { return TRANS_CTX_STATE_PARTICIPANT_READY == state_; } + bool is_sequenced() const { return TRANS_CTX_STATE_SEQUENCED == state_; } + bool is_data_ready() const { return TRANS_CTX_STATE_DATA_READY == state_; } + const transaction::ObTransID &get_trans_id() const { return trans_id_; } + const common::ObString &get_trans_id_str() const { return trans_id_str_; } + const common::ObString &get_major_version_str() const { return major_version_str_; } + TransCtxSortElement &get_trx_sort_elem() { return trx_sort_elem_; } + PartTransTask *get_participant_objs() { return ready_participant_objs_; } + int64_t get_ready_participant_count() const { return ready_participant_count_; } + + void br_committer_queue_signal() { br_committer_queue_cond_.signal(); } + void br_committer_queue_timedwait(const int64_t time) { br_committer_queue_cond_.timedwait(time); } + int64_t get_total_br_count() const { return total_br_count_; } + void set_total_br_count(const int64_t total_br_count) { total_br_count_ = total_br_count; } + bool is_all_br_committed() const { return ATOMIC_LOAD(&total_br_count_) == ATOMIC_LOAD(&committed_br_count_); } + void inc_committed_br_count() { ATOMIC_INC(&committed_br_count_); } + int64_t get_valid_part_trans_task_count() const { return valid_part_trans_task_count_; } + void set_valid_part_trans_task_count(const int64_t valid_part_trans_task_count) { valid_part_trans_task_count_ = valid_part_trans_task_count; } + + int lock(); + int unlock(); + int get_tenant_id(uint64_t &tenant_id) const; + + // for unittest start + int set_trans_id(const transaction::ObTransID &trans_id); + const TransPartInfo *get_participants() const { return participants_; } + int64_t get_participant_count() const { return participant_count_; } + int get_state() const; + + int set_state(const int target_state); + int set_ready_participant_count(const int64_t count); + int64_t get_revertable_participant_count() const; + int set_ready_participant_objs(PartTransTask *part_trans_task); + // for unittest end + +public: + const char *print_state() const + { + const char *ret = "UNKNOWN"; + switch (state_) { + case TRANS_CTX_STATE_DISCARDED: + ret = "DISCARDED"; + break; + case TRANS_CTX_STATE_INVALID: + ret = "INVALID"; + break; + case TRANS_CTX_STATE_PREPARED: + ret = "PREPARED"; + break; + case TRANS_CTX_STATE_PARTICIPANT_READY: + ret = "PARTICIPANT_READY"; + break; + case TRANS_CTX_STATE_SEQUENCED: + ret = "SEQUENCED"; + break; + case TRANS_CTX_STATE_DATA_READY: + ret = "DATA_READY"; + break; + case TRANS_CTX_STATE_COMMITTED: + ret = "COMMITTED"; + break; + case TRANS_CTX_STATE_PARTICIPANT_REVERTED: + ret = "PARTICIPANT_REVERTED"; + break; + default: + ret = "UNKNOWN"; + break; + } + return ret; + } + +public: + TO_STRING_KV( + "state", print_state(), + K_(trx_sort_elem), + K_(seq), + K_(participants), + K_(participant_count), + KP_(ready_participant_objs), + K_(ready_participant_count), + K_(total_br_count), + K_(committed_br_count), + K_(revertable_participant_count)); + +private: + // Prepare the transaction context + // INVALID -> PREPARED + int prepare_(PartTransTask &host, + IObLogPartMgr &part_mgr, + const bool print_partition_not_serve_info, + volatile bool 
&stop_flag); + int inc_part_trans_count_on_serving_(bool &is_serving, + IObLogPartMgr &part_mgr, + const transaction::ObTransID &trans_id, + const common::ObPartitionKey &key, + const uint64_t prepare_log_id, + const int64_t prepare_log_timestamp, + const bool print_partition_not_serve_info, + volatile bool &stop_flag); + int add_ready_participant_(PartTransTask &part_trans_task, + bool &is_part_trans_served, + bool &is_all_participants_ready); + void switch_state_(const int target_state); + int init_participant_array_(const int64_t part_count); + void destroy_participant_array_(); + int build_ready_participants_list_(); + int init_trans_id_str_(const transaction::ObTransID &trans_id); + // only init major version str for observer version less than 2_0_0 + int init_major_version_str_(const common::ObVersion &freeze_version); + +private: + IObLogTransCtxMgr *host_; + + int8_t state_; + transaction::ObTransID trans_id_; + common::ObString trans_id_str_; + common::ObString major_version_str_; + TransCtxSortElement trx_sort_elem_; + + // sequence info + int64_t seq_; //Global sequence number of the distributed transaction + + // Participant Information + TransPartInfo *participants_; // Participant Information + int64_t participant_count_; // Participant count + PartTransTask *ready_participant_objs_; // List of added participant objects + int64_t ready_participant_count_; // Amount of added participant objects + + common::ObCond br_committer_queue_cond_; + int64_t total_br_count_ CACHE_ALIGNED; + int64_t committed_br_count_ CACHE_ALIGNED; + int64_t valid_part_trans_task_count_; // Number of valid participants count + + // status info + int64_t revertable_participant_count_; // Number of participants able to be released + + // allocator + common::ObArenaAllocator allocator_; + mutable common::ObSpinLock lock_; + +private: + DISALLOW_COPY_AND_ASSIGN(TransCtx); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_TRANS_CTX_ */ diff --git a/src/liboblog/src/ob_log_trans_ctx_mgr.cpp b/src/liboblog/src/ob_log_trans_ctx_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..55fa6c466763e7f30f229b48a8a5ebdba026881e --- /dev/null +++ b/src/liboblog/src/ob_log_trans_ctx_mgr.cpp @@ -0,0 +1,337 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#define STAT_STR "[STAT] [TRANS_CTX] " +#define TCTX_STAT(level, format_str, ...) OBLOG_LOG(level, STAT_STR format_str, ##__VA_ARGS__) +#define TCTX_STAT_DEBUG(format_str, ...) TCTX_STAT(DEBUG, format_str, ##__VA_ARGS__) +#define TCTX_STAT_INFO(format_str, ...) TCTX_STAT(INFO, format_str, ##__VA_ARGS__) + +#include "ob_log_trans_ctx_mgr.h" +#include "ob_log_utils.h" // get_timestamp + +#define DATA_PRINT(buf, size, pos, fmt, ...) 
\ + if (OB_SUCC(ret)) { \ + if (OB_FAIL(databuff_printf(buf, size, pos, fmt, ##__VA_ARGS__))) {\ + LOG_WARN("databuff_printf fail", KR(ret), K(size), K(pos)); \ + } \ + } \ + +using namespace oceanbase::common; +using namespace oceanbase::transaction; +namespace oceanbase +{ +namespace liboblog +{ +void ObLogTransCtxMgr::Scanner::operator() (const transaction::ObTransID &trans_id, TransCtx *trans_ctx) +{ + int ret = OB_SUCCESS; + + if (NULL != trans_ctx && NULL != buffer_ && buffer_size_ > 0 && trans_id.is_valid()) { + if (pos_ >= buffer_size_) { + LOG_WARN("print transaction information overflow", K(buffer_size_), K(pos_), K(trans_id), K(*trans_ctx)); + } else if (OB_FAIL(trans_ctx->lock())) { + LOG_ERROR("lock trans_ctx fail", KR(ret), K(trans_id), K(*trans_ctx)); + } else if (trans_ctx->get_state() >= TransCtx::TRANS_CTX_STATE_PREPARED + && trans_ctx->get_state() < TransCtx::TRANS_CTX_STATE_SEQUENCED) { + int state = trans_ctx->get_state(); + char *ptr = buffer_; + int64_t size = buffer_size_; + const TransPartInfo *participants = trans_ctx->get_participants(); + int64_t participant_count = trans_ctx->get_participant_count(); + + trans_count_[state]++; + valid_trans_count_++; + + DATA_PRINT(ptr, size, pos_, "trans_id:%s;", to_cstring(trans_id)); + DATA_PRINT(ptr, size, pos_, "state:%s;", trans_ctx->print_state()); + DATA_PRINT(ptr, size, pos_, "ready_participant_count:%ld/%ld;", + trans_ctx->get_ready_participant_count(), participant_count); + DATA_PRINT(ptr, size, pos_, "participants:"); + + for (int64_t index = 0; OB_SUCC(ret) && index < participant_count; index++) { + DATA_PRINT(ptr, size, pos_, "%ld(%ld)", + participants[index].pkey_.get_table_id(), + participants[index].pkey_.get_partition_id()); + + // Not NULL here, means the participant is ready + if (NULL != participants[index].obj_) { + DATA_PRINT(ptr, size, pos_, "[SEQ=%ld],", + participants[index].obj_->get_checkpoint_seq()); + } else { + DATA_PRINT(ptr, size, pos_, "[NOT_READY],"); + } + } + + if (OB_SUCC(ret)) { + if (participant_count > 0) { + pos_--; + } + + DATA_PRINT(ptr, size, pos_, ";"); + } + + if (OB_SUCC(ret)) { + DATA_PRINT(ptr, size, pos_, "\n"); + } + } + + (void)trans_ctx->unlock(); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +ObLogTransCtxMgr::ObLogTransCtxMgr() : + inited_(false), + map_(), + need_sort_participant_(false), + valid_trans_count_(0), + created_trans_count_(0), + last_created_trans_count_(0), + sequenced_trans_count_(0), + last_sequenced_trans_count_(0), + last_stat_time_(0) +{ +} + +ObLogTransCtxMgr::~ObLogTransCtxMgr() +{ + destroy(); +} + +int ObLogTransCtxMgr::init(const int64_t max_cached_trans_ctx_count, + const bool need_sort_participant) +{ + int ret = OB_SUCCESS; + + if (inited_) { + ret = OB_INIT_TWICE; + } else if (max_cached_trans_ctx_count <= 0) { + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(map_.init(max_cached_trans_ctx_count, BLOCK_SIZE, ObModIds::OB_LOG_TRANS_CTX))) { + LOG_ERROR("init TransCtxMap fail", KR(ret), K(max_cached_trans_ctx_count)); + } else { + inited_ = true; + + need_sort_participant_ = need_sort_participant; + valid_trans_count_ = 0; + (void)memset(trans_count_, 0, sizeof(trans_count_)); + created_trans_count_ = 0; + last_created_trans_count_ = 0; + sequenced_trans_count_ = 0; + last_sequenced_trans_count_ = 0; + last_stat_time_ = get_timestamp(); + } + + return ret; +} + +void ObLogTransCtxMgr::destroy() +{ + inited_ = false; + map_.destroy(); + + need_sort_participant_ = false; + 
valid_trans_count_ = 0; + (void)memset(trans_count_, 0, sizeof(trans_count_)); + created_trans_count_ = 0; + last_created_trans_count_ = 0; + sequenced_trans_count_ = 0; + last_sequenced_trans_count_ = 0; + last_stat_time_ = 0; +} + +int ObLogTransCtxMgr::get_trans_ctx(const transaction::ObTransID &key, TransCtx *&trans_ctx, bool enable_create) +{ + int ret = OB_SUCCESS; + if (! inited_) { + ret = OB_NOT_INIT; + } else if (! key.is_valid()) { + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(map_.get(key, trans_ctx, enable_create))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("get TransCtx from map fail", KR(ret), K(key), K(enable_create)); + } + } else { + if (enable_create) { + if (OB_FAIL(trans_ctx->init(this))) { + LOG_ERROR("trans_ctx init fail", KR(ret)); + } + } + + TCTX_STAT_DEBUG("get_trans_ctx", K(key), K(trans_ctx)); + } + + return ret; +} + +int ObLogTransCtxMgr::revert_trans_ctx(TransCtx *trans_ctx) +{ + int ret = OB_SUCCESS; + if (! inited_) { + ret = OB_NOT_INIT; + } else if (NULL == trans_ctx) { + ret = OB_INVALID_ARGUMENT; + } else { + TCTX_STAT_DEBUG("revert_trans_ctx", "key", trans_ctx->get_trans_id(), K(trans_ctx)); + + if (OB_FAIL(map_.revert(trans_ctx))) { + LOG_ERROR("revert TransCtx fail", KR(ret), K(trans_ctx)); + } else { + // succ + trans_ctx = NULL; + } + } + return ret; +} + +int ObLogTransCtxMgr::remove_trans_ctx(const transaction::ObTransID &key) +{ + int ret = OB_SUCCESS; + if (! inited_) { + ret = OB_NOT_INIT; + } else if (! key.is_valid()) { + ret = OB_INVALID_ARGUMENT; + } else { + TCTX_STAT_DEBUG("remove_trans_ctx", K(key)); + + if (OB_FAIL(map_.remove(key))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("remove TransCtx fail", KR(ret), K(key)); + } + } else { + // succ + } + } + return ret; +} + +int ObLogTransCtxMgr::update_stat_info(const int trans_state) +{ + int ret = OB_SUCCESS; + static const int START_STATE = TransCtx::TRANS_CTX_STATE_INVALID + 1; + static const int END_STATE = TransCtx::TRANS_CTX_STATE_MAX - 1; + if (!TransCtx::is_state_valid(trans_state)) { + ret = OB_INVALID_ARGUMENT; + } else { + if (START_STATE == trans_state) { + ATOMIC_INC(&created_trans_count_); + ATOMIC_INC(&valid_trans_count_); + ATOMIC_INC(&(trans_count_[trans_state])); + } else if (END_STATE == trans_state) { + ATOMIC_DEC(&valid_trans_count_); + ATOMIC_DEC(&(trans_count_[trans_state - 1])); + } else { + ATOMIC_INC(&(trans_count_[trans_state])); + ATOMIC_DEC(&(trans_count_[trans_state - 1])); + if (TransCtx::TRANS_CTX_STATE_SEQUENCED == trans_state) { + ATOMIC_INC(&sequenced_trans_count_); + } + } + } + + return ret; +} + +void ObLogTransCtxMgr::print_stat_info() +{ + int64_t current_timestamp = get_timestamp(); + int64_t local_created_trans_count = ATOMIC_LOAD(&created_trans_count_); + int64_t local_last_created_trans_count = ATOMIC_LOAD(&last_created_trans_count_); + int64_t local_sequenced_trans_count = ATOMIC_LOAD(&sequenced_trans_count_); + int64_t local_last_sequenced_trans_count = ATOMIC_LOAD(&last_sequenced_trans_count_); + int64_t local_last_stat_time = last_stat_time_; + int64_t delta_time = current_timestamp - local_last_stat_time; + int64_t delta_create_count = local_created_trans_count - local_last_created_trans_count; + int64_t delta_sequence_count = local_sequenced_trans_count - local_last_sequenced_trans_count; + double create_tps = 0.0; + double sequence_tps = 0.0; + + // Update the last statistics + last_created_trans_count_ = local_created_trans_count; + last_sequenced_trans_count_ = local_sequenced_trans_count; + last_stat_time_ = 
current_timestamp; + + if (delta_time > 0) { + create_tps = (double)(delta_create_count) * 1000000.0 / (double)delta_time; + sequence_tps = (double)(delta_sequence_count) * 1000000.0 / (double) delta_time; + } + + _LOG_INFO("[STAT] [PERF] CREATE_TRANS_TPS=%.3lf SEQ_TRANS_TPS=%.3lf CREATE_TRANS_COUNT=%ld " + "SEQ_TRANS_COUNT=%ld", + create_tps, sequence_tps, local_created_trans_count - local_last_created_trans_count, + local_sequenced_trans_count - local_last_sequenced_trans_count); + + _LOG_INFO("[STAT] [TRANS_COUNT] TOTAL=%ld PREPARED=%ld PART_READY=%ld SEQ=%ld DATA_READY=%ld " + "COMMITTED=%ld RECYCLE=%ld", + valid_trans_count_, + trans_count_[TransCtx::TRANS_CTX_STATE_PREPARED], + trans_count_[TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY], + trans_count_[TransCtx::TRANS_CTX_STATE_SEQUENCED], + trans_count_[TransCtx::TRANS_CTX_STATE_DATA_READY], + trans_count_[TransCtx::TRANS_CTX_STATE_COMMITTED], + trans_count_[TransCtx::TRANS_CTX_STATE_PARTICIPANT_REVERTED]); + + map_.print_state(STAT_STR); +} + +int64_t ObLogTransCtxMgr::get_trans_count(const int trans_ctx_state) +{ + int64_t ret_count = 0; + + if (TransCtx::is_state_valid(trans_ctx_state)) { + ret_count = trans_count_[trans_ctx_state]; + } else { + LOG_ERROR("trans ctx state is invalid", K(trans_ctx_state)); + } + + return ret_count; +} + +int ObLogTransCtxMgr::dump_pending_trans_info(char *buffer, const int64_t size, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (NULL != buffer && size > 0 && pos < size) { + Scanner scanner; + + scanner.buffer_ = buffer; + scanner.buffer_size_ = size; + scanner.pos_ = pos; + + if (OB_FAIL(map_.for_each(scanner))) { + LOG_ERROR("scan trans ctx map fail", KR(ret)); + } else { + _LOG_INFO("[STAT] [PENDING_TRANS_COUNT] TOTAL=%ld PREPARED=%ld PART_READY=%ld ", + scanner.valid_trans_count_, + scanner.trans_count_[TransCtx::TRANS_CTX_STATE_PREPARED], + scanner.trans_count_[TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY]); + + DATA_PRINT(scanner.buffer_, scanner.buffer_size_, scanner.pos_, "[STAT] [TRANS_COUNT] " + "TOTAL=%ld PREPARED=%ld PART_READY=%ld\n", + scanner.valid_trans_count_, + scanner.trans_count_[TransCtx::TRANS_CTX_STATE_PREPARED], + scanner.trans_count_[TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY]); + + pos = scanner.pos_; + } + } else { + LOG_ERROR("invalid arguments", K(buffer), K(size), K(pos)); + ret = OB_INVALID_ARGUMENT; + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_log_trans_ctx_mgr.h b/src/liboblog/src/ob_log_trans_ctx_mgr.h new file mode 100644 index 0000000000000000000000000000000000000000..6496f872e5bea978954119c1ecec0a9317b13663 --- /dev/null +++ b/src/liboblog/src/ob_log_trans_ctx_mgr.h @@ -0,0 +1,152 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OCEANBASE_LIBOBLOG_LOG_TRANS_CTX_MGR_
+#define OCEANBASE_LIBOBLOG_LOG_TRANS_CTX_MGR_
+
+#include "ob_log_trans_ctx.h"                       // TransCtx
+
+#include "ob_easy_hazard_map.h"                     // ObEasyHazardMap
+#include "storage/transaction/ob_trans_define.h"    // ObTransID
+
+namespace oceanbase
+{
+namespace liboblog
+{
+class IObLogTransCtxMgr
+{
+public:
+  IObLogTransCtxMgr() {}
+  virtual ~IObLogTransCtxMgr() {}
+
+public:
+  /// Get the transaction context; supports creating a new context if it does not exist
+  ///
+  /// @note must be called in pairs with the revert_trans_ctx() function
+  ///
+  /// @param [in] key            Trans ID
+  /// @param [out] trans_ctx     returned trans context
+  /// @param [in] enable_create  Whether to allow the creation of a new object when the transaction context does not exist, not allowed by default
+  ///
+  /// @retval OB_SUCCESS          Success
+  /// @retval OB_ENTRY_NOT_EXIST  trans context not exist
+  /// @retval other_error_code    Fail
+  virtual int get_trans_ctx(const transaction::ObTransID &key, TransCtx *&trans_ctx, bool enable_create = false) = 0;
+
+  /// revert trans context
+  ///
+  /// @param trans_ctx target trans context
+  ///
+  /// @retval OB_SUCCESS        Success
+  /// @retval other_error_code  Fail
+  virtual int revert_trans_ctx(TransCtx *trans_ctx) = 0;
+
+  /// delete trans context
+  ///
+  /// @param key target trans context
+  ///
+  /// @retval OB_SUCCESS          Success
+  /// @retval OB_ENTRY_NOT_EXIST  trans context not exist
+  /// @retval other_error_code    Fail
+  virtual int remove_trans_ctx(const transaction::ObTransID &key) = 0;
+
+  /// Update statistical information
+  virtual int update_stat_info(int trans_state) = 0;
+
+  /// Print statistics
+  virtual void print_stat_info() = 0;
+
+  /// Get the number of transactions in a given state
+  virtual int64_t get_trans_count(int trans_ctx_state) = 0;
+
+  /// Print information on pending transactions
+  virtual int dump_pending_trans_info(char *buffer, const int64_t size, int64_t &pos) = 0;
+
+  // Whether the list of participants needs to be sorted
+  virtual bool need_sort_participant() const = 0;
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+
+class ObLogTransCtxMgr : public IObLogTransCtxMgr
+{
+  struct Scanner
+  {
+    Scanner() : buffer_(NULL), buffer_size_(0), pos_(0), valid_trans_count_(0)
+    {
+      (void)memset(trans_count_, 0, sizeof(trans_count_));
+    }
+
+    ~Scanner() {}
+
+    void operator() (const transaction::ObTransID &trans_id, TransCtx *trans_ctx);
+
+    char *buffer_;
+    int64_t buffer_size_;
+    int64_t pos_;
+    int64_t valid_trans_count_;
+    int64_t trans_count_[TransCtx::TRANS_CTX_STATE_MAX];
+  };
+
+public:
+  static const int64_t BLOCK_SIZE = 1 << 24;
+  static const int64_t PRINT_STATE_INTERVAL = 10 * 1000 * 1000;
+  typedef ObEasyHazardMap<transaction::ObTransID, TransCtx> TransCtxMap;
+
+public:
+  ObLogTransCtxMgr();
+  virtual ~ObLogTransCtxMgr();
+
+public:
+  int get_trans_ctx(const transaction::ObTransID &key, TransCtx *&trans_ctx, bool enable_create = false);
+  int revert_trans_ctx(TransCtx *trans_ctx);
+  int remove_trans_ctx(const transaction::ObTransID &key);
+  int update_stat_info(const int trans_state);
+  void print_stat_info();
+  int64_t get_trans_count(const int trans_ctx_state);
+  int dump_pending_trans_info(char *buffer, const int64_t size, int64_t &pos);
+  bool need_sort_participant() const { return need_sort_participant_; }
+
+public:
+  int init(const int64_t max_cached_trans_ctx_count, const bool need_sort_participant);
+  void destroy();
+  // Get the number of valid TransCtx, i.e.
the number of TransCtx present in the map + inline int64_t get_valid_trans_ctx_count() const { return map_.get_valid_count(); } + + // Get the number of allocated TransCtx objects + inline int64_t get_alloc_trans_ctx_count() const { return map_.get_alloc_count(); } + + // Get the number of free TransCtx objects + inline int64_t get_free_trans_ctx_count() const { return map_.get_free_count(); } + +private: + bool inited_; + TransCtxMap map_; + bool need_sort_participant_; + + /// state values + int64_t valid_trans_count_; // Current number of transactions + int64_t trans_count_[TransCtx::TRANS_CTX_STATE_MAX]; // Number of transactions in various states + + /// Statistical values + int64_t created_trans_count_; // Number of created transactions counted + int64_t last_created_trans_count_; // Number of transactions created at last count + int64_t sequenced_trans_count_; // Number of transactions in statistical order + int64_t last_sequenced_trans_count_; // Number of transactions in fixed order at last count + int64_t last_stat_time_; // Last statistical time + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTransCtxMgr); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_LOG_TRANS_CTX_MGR_ */ diff --git a/src/liboblog/src/ob_log_trans_log.cpp b/src/liboblog/src/ob_log_trans_log.cpp new file mode 100644 index 0000000000000000000000000000000000000000..93698b819fb923bcf74dd213e83300e0f0d7c8d4 --- /dev/null +++ b/src/liboblog/src/ob_log_trans_log.cpp @@ -0,0 +1,149 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_trans_log.h" +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +int RedoLogMetaNode::update_redo_meta(const int64_t log_no, const uint64_t log_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(log_no < 0) || OB_UNLIKELY(OB_INVALID_ID == log_id)) { + LOG_ERROR("invalid argument", K(log_no), K(log_id)); + ret = OB_INVALID_ARGUMENT; + // Requires log numbers to be consecutive + } else if (OB_UNLIKELY(log_no != (end_log_no_ + 1))) { + LOG_ERROR("log no is not consecutive", K(end_log_no_), K(log_no)); + ret = OB_DISCONTINUOUS_LOG; + } else { + // update log range + end_log_no_ = log_no; + end_log_id_ = log_id; + } + + return ret; +} + +int DmlRedoLogNode::append_redo_log(const char *buf, + const int64_t buf_len) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0)) { + LOG_ERROR("invalid argument", K(buf_len), K(buf)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! 
is_valid())) { + LOG_ERROR("redo log node is not valid", KPC(this)); + ret = OB_INVALID_DATA; + } + // Requires enough buffer space to hold the data + else if (OB_UNLIKELY(size_ - pos_ < buf_len)) { + LOG_ERROR("buffer is not enough", K(size_), K(pos_), K(buf_len)); + ret = OB_BUF_NOT_ENOUGH; + } else { + (void)MEMCPY(data_ + pos_, buf, buf_len); + pos_ += buf_len; + } + + return ret; +} + +int DdlRedoLogNode::append_redo_log(const int64_t log_no, const uint64_t log_id, const char *buf, + const int64_t buf_len) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(buf) || OB_UNLIKELY(log_no < 0) || OB_UNLIKELY(OB_INVALID_ID == log_id) + || OB_UNLIKELY(buf_len <= 0)) { + LOG_ERROR("invalid argument", K(log_no), K(log_id), K(buf_len), K(buf)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(! is_valid())) { + LOG_ERROR("redo log node is not valid", KPC(this)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(update_redo_meta(log_no, log_id))) { + LOG_ERROR("update_redo_meta fail", KR(ret), K(log_no), K(log_id)); + } + // Requires enough buffer space to hold the data + else if (OB_UNLIKELY(size_ - pos_ < buf_len)) { + LOG_ERROR("buffer is not enough", K(size_), K(pos_), K(buf_len)); + ret = OB_BUF_NOT_ENOUGH; + } else { + (void)MEMCPY(data_ + pos_, buf, buf_len); + pos_ += buf_len; + } + + return ret; +} + +// push redo log with order +int SortedRedoLogList::push(RedoLogMetaNode *node) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(node) || OB_UNLIKELY(! node->is_valid())) { + OBLOG_LOG(ERROR, "invalid argument", K(node)); + ret = OB_INVALID_ARGUMENT; + } else if (NULL == head_) { + head_ = node; + node->next_ = NULL; + tail_ = node; + node_num_ = 1; + log_num_ = node->get_log_num(); + } else { // NULL != head_ + if (OB_ISNULL(tail_)) { + OBLOG_LOG(ERROR, "tail node is NULL, but head node is not NULL", K(head_), K(tail_)); + ret = OB_ERR_UNEXPECTED; + } else { + // quick-path + if (tail_->before(*node)) { + tail_->next_ = node; + tail_ = node; + node->next_ = NULL; + } else { + // Iterate through all nodes to find the first redo node that is greater than or equal to the target node + RedoLogMetaNode **next_ptr = &head_; + while ((*next_ptr)->before(*node)) { + next_ptr = &((*next_ptr)->next_); + } + + // If the node value is duplicated, export error OB_ENTRY_EXIST + if ((*next_ptr)->start_log_id_ == node->start_log_id_) { + OBLOG_LOG(INFO, "redo log is pushed twice", KPC(node), KPC(*next_ptr), KPC(this)); + ret = OB_ENTRY_EXIST; + } else { + node->next_ = (*next_ptr); + *next_ptr = node; + } + } + + if (OB_SUCCESS == ret) { + log_num_ += node->get_log_num(); + ATOMIC_INC(&node_num_); + } + } + } + + if (OB_SUCCESS == ret) { + last_push_node_ = node; + } + + return ret; +} + +} // namespace liboblog */ +} // namespace oceanbase */ diff --git a/src/liboblog/src/ob_log_trans_log.h b/src/liboblog/src/ob_log_trans_log.h new file mode 100644 index 0000000000000000000000000000000000000000..3526b3038f1c636a8cf8ec9b46f11b6522b12068 --- /dev/null +++ b/src/liboblog/src/ob_log_trans_log.h @@ -0,0 +1,279 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
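SortedRedoLogList::push() above keeps redo nodes ordered by start_log_id_: it appends directly at the tail when the new node sorts after everything pushed so far, otherwise it walks the list with a pointer-to-pointer and splices the node in front of the first entry that does not sort before it, rejecting a duplicate of the same start id. The following is a minimal sketch of that insertion scheme with simplified stand-in types, not the real RedoLogMetaNode, and without the node/log counters and last_push_node_ bookkeeping the real list maintains.

```cpp
#include <cassert>
#include <cstdint>

// Simplified node ordered by start_id_; field names are illustrative only.
struct Node {
  uint64_t start_id_;
  uint64_t end_id_;
  Node *next_ = nullptr;
  bool before(const Node &other) const { return end_id_ < other.start_id_; }
};

struct SortedList {
  Node *head_ = nullptr;
  Node *tail_ = nullptr;

  // Returns 0 on success, -1 if a node with the same start_id_ already exists
  // (analogous to OB_ENTRY_EXIST in the patch).
  int push(Node *node) {
    if (head_ == nullptr) {               // first node
      head_ = tail_ = node;
      node->next_ = nullptr;
    } else if (tail_->before(*node)) {    // fast path: strictly after the tail
      tail_->next_ = node;
      node->next_ = nullptr;
      tail_ = node;
    } else {                              // find the first node not before us
      Node **next_ptr = &head_;
      while ((*next_ptr)->before(*node)) {
        next_ptr = &((*next_ptr)->next_);
      }
      if ((*next_ptr)->start_id_ == node->start_id_) {
        return -1;                        // duplicate start id rejected
      }
      node->next_ = *next_ptr;            // splice in front of the found node
      *next_ptr = node;
    }
    return 0;
  }
};

int main() {
  Node a{10, 12}, b{20, 25}, c{15, 18};
  SortedList list;
  assert(list.push(&a) == 0);
  assert(list.push(&b) == 0);
  assert(list.push(&c) == 0);   // lands between a and b
  assert(list.push(&c) == -1);  // pushing the same start id again is rejected
  return 0;
}
```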
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_TRANS_LOG_H__ +#define OCEANBASE_LIBOBLOG_TRANS_LOG_H__ + +#include "storage/transaction/ob_trans_log.h" // ObTransPrepareLog, ObTransCommitLog +#include "lib/allocator/ob_mod_define.h" +#include "ob_log_row_data_index.h" + +namespace oceanbase +{ +namespace liboblog +{ +struct RedoLogMetaNode +{ + RedoLogMetaNode() { reset(); } + ~RedoLogMetaNode() { reset(); } + + // The log numbering interval + // The numbers in the interval are consecutive + // For example, logs numbered 2, 3 and 4, with 2 being the start number and 4 being the end number + int64_t start_log_no_; // start log no + int64_t end_log_no_; // end log no + + // Log ID interval + // The IDs in the interval are not consecutive + // e.g. log IDs: 100, 120, 200, 100 is the starting log ID, 200 is the ending log ID + uint64_t start_log_id_; // start log id + uint64_t end_log_id_; // end log id + + RedoLogMetaNode *next_; // next log + + void reset() + { + start_log_no_ = -1; + end_log_no_ = -1; + start_log_id_ = common::OB_INVALID_ID; + end_log_id_ = common::OB_INVALID_ID; + next_ = NULL; + } + + void reset(const int64_t log_no, const uint64_t log_id) + { + start_log_no_ = log_no; + end_log_no_ = log_no; + start_log_id_ = log_id; + end_log_id_ = log_id; + next_ = NULL; + } + + bool is_valid() const + { + return start_log_no_ >= 0 + && end_log_no_ >= start_log_no_ + && common::OB_INVALID_ID != start_log_id_ + && end_log_id_ >= start_log_id_; + } + + int32_t get_log_num() const { return static_cast(end_log_no_ - start_log_no_ + 1); } + + // Is the log ID sequentially located before the target node + bool before(const RedoLogMetaNode &node) + { + return end_log_id_ < node.start_log_id_; + } + + int update_redo_meta(const int64_t log_no, const uint64_t log_id); + + // TODO to_string + TO_STRING_KV( + K_(start_log_no), + K_(end_log_no), + K_(start_log_id), + K_(end_log_id), + KP_(next)); +}; + +struct DmlRedoLogMetaNode : public RedoLogMetaNode +{ + DmlRedoLogMetaNode() { reset(); } + ~DmlRedoLogMetaNode() { reset(); } + + ObLogRowDataIndex *get_row_head() { return row_head_; } + const ObLogRowDataIndex *get_row_head() const { return row_head_; } + ObLogRowDataIndex *get_row_tail() { return row_tail_; } + const ObLogRowDataIndex *get_row_tail() const { return row_tail_; } + int64_t get_valid_row_num() const { return valid_row_num_; } + bool is_contain_valid_row() const { return 0 != valid_row_num_; } + bool is_contain_rollback_row() const { return is_contain_rollback_row_; } + + void reset() + { + RedoLogMetaNode::reset(); + row_head_ = NULL; + row_tail_ = NULL; + valid_row_num_ = 0; + is_contain_rollback_row_ = false; + } + + void reset(const int64_t log_no, const uint64_t log_id) + { + reset(); + RedoLogMetaNode::reset(log_no, log_id); + } + + ObLogRowDataIndex *row_head_; + ObLogRowDataIndex *row_tail_; + int64_t valid_row_num_; + bool is_contain_rollback_row_; + + TO_STRING_KV("RedoLogMetaNode", static_cast(*this), + K_(row_head), + K_(row_tail), + K_(valid_row_num), + K_(is_contain_rollback_row)); +}; + +struct DmlRedoLogNode +{ + DmlRedoLogNode() { reset(); } + ~DmlRedoLogNode() { reset(); } + + // Mutator data, which may contain the contents of multiple REDO logs + // This holds the serialized data directly from the ObMemtableMutatorRow + // Excluding the ObMemtableMutatorMeta part + // See PartTransTask::push_redo_log() for details + char *data_; // Mutator data + int64_t size_; // Length of Mutator data + int64_t pos_; // Length of data already 
filled + + bool is_valid() const + { + return NULL != data_ + && size_ > 0 + && pos_ > 0; + } + + // Requires data to be valid and complete + bool check_data_integrity() const + { + return NULL != data_ && size_ > 0 && size_ == pos_; + } + + void reset() + { + data_ = NULL; + size_ = 0; + pos_ = 0; + } + + void reset(char *data, const int64_t size, const int64_t pos) + { + data_ = data; + size_ = size; + pos_ = pos; + } + + // Continue to add redo log data, requiring consecutive log numbers and enough buffer space + int append_redo_log(const char *buf, const int64_t buf_len); + + TO_STRING_KV( + KP_(data), + K_(size), + K_(pos)); +}; + +// Redo log structure +// To support LOB, make sure that the mutator data is complete and that it contains one to many REDO logs +// log_no and log_id are stored as ranges +struct DdlRedoLogNode : public RedoLogMetaNode +{ + DdlRedoLogNode() { reset(); } + ~DdlRedoLogNode() { reset(); } + + // Mutator data, which may contain the contents of multiple REDO logs + // This holds the serialized data directly from the ObMemtableMutatorRow + // Excluding the ObMemtableMutatorMeta part + // See PartTransTask::push_redo_log() for details + char *data_; // Mutator数据 + int64_t size_; // Mutator数据长度 + int64_t pos_; // 已经填充的数据长度 + + bool is_valid() const + { + return RedoLogMetaNode::is_valid() + && NULL != data_ + && size_ > 0 + && pos_ > 0; + } + + // Requires data to be valid and complete + bool check_data_integrity() const + { + return NULL != data_ && size_ > 0 && size_ == pos_; + } + + void reset() + { + RedoLogMetaNode::reset(); + data_ = NULL; + size_ = 0; + pos_ = 0; + } + + void reset(const int64_t log_no, const uint64_t log_id, char *data, const int64_t size, + const int64_t pos) + { + RedoLogMetaNode::reset(log_no, log_id); + data_ = data; + size_ = size; + pos_ = pos; + } + + // Continue to add redo log data, requiring consecutive log numbers and enough buffer space + int append_redo_log(const int64_t log_no, const uint64_t log_id, const char *buf, + const int64_t buf_len); + + TO_STRING_KV( + K_(start_log_no), + K_(end_log_no), + K_(start_log_id), + K_(end_log_id), + KP_(data), + K_(size), + K_(pos), + KP_(next)); +}; + +// Ordered Redo log list +struct SortedRedoLogList +{ + int32_t node_num_; + int32_t log_num_; + RedoLogMetaNode *head_; + RedoLogMetaNode *tail_; + RedoLogMetaNode *last_push_node_; + + SortedRedoLogList() : node_num_(0), log_num_(0), head_(NULL), tail_(NULL), last_push_node_(NULL) + {} + + ~SortedRedoLogList() { reset(); } + + int32_t get_node_number() const { return ATOMIC_LOAD(&node_num_); } + + void reset() + { + node_num_ = 0; + log_num_ = 0; + head_ = NULL; + tail_ = NULL; + last_push_node_ = NULL; + } + + bool is_valid() const + { + return node_num_ > 0 && log_num_ > 0 && NULL != head_ && NULL != tail_ + && NULL != last_push_node_; + } + + // Returns OB_ENTRY_EXIST if the redo log already exists + int push(RedoLogMetaNode *node); + + TO_STRING_KV(K_(node_num), K_(log_num), KP_(head), KP_(tail), KP_(last_push_node)); +}; + +} // namespace liboblog +} // namespace oceanbase + +#endif /* OCEANBASE_LIBOBLOG_TRANS_LOG_H__ */ diff --git a/src/liboblog/src/ob_log_trans_stat_mgr.cpp b/src/liboblog/src/ob_log_trans_stat_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f79dfe2f1527194da3dc9055e6c3476f72df0cd --- /dev/null +++ b/src/liboblog/src/ob_log_trans_stat_mgr.cpp @@ -0,0 +1,420 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_trans_stat_mgr.h" +#include "ob_log_utils.h" + +#define _STAT(level, fmt, args...) _OBLOG_LOG(level, "[TPS_STAT] " fmt, ##args) +#define STAT(level, fmt, args...) OBLOG_LOG(level, "[TPS_STAT] " fmt, ##args) +#define _ISTAT(fmt, args...) _STAT(INFO, fmt, ##args) +#define ISTAT(fmt, args...) STAT(INFO, fmt, ##args) + +namespace oceanbase +{ + +using namespace common; + +namespace liboblog +{ + +double TransTpsStatInfo::calc_tps(const int64_t delta_time) +{ + double create_tps = 0.0; + + int64_t local_created_trans_count = ATOMIC_LOAD(&created_trans_count_); + int64_t local_last_created_trans_count = ATOMIC_LOAD(&last_created_trans_count_); + int64_t delta_create_count = local_created_trans_count - local_last_created_trans_count; + + if (delta_time > 0) { + create_tps = (double)(delta_create_count) * 1000000.0 / (double)delta_time; + } + + // Update the last statistics + last_created_trans_count_ = local_created_trans_count; + + return create_tps; +} + +double TransRpsStatInfo::calc_rps(const int64_t delta_time) +{ + double create_rps = 0.0; + + int64_t local_created_records_count = ATOMIC_LOAD(&created_records_count_); + int64_t local_last_created_records_count = ATOMIC_LOAD(&last_created_records_count_); + int64_t delta_create_count = local_created_records_count - local_last_created_records_count; + + if (delta_time > 0) { + create_rps = (double)(delta_create_count) * 1000000.0 / (double)delta_time; + } + + // Update the last statistics + last_created_records_count_ = local_created_records_count; + + return create_rps; +} + +///////////////////////////// TransTpsRpsStatInfo /////////////////////////// +void TransTpsRpsStatInfo::reset() +{ + tps_stat_info_.reset(); + rps_stat_info_.reset(); +} + +double TransTpsRpsStatInfo::calc_tps(const int64_t delta_time) +{ + return tps_stat_info_.calc_tps(delta_time); +} + +double TransTpsRpsStatInfo::calc_rps(const int64_t delta_time) +{ + return rps_stat_info_.calc_rps(delta_time); +} + +///////////////////////////// TransTpsRpsStatInfo /////////////////////////// +ObLogTransStatMgr::ObLogTransStatMgr() : + inited_(false), + tps_stat_info_(), + rps_stat_info_before_filter_(), + rps_stat_info_after_filter_(), + tenant_stat_info_map_(), + tenant_stat_info_pool_(), + next_record_stat_(), + release_record_stat_(), + last_stat_time_(0) +{ +} + +ObLogTransStatMgr::~ObLogTransStatMgr() +{ + destroy(); +} + +int ObLogTransStatMgr::init() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + ret = OB_INIT_TWICE; + LOG_ERROR("ObLogTransStatMgr has inited"); + } else if (OB_FAIL(tenant_stat_info_map_.init(ObModIds::OB_LOG_TENANT_STAT_MAP))) { + LOG_ERROR("init tenant_stat_info_map_ fail", KR(ret)); + } else if (OB_FAIL(tenant_stat_info_pool_.init(CACHED_TENANT_STAT_INFO_COUNT, + ObModIds::OB_LOG_TENANT_STAT_INFO))) { + LOG_ERROR("init tenant_stat_info_pool_ fail", KR(ret), LITERAL_K(CACHED_TENANT_STAT_INFO_COUNT)); + } else { + tps_stat_info_.reset(); + rps_stat_info_before_filter_.reset(); + rps_stat_info_after_filter_.reset(); + next_record_stat_.reset(); + 
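The calc_tps()/calc_rps() helpers above derive a rate from the delta of a monotonically increasing counter between two samples, divided by the elapsed interval in microseconds (hence the 1,000,000 scale factor), and then remember the current counter value for the next interval. A compact sketch of that computation follows, using std::atomic in place of the OceanBase ATOMIC_* macros; names are illustrative.

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

struct TpsStat {
  std::atomic<int64_t> created_{0};   // monotonically increasing counter
  int64_t last_created_{0};           // value at the previous sample

  void on_trans_created() { created_.fetch_add(1, std::memory_order_relaxed); }

  // delta_time_us: elapsed time since the previous call, in microseconds.
  double calc_tps(int64_t delta_time_us) {
    const int64_t cur = created_.load(std::memory_order_relaxed);
    const int64_t delta = cur - last_created_;
    double tps = 0.0;
    if (delta_time_us > 0) {
      tps = static_cast<double>(delta) * 1000000.0
          / static_cast<double>(delta_time_us);
    }
    last_created_ = cur;   // remember this sample for the next interval
    return tps;
  }
};

int main() {
  TpsStat stat;
  for (int i = 0; i < 500; ++i) stat.on_trans_created();
  // 500 transactions over a 1,000,000 us window -> 500 TPS.
  std::printf("tps=%.3f\n", stat.calc_tps(1000000));
  return 0;
}
```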
release_record_stat_.reset(); + last_stat_time_ = 0; + inited_ = true; + } + + return ret; +} + +void ObLogTransStatMgr::destroy() +{ + if (inited_) { + clear_tenant_stat_info_(); + + tps_stat_info_.reset(); + rps_stat_info_before_filter_.reset(); + rps_stat_info_after_filter_.reset(); + (void)tenant_stat_info_map_.destroy(); + tenant_stat_info_pool_.destroy(); + next_record_stat_.reset(); + release_record_stat_.reset(); + last_stat_time_ = 0; + inited_ = false; + } +} + +void ObLogTransStatMgr::do_tps_stat() +{ + tps_stat_info_.do_tps_stat(); +} + +void ObLogTransStatMgr::do_rps_stat_before_filter(const int64_t record_count) +{ + rps_stat_info_before_filter_.do_rps_stat(record_count); +} + +void ObLogTransStatMgr::do_rps_stat_after_filter(const int64_t record_count) +{ + rps_stat_info_after_filter_.do_rps_stat(record_count); +} + +void ObLogTransStatMgr::do_drc_consume_tps_stat() +{ + next_record_stat_.do_tps_stat(); +} + +void ObLogTransStatMgr::do_drc_consume_rps_stat() +{ + next_record_stat_.do_rps_stat(1); +} + +void ObLogTransStatMgr::do_drc_release_tps_stat() +{ + release_record_stat_.do_tps_stat(); +} + +void ObLogTransStatMgr::do_drc_release_rps_stat() +{ + release_record_stat_.do_rps_stat(1); +} + +void ObLogTransStatMgr::print_stat_info() +{ + int ret = OB_SUCCESS; + + int64_t current_timestamp = get_timestamp(); + int64_t local_last_stat_time = last_stat_time_; + int64_t delta_time = current_timestamp - local_last_stat_time; + + double create_tps = tps_stat_info_.calc_tps(delta_time); + double create_rps_before_filter = rps_stat_info_before_filter_.calc_rps(delta_time); + double create_rps_after_filter = rps_stat_info_after_filter_.calc_rps(delta_time); + + // Update the last statistics + last_stat_time_ = current_timestamp; + + _ISTAT("TOTAL TPS=%.3lf RPS=%.3lf RPS_ALL=%.3lf ", + create_tps, create_rps_after_filter, create_rps_before_filter); + + // Print Tenant Statistics + TenantStatInfoPrinter printer(delta_time); + if (OB_FAIL(tenant_stat_info_map_.for_each(printer))) { + LOG_ERROR("TenantStatInfoMap for each fail", KR(ret)); + } + + // Print drc consumption statistics + double next_record_tps = next_record_stat_.calc_tps(delta_time); + double next_record_rps = next_record_stat_.calc_rps(delta_time); + double release_record_tps = release_record_stat_.calc_tps(delta_time); + double release_record_rps = release_record_stat_.calc_rps(delta_time); + + _ISTAT("[DRC] NEXT_RECORD_TPS=%.3lf RELEASE_RECORD_TPS=%.3lf " + "NEXT_RECORD_RPS=%.3lf RELEASE_RECORD_RPS=%.3lf", + next_record_tps, release_record_tps, next_record_rps, release_record_rps); +} + +int ObLogTransStatMgr::add_served_tenant(const char *tenant_name, const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTransStatMgr has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(tenant_name) || + OB_UNLIKELY(OB_INVALID_ID == tenant_id)) { + LOG_ERROR("invalid argument", K(tenant_name), K(tenant_id)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t pos = 0; + TenantStatInfo *ts_info = NULL; + TenantID tid(tenant_id); + + if (OB_FAIL(tenant_stat_info_pool_.alloc(ts_info))) { + LOG_ERROR("alloc tenant stat info fail", KR(ret), K(tenant_id), K(tenant_name), KPC(ts_info)); + } else if (OB_FAIL(databuff_printf(ts_info->name_, sizeof(ts_info->name_), pos, + "%s", tenant_name))) { + LOG_ERROR("print tenant name fail", KR(ret), K(pos), K(tenant_id), K(tenant_name), KPC(ts_info)); + } else { + if (OB_FAIL(tenant_stat_info_map_.insert(tid, ts_info))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("insert into served_tenant_db_map_ fail", KR(ret), + K(tid), KPC(ts_info)); + } else { + ts_info->reset(); + tenant_stat_info_pool_.free(ts_info); + ts_info = NULL; + ret = OB_SUCCESS; + } + } else { + _ISTAT("[ADD_TENANT] TENANT=%s(%lu) TOTAL_COUNT=%ld", + tenant_name, tenant_id, tenant_stat_info_map_.count()); + } + } + + // Recycle useless objects + if (OB_SUCCESS != ret && NULL != ts_info) { + ts_info->reset(); + tenant_stat_info_pool_.free(ts_info); + ts_info = NULL; + } + } + + return ret; +} + +int ObLogTransStatMgr::drop_served_tenant(const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + + TenantStatInfoErase erase_fn(tenant_id, tenant_stat_info_pool_); + + if (OB_FAIL(tenant_stat_info_map_.erase_if(tenant_id, erase_fn))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + LOG_WARN("tenant has been drop in trans stat mgr", K(tenant_id)); + } else { + LOG_ERROR("tenant stat info map erase_if tenant fail", KR(ret), K(tenant_id)); + } + } else { + // do nothing + } + + return ret; +} + +int ObLogTransStatMgr::do_tenant_tps_rps_stat(const uint64_t tenant_id, int64_t record_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTransStatMgr has not inited"); + ret = OB_NOT_INIT; + } else { + TenantRpsBeforeFilterUpdater updater(tenant_id, record_count); + TenantID tid(tenant_id); + if (OB_FAIL(tenant_stat_info_map_.operate(tid, updater))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("dp tenant rps stat before filter fail", KR(ret), K(tid), K(record_count)); + } else { + ret = OB_SUCCESS; + } + } else { + // do nothing + } + } + + return ret; +} + +int ObLogTransStatMgr::do_tenant_rps_stat_after_filter(const uint64_t tenant_id, int64_t record_count) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogTransStatMgr has not inited"); + ret = OB_NOT_INIT; + } else { + TenantRpsAfterFilterUpdater updater(tenant_id, record_count); + TenantID tid(tenant_id); + if (OB_FAIL(tenant_stat_info_map_.operate(tid, updater))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_ERROR("dp tenant rps stat after filter fail", KR(ret), K(tid), K(record_count)); + } else { + ret = OB_SUCCESS; + } + } else { + // do nothing + } + } + + return ret; +} + +void ObLogTransStatMgr::clear_tenant_stat_info_() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LOG_ERROR("ObLogTransStatMgr has not inited"); + ret = OB_NOT_INIT; + } else { + TenantStatInfoClear clear(tenant_stat_info_pool_); + + if (OB_FAIL(tenant_stat_info_map_.remove_if(clear))) { + LOG_ERROR("clear tenant_stat_info_map fail", KR(ret)); + } + } +} + +bool ObLogTransStatMgr::TenantRpsBeforeFilterUpdater::operator()(const TenantID &tid, TenantStatInfo *ts_info) +{ + if (tid.tenant_id_ == tenant_id_) { + if (OB_ISNULL(ts_info)) { + LOG_ERROR("tenant stat info is null", K(tid), KPC(ts_info)); + } else { + ts_info->tps_stat_info_.do_tps_stat(); + ts_info->rps_stat_info_before_filter_.do_rps_stat(record_count_); + } + } + + return tid.tenant_id_ == tenant_id_; +} + +bool ObLogTransStatMgr::TenantRpsAfterFilterUpdater::operator()(const TenantID &tid, TenantStatInfo *ts_info) +{ + if (tid.tenant_id_ == tenant_id_) { + if (OB_ISNULL(ts_info)) { + LOG_ERROR("tenant stat info is null", K(tid), KPC(ts_info)); + } else { + ts_info->rps_stat_info_after_filter_.do_rps_stat(record_count_); + } + } + + return tid.tenant_id_ == tenant_id_; +} + +bool ObLogTransStatMgr::TenantStatInfoPrinter::operator()(const TenantID &tid, + TenantStatInfo *ts_info) +{ + if (NULL != ts_info) { + double tps = ts_info->tps_stat_info_.calc_tps(delta_time_); + double rps_before_filter = ts_info->rps_stat_info_before_filter_.calc_rps(delta_time_); + double rps_after_filter = ts_info->rps_stat_info_after_filter_.calc_rps(delta_time_); + + _ISTAT("TENANT=%s(%lu) TPS=%.3lf RPS=%.3lf RPS_ALL=%.3lf", + ts_info->name_, tid.tenant_id_, tps, rps_after_filter, rps_before_filter); + } + + return true; +} + +bool ObLogTransStatMgr::TenantStatInfoErase::operator()(const TenantID &tid, + TenantStatInfo *ts_info) +{ + if (tid.tenant_id_ == tenant_id_ && NULL != ts_info) { + ts_info->reset(); + (void)pool_.free(ts_info); + ts_info = NULL; + + _ISTAT("[DROP_TENANT] TENANT=%lu", tid.tenant_id_); + } + + return tid.tenant_id_ == tenant_id_; +} + +bool ObLogTransStatMgr::TenantStatInfoClear::operator()(const TenantID &tid, + TenantStatInfo *ts_info) +{ + UNUSED(tid); + if (NULL != ts_info) { + ts_info->reset(); + (void)pool_.free(ts_info); + ts_info = NULL; + + _ISTAT("[CLEAR_TENANT] TENANT=%s(%lu)", ts_info->name_, tid.tenant_id_); + } + + return true; +} + +} +} diff --git a/src/liboblog/src/ob_log_trans_stat_mgr.h b/src/liboblog/src/ob_log_trans_stat_mgr.h new file mode 100644 index 0000000000000000000000000000000000000000..bb0ab4b1ce2ccee7f727d459b642c243ade8f8f6 --- /dev/null +++ b/src/liboblog/src/ob_log_trans_stat_mgr.h @@ -0,0 +1,306 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
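The tenant statistics map above is only ever manipulated through small functors: printing via for_each(), conditional deletion via erase_if()/remove_if(), and every removal path resets the TenantStatInfo object and hands it back to the small-object pool instead of freeing it. Below is a rough sketch of that recycle-on-erase pattern, with a trivial vector-backed pool and std::unordered_map standing in for ObSmallObjPool and ObLinearHashMap; all names are illustrative, not the real API.

```cpp
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <unordered_map>
#include <vector>

// Illustrative stand-ins: a trivial object pool and stat record.
struct TenantStat { char name_[64] = {0}; int64_t tps_count_ = 0; };

struct StatPool {
  std::vector<TenantStat *> free_list_;
  TenantStat *alloc() {
    if (!free_list_.empty()) {
      TenantStat *p = free_list_.back();
      free_list_.pop_back();
      return p;
    }
    return new TenantStat();
  }
  void free(TenantStat *p) { free_list_.push_back(p); }  // recycle, do not delete
};

// Functor-style eraser: matches one tenant id, resets the record and
// returns it to the pool, mirroring the shape of TenantStatInfoErase.
struct Eraser {
  uint64_t tenant_id_;
  StatPool &pool_;
  bool operator()(uint64_t tid, TenantStat *&stat) {
    const bool match = (tid == tenant_id_);
    if (match && stat != nullptr) {
      stat->name_[0] = '\0';
      stat->tps_count_ = 0;
      pool_.free(stat);
      stat = nullptr;
    }
    return match;  // erase only the matching entry
  }
};

int main() {
  StatPool pool;
  std::unordered_map<uint64_t, TenantStat *> map;
  map[1001] = pool.alloc();
  Eraser eraser{1001, pool};
  for (auto it = map.begin(); it != map.end();) {
    it = eraser(it->first, it->second) ? map.erase(it) : std::next(it);
  }
  std::printf("remaining=%zu pooled=%zu\n", map.size(), pool.free_list_.size());
  return 0;
}
```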
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_TRANS_STAT_INFO_H__ +#define OCEANBASE_LIBOBLOG_OB_LOG_TRANS_STAT_INFO_H__ + +#include "lib/utility/ob_print_utils.h" // TO_STRING_KV +#include "lib/objectpool/ob_small_obj_pool.h" // ObSmallObjPool +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap + +namespace oceanbase +{ +namespace liboblog +{ +// Transaction tps statistics +struct TransTpsStatInfo +{ + int64_t created_trans_count_; // Number of created transactions counts + int64_t last_created_trans_count_; // Number of created transactions counts last time + + TransTpsStatInfo() { reset(); } + ~TransTpsStatInfo() { reset(); } + + void reset() + { + created_trans_count_ = 0; + last_created_trans_count_ = 0; + } + + void do_tps_stat() + { + ATOMIC_INC(&created_trans_count_); + } + + double calc_tps(const int64_t delta_time); + + TO_STRING_KV(K_(created_trans_count), + K_(last_created_trans_count)); +}; + +// Transaction rps statistics +struct TransRpsStatInfo +{ + int64_t created_records_count_; // Number of created statements count + int64_t last_created_records_count_; // Number of created statements count last time + + TransRpsStatInfo() { reset(); } + ~TransRpsStatInfo() { reset(); } + + void reset() + { + created_records_count_ = 0; + last_created_records_count_ = 0; + } + + void do_rps_stat(int64_t record_count) + { + ATOMIC_AAF(&created_records_count_, record_count); + } + + double calc_rps(const int64_t delta_time); + + TO_STRING_KV(K_(created_records_count), + K_(last_created_records_count)); +}; + +// Transaction tps/rps statistics +struct TransTpsRpsStatInfo +{ + TransTpsStatInfo tps_stat_info_; + TransRpsStatInfo rps_stat_info_; + + TransTpsRpsStatInfo() { reset(); } + ~TransTpsRpsStatInfo() { reset(); } + void reset(); + + void do_tps_stat() + { + tps_stat_info_.do_tps_stat(); + } + + void do_rps_stat(int64_t record_count) + { + rps_stat_info_.do_rps_stat(record_count); + } + + double calc_tps(const int64_t delta_time); + + double calc_rps(const int64_t delta_time); + + TO_STRING_KV(K_(tps_stat_info), + K_(rps_stat_info)); +}; + +class IObLogTransStatMgr +{ +public: + IObLogTransStatMgr() {} + virtual ~IObLogTransStatMgr() {} + +public: + // trans stat + virtual void do_tps_stat() = 0; + virtual void do_rps_stat_before_filter(const int64_t record_count) = 0; + virtual void do_rps_stat_after_filter(const int64_t record_count) = 0; + + // tenant stat + virtual int add_served_tenant(const char *tenant_name, const uint64_t tenant_id) = 0; + virtual int drop_served_tenant(const uint64_t tenant_id) = 0; + // tenant tps and rps(before filter) stat + virtual int do_tenant_tps_rps_stat(const uint64_t tenant_id, int64_t record_count) = 0; + virtual int do_tenant_rps_stat_after_filter(const uint64_t tenant_id, int64_t record_count) = 0; + + // drc stat + // next record + virtual void do_drc_consume_tps_stat() = 0; + virtual void do_drc_consume_rps_stat() = 0; + // release record + virtual void do_drc_release_tps_stat() = 0; + virtual void do_drc_release_rps_stat() = 0; + + // print stat info + virtual void print_stat_info() = 0; +}; + +class ObLogTransStatMgr : public IObLogTransStatMgr +{ +public: + ObLogTransStatMgr(); + ~ObLogTransStatMgr(); + +public: + int init(); + void destroy(); + +public: + void do_tps_stat(); + void do_rps_stat_before_filter(const int64_t record_count); + void do_rps_stat_after_filter(const int64_t record_count); + + int add_served_tenant(const char *tenant_name, const uint64_t tenant_id); + int drop_served_tenant(const uint64_t tenant_id); + int 
do_tenant_tps_rps_stat(const uint64_t tenant_id, int64_t record_count); + int do_tenant_rps_stat_after_filter(const uint64_t tenant_id, int64_t record_count); + + void do_drc_consume_tps_stat(); + void do_drc_consume_rps_stat(); + void do_drc_release_tps_stat(); + void do_drc_release_rps_stat(); + + void print_stat_info(); + +private: + void clear_tenant_stat_info_(); + +private: + static const int64_t CACHED_TENANT_STAT_INFO_COUNT = 1 << 10; + +private: + struct TenantID; + struct TenantStatInfo; + typedef common::ObLinearHashMap TenantStatInfoMap; + typedef common::ObSmallObjPool TenantStatInfoPool; + +private: + struct TenantID + { + uint64_t tenant_id_; + + TenantID(const uint64_t tenant_id) : + tenant_id_(tenant_id) + {} + + int64_t hash() const + { + return static_cast(tenant_id_); + } + + bool operator== (const TenantID &other) const + { + return tenant_id_ == other.tenant_id_; + } + + void reset() + { + tenant_id_ = common::OB_INVALID_ID; + } + + TO_STRING_KV(K_(tenant_id)); + }; + + struct TenantStatInfo + { + // storage format: TENANT_NAME + char name_[common::OB_MAX_TENANT_NAME_LENGTH + 1]; + // tps + TransTpsStatInfo tps_stat_info_; + // RPS( before filtered by Formatter) + TransRpsStatInfo rps_stat_info_before_filter_; + // RPS( after filtered by Formatter) + TransRpsStatInfo rps_stat_info_after_filter_; + + TenantStatInfo() { reset(); } + ~TenantStatInfo() { reset(); } + + void reset() + { + name_[0] = '\0'; + tps_stat_info_.reset(); + rps_stat_info_before_filter_.reset(); + rps_stat_info_after_filter_.reset(); + } + + TO_STRING_KV(K_(name), K_(tps_stat_info), + K_(rps_stat_info_before_filter), + K_(rps_stat_info_after_filter)); + }; + + // Update rps information before filtering for a given tenant + struct TenantRpsBeforeFilterUpdater + { + uint64_t tenant_id_; + int64_t record_count_; + + TenantRpsBeforeFilterUpdater(const uint64_t tenant_id, const int64_t record_count) : + tenant_id_(tenant_id), + record_count_(record_count) {} + + bool operator()(const TenantID &tid, TenantStatInfo *ts_info); + }; + + // Update filtered rps information for a given tenant + struct TenantRpsAfterFilterUpdater + { + uint64_t tenant_id_; + int64_t record_count_; + + TenantRpsAfterFilterUpdater(const uint64_t tenant_id, const int64_t record_count) : + tenant_id_(tenant_id), + record_count_(record_count) {} + + bool operator()(const TenantID &tid, TenantStatInfo *ts_info); + }; + + struct TenantStatInfoPrinter + { + int64_t delta_time_; + TenantStatInfoPrinter(const int64_t delta_time) : delta_time_(delta_time) {} + + bool operator()(const TenantID &tid, TenantStatInfo *ts_info); + }; + + struct TenantStatInfoErase + { + uint64_t tenant_id_; + TenantStatInfoPool &pool_; + + TenantStatInfoErase(const uint64_t tenant_id, TenantStatInfoPool &pool) : + tenant_id_(tenant_id), pool_(pool) {} + + bool operator()(const TenantID &tid, TenantStatInfo *ts_info); + }; + + struct TenantStatInfoClear + { + TenantStatInfoPool &pool_; + + TenantStatInfoClear(TenantStatInfoPool &pool) : pool_(pool) {} + + bool operator()(const TenantID &tid, TenantStatInfo *ts_info); + }; + +private: + bool inited_; + // tps + TransTpsStatInfo tps_stat_info_ CACHE_ALIGNED; + // rps before filter + TransRpsStatInfo rps_stat_info_before_filter_ CACHE_ALIGNED; + // rps after filter + TransRpsStatInfo rps_stat_info_after_filter_ CACHE_ALIGNED; + // 租户统计信息 + TenantStatInfoMap tenant_stat_info_map_; + TenantStatInfoPool tenant_stat_info_pool_; + // drc 消费统计信息 + TransTpsRpsStatInfo next_record_stat_ CACHE_ALIGNED; // Statistics 
next_record: tps and rps information + TransTpsRpsStatInfo release_record_stat_ CACHE_ALIGNED; // Statistics release_record: tps and rps information + // 记录统计时间 + int64_t last_stat_time_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogTransStatMgr); +}; + +} +} + +#endif diff --git a/src/liboblog/src/ob_log_utils.cpp b/src/liboblog/src/ob_log_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0d0d6fefee46b740f08d995d2dc954fc89895f0a --- /dev/null +++ b/src/liboblog/src/ob_log_utils.cpp @@ -0,0 +1,1395 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_log_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include // strtoll +#include // MD5 +#include // StrArray + +#include "lib/string/ob_string.h" // ObString +#include "lib/utility/serialization.h" // serialization +#include "lib/charset/ob_charset.h" // ObCharset +#include "lib/time/ob_time_utility.h" // ObTimeUtility +#include "share/schema/ob_table_schema.h" // ObTableSchema +#include "share/schema/ob_column_schema.h" // ObColumnSchemaV2 +#include "share/schema/ob_schema_struct.h" +#include "rpc/obmysql/ob_mysql_global.h" // MYSQL_TYPE_* +#include "ob_log_config.h" + +using namespace oceanbase::common; +using namespace oceanbase::storage; +using namespace oceanbase::share; +using namespace oceanbase::share::schema; + +namespace oceanbase +{ +namespace liboblog +{ + +int print_human_tstamp(char *buf, const int64_t buf_len, int64_t &pos, + const int64_t usec_tstamp) +{ + int ret = common::OB_SUCCESS; + if (common::OB_INVALID_TIMESTAMP == usec_tstamp) { + ret = common::databuff_printf(buf, buf_len, pos, "[INVALID]"); + } + else { + struct timeval tv; + tv.tv_sec = usec_tstamp / _SEC_; + tv.tv_usec = usec_tstamp % _SEC_; + struct tm tm; + ::localtime_r((const time_t *) &tv.tv_sec, &tm); + ret = common::databuff_printf(buf, buf_len, pos, + "[%04d-%02d-%02d %02d:%02d:%02d.%06ld]", + tm.tm_year + 1900, + tm.tm_mon + 1, + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + tv.tv_usec); + } + return ret; +} + +int print_human_timeval(char *buf, + const int64_t buf_len, + int64_t &pos, + const int64_t usec_tval) +{ + int ret = common::OB_SUCCESS; + if (INT64_MAX == usec_tval) { + ret = common::databuff_printf(buf, buf_len, pos, "[INVALID_TVAL]"); + } + else { + bool negative = (usec_tval < 0); + struct timeval tv; + if (negative) { + tv.tv_sec = (0 - usec_tval) / _SEC_; + tv.tv_usec = (0 - usec_tval) % _SEC_; + } + else { + tv.tv_sec = usec_tval / _SEC_; + tv.tv_usec = usec_tval % _SEC_; + } + int64_t hr = static_cast(tv.tv_sec) / 3600; + int64_t min = (static_cast(tv.tv_sec) / 60) % 60; + int64_t sec = static_cast(tv.tv_sec) % 60; + ret = common::databuff_printf(buf, buf_len, pos, + "[%s%02ld:%02ld:%02ld.%06ld]", + negative ? 
"-" : "", + hr, + min, + sec, + tv.tv_usec); + } + return ret; +} + +int get_local_ip(ObString &local_ip) +{ + int ret = OB_SUCCESS; + const static char *DEFAULT_IP = "127.0.0.1"; + const static char *DEFAULT_DEV = "DEFAULT"; + const static int64_t BUFSIZE = 128; + int sock_fd = 0; + struct ifconf conf; + struct ifreq *ifr = NULL; + char buff[BUFSIZE]; + int64_t ifreq_num = 0; + const char *ret_ip = DEFAULT_IP; + const char *local_dev = DEFAULT_DEV; + + if (OB_UNLIKELY(local_ip.size() <= 0) || OB_ISNULL(local_ip.ptr())) { + LOG_ERROR("invalid argument", K(local_ip)); + ret = OB_INVALID_ARGUMENT; + } else { + sock_fd = socket(PF_INET, SOCK_DGRAM, 0); + if (sock_fd < 0){ + LOG_ERROR("socket fail", K(sock_fd), K(errno), KERRMSG); + ret = OB_ERR_UNEXPECTED; + } else { + conf.ifc_len = BUFSIZE; + conf.ifc_buf = buff; + + ioctl(sock_fd, SIOCGIFCONF, &conf); + ifreq_num = conf.ifc_len / sizeof(struct ifreq); + ifr = conf.ifc_req; + + if (OB_UNLIKELY(ifreq_num <= 0) || OB_ISNULL(ifr)) { + LOG_WARN("no valid network device, set default IP", K(ifreq_num), KP(ifr)); + ret_ip = DEFAULT_IP; + } else { + // 设置默认IP + ret_ip = DEFAULT_IP; + + for (int64_t i = 0; i < ifreq_num; i++) { + struct sockaddr_in *sin = reinterpret_cast(&ifr->ifr_addr); + + ioctl(sock_fd, SIOCGIFFLAGS, ifr); + + if (((ifr->ifr_flags & IFF_LOOPBACK) == 0) && (ifr->ifr_flags & IFF_UP)) { + ret_ip = inet_ntoa(sin->sin_addr); + local_dev = ifr->ifr_name; + break; + } + + ifr++; + } + } + } + + if (OB_SUCC(ret)) { + if (OB_ISNULL(ret_ip)) { + LOG_ERROR("get local ip fail", K(ret_ip)); + ret = OB_ERR_UNEXPECTED; + } else { + int64_t local_ip_buf_size = local_ip.size(); + char *local_ip_buf = local_ip.ptr(); + + ObString::obstr_size_t len = snprintf(local_ip_buf, local_ip_buf_size, "%s", ret_ip); + if (OB_UNLIKELY(len < 0) || OB_UNLIKELY(len >= local_ip_buf_size)) { + LOG_ERROR("buffer not enough", K(len), K(local_ip_buf_size), K(ret_ip)); + ret = OB_BUF_NOT_ENOUGH; + } else { + local_ip.set_length(len); + + LOG_INFO("[LOCAL_ADDR]", "DEV", local_dev, "IP", local_ip); + } + } + } + } + + return ret; +} + +RecordType get_record_type(const ObRowDml &dml_type) +{ + RecordType record_type = EUNKNOWN; + + // Set record type + // Note: The REPLACE type is not handled, it does not exist in Redo + switch (dml_type) { + case T_DML_INSERT: + record_type = EINSERT; + break; + + case T_DML_UPDATE: + record_type = EUPDATE; + break; + + case T_DML_DELETE: + record_type = EDELETE; + break; + + default: + record_type = EUNKNOWN; + break; + } + + return record_type; +} + +const char* print_dml_type(const ObRowDml &dml_type) +{ + const char *dml_str = "UNKNOWN"; + + switch (dml_type) { + case T_DML_INSERT: + dml_str = "INSERT"; + break; + + case T_DML_UPDATE: + dml_str = "UPDATE"; + break; + + case T_DML_DELETE: + dml_str = "DELETE"; + break; + + default: + dml_str = "UNKNOWN"; + break; + } + + return dml_str; +} + +int64_t get_non_hidden_column_count(const share::schema::ObTableSchema &table_schema) +{ + int64_t ret_column_count = 0; + ObTableSchema::const_column_iterator cs_iter = table_schema.column_begin(); + ObTableSchema::const_column_iterator cs_iter_end = table_schema.column_end(); + + for (; cs_iter != cs_iter_end; cs_iter++) { + const ObColumnSchemaV2 &column_schema = **cs_iter; + + if (! 
column_schema.is_hidden()) { + ret_column_count++; + } + } + + return ret_column_count; +} + +const char *print_record_type(int type) +{ + static const char *str = "UNKNOWN"; + + switch (type) + { + case EDELETE: + str = "DELETE"; + break; + + case EINSERT: + str = "INSERT"; + break; + + case EREPLACE: + str = "REPLACE"; + break; + + case EUPDATE: + str = "UPDATE"; + break; + + case HEARTBEAT: + str = "HEARTBEAT"; + break; + + case CONSISTENCY_TEST: + str = "CONSISTENCY_TEST"; + break; + + case EBEGIN: + str = "EBEGIN"; + break; + + case ECOMMIT: + str = "ECOMMIT"; + break; + + case EDDL: + str = "EDDL"; + break; + + case EROLLBACK: + str = "EROLLBACK"; + break; + + case EDML: + str = "EDML"; + break; + + default: + str = "UNKNOWN"; + break; + } + + return str; +} + +const char *print_src_category(int src_category) +{ + static const char *sc_name = "UNKNOWN"; + + switch (src_category) + { + case SRC_FULL_RECORDED: + sc_name = "SRC_FULL_RECORDED"; + break; + + case SRC_FULL_RETRIEVED: + sc_name = "SRC_FULL_RETRIEVED"; + break; + + case SRC_FULL_FAKED: + sc_name = "SRC_FULL_FAKED"; + break; + + case SRC_PART_RECORDED: + sc_name = "SRC_PART_RECORDED"; + break; + + default: + sc_name = "UNKNOWN"; + break; + } + + return sc_name; +} + +const char *print_record_src_type(int type) +{ + static const char *st_name = "UNKNOWN"; + + switch (type) + { + case SRC_MYSQL: + st_name = "SRC_MYSQL"; + break; + + case SRC_OCEANBASE: + st_name = "SRC_OCEANBASE"; + break; + + case SRC_HBASE: + st_name = "SRC_HBASE"; + break; + + case SRC_ORACLE: + st_name = "SRC_ORACLE"; + break; + + case SRC_OCEANBASE_1_0: + st_name = "SRC_OCEANBASE_1_0"; + break; + + case SRC_UNKNOWN: + st_name = "SRC_UNKNOWN"; + break; + + default: + st_name = "UNKNOWN"; + break; + } + + return st_name; +} + +const char *print_compat_mode(const share::ObWorker::CompatMode &compat_mode) +{ + const char *compat_mode_str = "INVALID"; + + switch (compat_mode) { + case ObWorker::CompatMode::MYSQL: + compat_mode_str = "MYSQL"; + break; + + case ObWorker::CompatMode::ORACLE: + compat_mode_str = "ORACLE"; + break; + + default: + compat_mode_str = "INVALID"; + break; + } + + return compat_mode_str; +} + +const char *get_ctype_string(int ctype) +{ + const char *sc_type = "UNKNOWN"; + + switch (ctype) + { + case oceanbase::obmysql::MYSQL_TYPE_DECIMAL: + sc_type = "MYSQL_TYPE_DECIMAL"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_TINY: + sc_type = "MYSQL_TYPE_TINY"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_SHORT: + sc_type = "MYSQL_TYPE_SHORT"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_LONG: + sc_type = "MYSQL_TYPE_LONG"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_FLOAT: + sc_type = "MYSQL_TYPE_FLOAT"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_DOUBLE: + sc_type = "MYSQL_TYPE_DOUBLE"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_NULL: + sc_type = "MYSQL_TYPE_NULL"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_TIMESTAMP: + sc_type = "MYSQL_TYPE_TIMESTAMP"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_LONGLONG: + sc_type = "MYSQL_TYPE_LONGLONG"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_INT24: + sc_type = "MYSQL_TYPE_INT24"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_DATE: + sc_type = "MYSQL_TYPE_DATE"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_TIME: + sc_type = "MYSQL_TYPE_TIME"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_DATETIME: + sc_type = "MYSQL_TYPE_DATETIME"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_YEAR: + sc_type = "MYSQL_TYPE_YEAR"; + break; + + case 
oceanbase::obmysql::MYSQL_TYPE_NEWDATE: + sc_type = "MYSQL_TYPE_NEWDATE"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_VARCHAR: + sc_type = "MYSQL_TYPE_VARCHAR"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_BIT: + sc_type = "MYSQL_TYPE_BIT"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_TIMESTAMP_WITH_TIME_ZONE: + sc_type = "MYSQL_TYPE_OB_TIMESTAMP_WITH_TIME_ZONE"; + break; + case oceanbase::obmysql::MYSQL_TYPE_OB_TIMESTAMP_WITH_LOCAL_TIME_ZONE: + sc_type = "MYSQL_TYPE_OB_TIMESTAMP_WITH_LOCAL_TIME_ZONE"; + break; + case oceanbase::obmysql::MYSQL_TYPE_OB_TIMESTAMP_NANO: + sc_type = "MYSQL_TYPE_OB_TIMESTAMP_NANO"; + break; + case oceanbase::obmysql::MYSQL_TYPE_OB_RAW: + sc_type = "MYSQL_TYPE_OB_RAW"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_NEWDECIMAL: + sc_type = "MYSQL_TYPE_NEWDECIMAL"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_ENUM: + sc_type = "MYSQL_TYPE_ENUM"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_SET: + sc_type = "MYSQL_TYPE_SET"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_TINY_BLOB: + sc_type = "MYSQL_TYPE_TINY_BLOB"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_MEDIUM_BLOB: + sc_type = "MYSQL_TYPE_MEDIUM_BLOB"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_LONG_BLOB: + sc_type = "MYSQL_TYPE_LONG_BLOB"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_BLOB: + sc_type = "MYSQL_TYPE_BLOB"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_VAR_STRING: + sc_type = "MYSQL_TYPE_VAR_STRING"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_STRING: + sc_type = "MYSQL_TYPE_STRING"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_GEOMETRY: + sc_type = "MYSQL_TYPE_GEOMETRY"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_NOT_DEFINED: + sc_type = "MYSQL_TYPE_NOT_DEFINED"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_INTERVAL_YM: + sc_type = "MYSQL_TYPE_OB_INTERVAL_YM"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_INTERVAL_DS: + sc_type = "MYSQL_TYPE_OB_INTERVAL_DS"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_NUMBER_FLOAT: + sc_type = "MYSQL_TYPE_OB_NUMBER_FLOAT"; + break; + case oceanbase::obmysql::MYSQL_TYPE_OB_NVARCHAR2: + sc_type = "MYSQL_TYPE_OB_NVARCHAR2"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_NCHAR: + sc_type = "MYSQL_TYPE_OB_NCHAR"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_UROWID: + sc_type = "MYSQL_TYPE_OB_UROWID"; + break; + + default: + sc_type = "UNKNOWN"; + break; + } + + return sc_type; +} + +bool is_lob_type(const int ctype) +{ + bool bool_ret = false; + switch (ctype) + { + case oceanbase::obmysql::MYSQL_TYPE_TINY_BLOB: + case oceanbase::obmysql::MYSQL_TYPE_MEDIUM_BLOB: + case oceanbase::obmysql::MYSQL_TYPE_LONG_BLOB: + case oceanbase::obmysql::MYSQL_TYPE_BLOB: + bool_ret = true; + break; + + default: + bool_ret = false; + break; + } + + return bool_ret; +} + +double get_delay_sec(const int64_t tstamp) +{ + int64_t delta = (ObTimeUtility::current_time() - tstamp); + double delay_sec = static_cast(delta) / 1000000.0;; + return delay_sec; +} + +bool is_ddl_partition(const common::ObPartitionKey &pkey) +{ + return is_ddl_table(pkey.get_table_id()); +} + +bool is_ddl_table(const uint64_t table_id) +{ + return (OB_ALL_DDL_OPERATION_TID == extract_pure_id(table_id)); +} + +int64_t get_ddl_table_id() +{ + return combine_id(OB_SYS_TENANT_ID, OB_ALL_DDL_OPERATION_TID); +} + +bool is_mysql_client_errno(int err) +{ + return (err <= -2000 && err >= -3000); +} + +bool is_mysql_server_errno(int err) +{ + return (err >= -1999 && err <= -1000); +} + +bool is_ob_sql_errno(int err) +{ + return 
(err > -6000 && err <= -5000); +} + +bool is_ob_trans_errno(int err) +{ + return (err > -7000 && err <= -6000); +} + +bool is_ob_election_errno(int err) +{ + return (err > -7100 && err <= -7000); +} + +void *ob_log_malloc(const int64_t nbyte) +{ + ObMemAttr memattr; + memattr.label_ = ObModIds::OB_LOG_TEMP_MEMORY; + + return ob_malloc(nbyte, memattr); +} + +void ob_log_free(void *ptr) +{ + ob_free(ptr); +} + +void column_cast(common::ObObj &obj, const share::schema::ObColumnSchemaV2 &column_schema) +{ + // Neither the NULL type nor the Ext type update Meta information + if (! obj.is_null() && ! obj.is_ext()) { + // update meta + obj.set_meta_type(column_schema.get_meta_type()); + + // Update scale separately + // The scale information of the bit type has a different meaning than the scale information of the normal type + if (obj.is_bit()) { + obj.set_scale(column_schema.get_accuracy().get_precision()); + } else { + obj.set_scale(column_schema.get_accuracy().get_scale()); + } + } +} + +void column_cast(common::ObObj &obj, const ColumnSchemaInfo &column_schema_info) +{ + // Neither the NULL type nor the Ext type update Meta information + if (! obj.is_null() && ! obj.is_ext()) { + // upadte Meta + obj.set_meta_type(column_schema_info.get_meta_type()); + + // Update scale separately + // The scale information of the bit type has a different meaning than the scale information of the normal type + if (obj.is_bit()) { + obj.set_scale(column_schema_info.get_accuracy().get_precision()); + } else { + obj.set_scale(column_schema_info.get_accuracy().get_scale()); + } + } +} + +// OB_APP_MIN_COLUMN_ID: 16 +// OB_MIN_SHADOW_COLUMN_ID : 32767 +int filter_non_user_column(const bool is_hidden_pk_table, + const bool enable_output_hidden_primary_key, + const uint64_t column_id, + bool &is_non_user_column, + bool &is_hidden_pk_table_pk_increment_column) +{ + int ret = OB_SUCCESS; + is_non_user_column = false; + is_hidden_pk_table_pk_increment_column = false; + + if (OB_UNLIKELY(OB_INVALID_ID == column_id)) { + LOG_ERROR("invalid argument", K(column_id)); + ret = OB_INVALID_ARGUMENT; + } else if (is_hidden_pk_table + && OB_HIDDEN_PK_INCREMENT_COLUMN_ID == column_id + && enable_output_hidden_primary_key) { + is_non_user_column = false; + is_hidden_pk_table_pk_increment_column = true; + } else { + is_non_user_column = (column_id < OB_APP_MIN_COLUMN_ID) || (column_id >= OB_MIN_SHADOW_COLUMN_ID); + } + + return ret; +} + +int Runnable::create() +{ + int ret = common::OB_SUCCESS; + int errcode = 0; + if (0 != (errcode = pthread_create(&thread_, NULL, pthread_routine, this))) { + ret = common::OB_ERR_UNEXPECTED; + LOG_ERROR("err create pthread", KR(ret), K(errcode)); + } + else { + joinable_ = true; + } + return ret; +} + +int Runnable::join() +{ + int ret = common::OB_SUCCESS; + int errcode = 0; + void *thread_ret = NULL; + if (!joinable_ || 0 != (errcode = pthread_join(thread_, &thread_ret))) { + ret = common::OB_ERR_UNEXPECTED; + LOG_ERROR("err join pthread", KR(ret), K(errcode)); + } else if (OB_FAIL(static_cast(reinterpret_cast(thread_ret)))) { + LOG_WARN("thread exit on err", KR(ret)); + } + else { + joinable_ = false; + } + return ret; +} + +void* Runnable::pthread_routine(void* arg) +{ + return reinterpret_cast((static_cast(arg))->routine()); +} + +int ObLogKVCollection::KVPair::init(const char* delimiter) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(delimiter)) { + LOG_ERROR("invalid argument", K(delimiter)); + 
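filter_non_user_column() above classifies a column purely by its id: user-visible columns fall in [OB_APP_MIN_COLUMN_ID, OB_MIN_SHADOW_COLUMN_ID), i.e. [16, 32767) per the comment in the patch, and the single exception is the hidden primary-key auto-increment column, which is kept when output of hidden primary keys is enabled. The sketch below mirrors that branch structure; the 16/32767 bounds come from the comment, while the hidden-pk column id is a hypothetical placeholder, not the real OceanBase constant.

```cpp
#include <cassert>
#include <cstdint>

constexpr uint64_t kAppMinColumnId      = 16;     // OB_APP_MIN_COLUMN_ID (per comment)
constexpr uint64_t kMinShadowColumnId   = 32767;  // OB_MIN_SHADOW_COLUMN_ID (per comment)
constexpr uint64_t kHiddenPkIncrementId = 1;      // placeholder, not the real id

// Returns true when the column is not user-visible and should be filtered out.
bool is_non_user_column(bool is_hidden_pk_table,
                        bool enable_output_hidden_pk,
                        uint64_t column_id,
                        bool &is_hidden_pk_increment) {
  is_hidden_pk_increment = false;
  if (is_hidden_pk_table && enable_output_hidden_pk &&
      column_id == kHiddenPkIncrementId) {
    is_hidden_pk_increment = true;  // kept even though it is an internal column
    return false;
  }
  return column_id < kAppMinColumnId || column_id >= kMinShadowColumnId;
}

int main() {
  bool hidden_pk = false;
  assert(is_non_user_column(false, false, 16, hidden_pk) == false);    // first user column
  assert(is_non_user_column(false, false, 5, hidden_pk) == true);      // internal column
  assert(is_non_user_column(false, false, 40000, hidden_pk) == true);  // shadow column
  return 0;
}
```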
ret = OB_INVALID_ARGUMENT; + } else { + delimiter_ = delimiter; + inited_ = true; + } + return ret; +} + +int ObLogKVCollection::KVPair::set_key_and_value(const char* key, const char* value) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("kv not init", K(inited_)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(key) || OB_ISNULL(value)) { + LOG_ERROR("data for kvpair should not be null", K(key), K(value)); + ret = OB_INVALID_DATA; + } else { + key_ = key; + value_ = value; + } + return ret; +} + +bool ObLogKVCollection::KVPair::is_valid() const +{ + return inited_ && NULL != delimiter_ && NULL != key_ && NULL != value_; +} + +int ObLogKVCollection::KVPair::length() const +{ + int len = 0; + if (is_valid()) { + len += strlen(key_); + len += strlen(value_); + len += strlen(delimiter_); + } + return len; +} + +int ObLogKVCollection::KVPair::serialize(char* buf, int64_t buf_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid())) { + LOG_ERROR("kv pair not valid", K_(inited), K_(delimiter), K_(key), K_(value)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len <= 0)) { + LOG_ERROR("invalid argument", K(buf), K(buf_len), K(pos)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "%s%s%s", key_, delimiter_, value_))) { + LOG_ERROR("databuff_printf fail", K(ret), K(buf_len), K(pos), K_(key), K_(value), K_(delimiter)); + } else if (OB_UNLIKELY(pos >= buf_len)) { + LOG_ERROR("buf is not enough", K(pos), K(buf_len)); + ret = OB_BUF_NOT_ENOUGH; + } else { + buf[pos] = '\0'; + } + return ret; +} + +int ObLogKVCollection::KVPair::deserialize(char *buf) +{ + int ret = OB_SUCCESS; + char *save_ptr = NULL; + if (!inited_) { + LOG_ERROR("kv pair not init", K_(inited)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(buf)) { + LOG_ERROR("invalid argument", K(buf)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(key_ = strtok_r(buf, delimiter_, &save_ptr))) { + LOG_ERROR("key is null", K(save_ptr)); + ret = OB_INVALID_DATA; + } else if (OB_ISNULL(value_ = strtok_r(NULL, delimiter_, &save_ptr))) { + LOG_ERROR("value is null", K(save_ptr)); + ret = OB_INVALID_DATA; + } else { + LOG_DEBUG("found key and value", K_(key), K_(value)); + } + return ret; +} + +int ObLogKVCollection::init(const char *kv_delimiter, const char *pair_delimiter) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("init twice", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_ISNULL(kv_delimiter_ = kv_delimiter) || OB_ISNULL(pair_delimiter_ = pair_delimiter)){ + LOG_ERROR("invalid argument", K(kv_delimiter), K(pair_delimiter)); + ret = OB_INVALID_ARGUMENT; + } else { + kv_pairs_.reset(); + inited_ = true; + } + return ret; +} + +bool ObLogKVCollection::is_valid() const +{ + bool valid = true; + if (OB_UNLIKELY(!inited_) || OB_ISNULL(kv_delimiter_) || OB_ISNULL(pair_delimiter_)) { + LOG_ERROR("invalid argument", K_(inited), K_(kv_delimiter), K_(pair_delimiter)); + valid = false; + } else { + int64_t idx = 0; + int64_t size = kv_pairs_.size(); + while (idx < size && valid) { + KVPair pair = kv_pairs_.at(idx); + valid = pair.is_valid(); + idx ++; + } + } + return valid; +} + +int ObLogKVCollection::length() const +{ + int len = 0; + int64_t idx = 0; + int64_t size = kv_pairs_.size(); + if (is_valid()) { + while (idx < size) { + KVPair pair = kv_pairs_.at(idx); + len += pair.length(); + if (idx != size-1) { + len += strlen(pair_delimiter_); + } + idx ++; + } + } + return len; +} + +int 
ObLogKVCollection::append_kv_pair(KVPair &kv_pair) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid() || OB_UNLIKELY(!kv_pair.is_valid()))) { + LOG_ERROR("oblog kv collection or kv_pair not valid", K(kv_pair)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(kv_pairs_.push_back(kv_pair))) { + LOG_ERROR("push kv_pair to kv collection failed", KR(ret), K(kv_pair), K_(kv_pairs)); + } + return ret; +} + +int ObLogKVCollection::serialize(char* kv_str, const int64_t kv_str_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(kv_str) || OB_UNLIKELY(kv_str_len <= 0)) { + LOG_ERROR("invalid argument", K(kv_str), K(kv_str_len), K(pos)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t size = kv_pairs_.size(); + int64_t idx = 0; + while (OB_SUCC(ret) && idx < size) { + KVPair kvpair = kv_pairs_.at(idx); + if (OB_FAIL(kvpair.serialize(kv_str, kv_str_len, pos))) { + LOG_ERROR("serialize kvpair fail", K(kv_str), K(kv_str_len), K(pos)); + } else if (idx < size - 1 && OB_FAIL(databuff_printf(kv_str, kv_str_len, pos, "%s", pair_delimiter_))) { + LOG_ERROR("append pair delimiter fail", KR(ret), K(idx), K(size), K(kv_str), K(kv_str_len), K(pos), K_(pair_delimiter)); + } else if (OB_UNLIKELY(pos >= kv_str_len)) { + LOG_ERROR("buf is not enough", K(kv_str), K(pos), K(kv_str_len)); + ret = OB_BUF_NOT_ENOUGH; + } else { + kv_str[pos] = '\0'; + } + idx ++; + } + } + return ret; +} + +int ObLogKVCollection::deserialize(char* kv_str) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("kv collection not init", K_(inited)); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(kv_str) || OB_ISNULL(kv_delimiter_) || OB_ISNULL(pair_delimiter_)) { + LOG_ERROR("invalid argument", K(kv_str), K_(kv_delimiter), K_(pair_delimiter)); + ret = OB_INVALID_ARGUMENT; + } else { + char *kv = NULL; + char *save_ptr = NULL; + while (OB_SUCC(ret) && (NULL != (kv = strtok_r(kv_str, pair_delimiter_, &save_ptr)))) { + KVPair kvpair; + if (OB_FAIL(kvpair.init(kv_delimiter_))) { + LOG_ERROR("failed to init kv pair", KR(ret), K(kv), K(kv_str), K(save_ptr)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(kvpair.deserialize(kv))) { + LOG_ERROR("failed to deserialilze kv str", KR(ret), K(kvpair), K(kv)); + } else { + kv_pairs_.push_back(kvpair); + } + kv_str = NULL; + } + } + return ret; +} + +int ObLogKVCollection::contains_key(const char* key, bool &contain) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid())) { + LOG_ERROR("kv collection not valid", K(key)); + ret = OB_ERR_UNEXPECTED; + } else { + contain = false; + int64_t size = kv_pairs_.size(); + int64_t idx = 0; + while (idx < size && !contain) { + KVPair kv = kv_pairs_.at(idx); + const char *key_ = kv.get_key(); + contain = (0 == strcmp(key, key_)); + idx ++; + } + } + return ret; +} + +int ObLogKVCollection::get_value_of_key(const char *key, const char *&value) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid())) { + LOG_ERROR("kv collection invalid", K(key), K(kv_pairs_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(NULL == key)) { + LOG_ERROR("invalid argument(key should not be null)"); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t size = kv_pairs_.size(); + int64_t idx = 0; + bool found = false; + while (idx < size && !found) { + KVPair kv = kv_pairs_.at(idx); + const char *key_ = kv.get_key(); + found = (0 == strcmp(key, key_)); + value = kv.get_value(); + idx ++; + } + if (!found) { + ret = OB_ENTRY_NOT_EXIST; + } + if (NULL == value) { + ret = OB_INVALID_DATA; + } + } + return ret; +} + +int split(char *str, const char 
*delimiter, + const int64_t expect_res_cnt, const char **res, int64_t &res_cnt) +{ + int ret = OB_SUCCESS; + res_cnt = 0; + + if ((OB_ISNULL(str) || OB_UNLIKELY(0 == strlen(str))) || + (OB_ISNULL(delimiter) || OB_UNLIKELY(0 == strlen(delimiter))) || + OB_UNLIKELY(expect_res_cnt <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KP(str), KP(delimiter), K(expect_res_cnt)); + } else { + char *ptr = NULL; + char *p = NULL; + int64_t idx = 0; + + ptr = strtok_r(str, delimiter, &p); + + while (OB_SUCCESS == ret && ptr != NULL) { + if (idx++ < expect_res_cnt) { + *res++ = ptr; + ++res_cnt; + ptr = strtok_r(NULL, delimiter, &p); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("split error, check input parameter", KP(str), KP(delimiter), + K(idx), K(expect_res_cnt), K(res_cnt)); + } + } + } + + return ret; +} + +int split_int64(const ObString &str, const char delimiter, ObIArray &ret_array) +{ + int ret = OB_SUCCESS; + void *buffer = NULL; + int64_t buf_len = str.length() + 1; // include the last '\0' + ObString data_str; + + if (str.length() <= 0) { + // empty string + } else if (OB_ISNULL(buffer = ob_malloc(buf_len))) { + LOG_ERROR("allocate memory for buffer fail", K(buffer), K(str.length())); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + ObDataBuffer data_buffer(static_cast(buffer), buf_len); + + if (OB_FAIL(data_str.clone(str, data_buffer))) { + LOG_ERROR("clone string fail", KR(ret), K(str), K(data_buffer)); + } else { + bool done = false; + + // fill '\0' at tail + data_str.ptr()[data_str.length()] = '\0'; + + // Split all substrings, converting each one to int + while (data_str.length() > 0 && OB_SUCCESS == ret && ! done) { + ObString ret_str = data_str.split_on(delimiter); + + if (ret_str.length() <= 0) { + // If the ret_str pointer is not empty but has a length of 0, there is no valid content before the separator + // Continue parsing + if (NULL != ret_str.ptr()) { + continue; + } + + // If ret_str is empty, the target string does not contain the corresponding separator + // In this case the target string is analysed directly + ret_str = data_str; + done = true; + } + + char *end_ptr = NULL; + + // file '\0' + ret_str.ptr()[ret_str.length()] = '\0'; + + // clear errno + errno = 0; + + // convert string to int + int64_t ret_int_val = strtoll(ret_str.ptr(), &end_ptr, 10); + + // If there is an error in the conversion process, or if the characters in the string are not all converted to numbers + // it means that the original string is faulty and cannot be converted to string + if (errno != 0 || (NULL != end_ptr && *end_ptr != '\0')) { + LOG_ERROR("strtoll convert string to int value fail", K(ret_int_val), K(ret_str), + K(end_ptr), KP(ret_str.ptr()), KP(end_ptr), "error", strerror(errno)); + ret = OB_INVALID_DATA; + } else if (OB_FAIL(ret_array.push_back(ret_int_val))) { + LOG_ERROR("push back into array fail", KR(ret), K(ret_int_val), K(ret_array)); + } else { + // success + LOG_DEBUG("split_int64", K(done), K(str), K(delimiter), K(data_str), K(ret_str), + K(ret_int_val), K(ret_array)); + } + } + } + } + + if (NULL != buffer) { + ob_free(buffer); + buffer = NULL; + } + + return ret; +} + +int decode_storage_log_type(const clog::ObLogEntry& log_entry, int64_t &pos, + storage::ObStorageLogType &log_type) +{ + int ret = OB_SUCCESS; + const char *buf = log_entry.get_buf(); + const int64_t len = log_entry.get_header().get_data_len(); + int64_t log_type_val = storage::OB_LOG_UNKNOWN; + + if (OB_FAIL(serialization::decode_i64(buf, len, pos, &log_type_val))) { + 
LOG_ERROR("decode log type fail", KR(ret), K(buf), K(len), K(pos), K(log_entry)); + } else { + log_type = static_cast(log_type_val); + } + return ret; +} + +const char *calc_md5_cstr(const char *buf, const int64_t length) +{ + const char *md5_cstr = ""; + + static const int64_t MD5_SUM_LEN = 16; + static const int64_t MD5_STR_LEN = MD5_SUM_LEN * 2 + 1; + static char MD5_SUM_BUF[MD5_SUM_LEN]; + static char MD5_STR_BUF[MD5_STR_LEN]; + const unsigned char *in_buf = reinterpret_cast(buf); + unsigned char *out_buf = reinterpret_cast(MD5_SUM_BUF); + + if (NULL == buf) { + md5_cstr = "NULL"; + } else { + // Calculate MD5 + (void)MD5(in_buf, length, out_buf); + // Print to hex + (void)to_hex_cstr(MD5_SUM_BUF, MD5_SUM_LEN, MD5_STR_BUF, MD5_STR_LEN); + (void)ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, MD5_STR_BUF, MD5_STR_LEN, MD5_STR_BUF, MD5_STR_LEN); + md5_cstr = MD5_STR_BUF; + } + return md5_cstr; +} + +int deep_copy_str(const ObString &src, + ObString &dest, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + char *buf = NULL; + + if (src.length() > 0) { + int64_t len = src.length() + 1; + if (OB_ISNULL(buf = static_cast(allocator.alloc(len)))) { + LOG_ERROR("allocate memory fail", K(len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(buf, src.ptr(), len - 1); + buf[len - 1] = '\0'; + dest.assign_ptr(buf, static_cast(len - 1)); + } + } else { + dest.reset(); + } + + return ret; +} + +#define RETRY_FUNC_CALL(stop_flag, func, args...) \ + RETRY_FUNC_CALL_ON_ERROR(OB_TIMEOUT, stop_flag, func, ##args) + +#define RETRY_FUNC_CALL_ON_ERROR(err_no, stop_flag, func, args...) \ + do {\ + if (OB_SUCC(ret)) \ + { \ + ret = (err_no); \ + while ((err_no) == ret && ! (stop_flag)) \ + { \ + ret = OB_SUCCESS; \ + ret = func(args); \ + } \ + if ((stop_flag)) \ + { \ + ret = OB_IN_STOP_STATE; \ + } \ + } \ + } while (0) + +int get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + const static int64_t GET_TENANT_COMPAT_MODE_TIMEOUT = 1 * 1000 * 1000; + + RETRY_FUNC_CALL(stop_flag, get_tenant_compat_mode, tenant_id, compat_mode, GET_TENANT_COMPAT_MODE_TIMEOUT); + + return ret; +} + +int get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + bool done = false; + int64_t end_time = ObTimeUtility::current_time() + timeout; + + while (OB_SUCC(ret) && ! done) { + if (OB_FAIL(share::ObCompatModeGetter::instance().get_tenant_mode(tenant_id, compat_mode))) { + LOG_WARN("ObCompatModeGetter get_tenant_mode fail", KR(ret), K(tenant_id), "compat_mode", print_compat_mode(compat_mode)); + } else if (OB_UNLIKELY(share::ObWorker::CompatMode::INVALID == compat_mode)) { + LOG_ERROR("tenant compat_mode is invalid", K(tenant_id), "compat_mode", print_compat_mode(compat_mode)); + } else { + done = true; + } + + if (! done) { + // Retry to get it again + ret = OB_SUCCESS; + // After a failure to acquire the tenant schema, and in order to ensure that the modules can handle the performance, usleep for a short time + usleep(100L); + } + + int64_t left_time = end_time - ObTimeUtility::current_time(); + + if (left_time <= 0) { + ret = OB_TIMEOUT; + } + } // while + + return ret; +} + +constexpr uint64_t BackupTableHelper::inner_table_ids[]; +bool BackupTableHelper::is_sys_table_exist_on_backup_mode(const bool is_sys_table, + const uint64_t table_id) +{ + int bret = false; + if (! is_backup_mode() || (! 
is_sys_table)) { + bret = false; + } else { + int32_t size = sizeof(inner_table_ids) / sizeof(uint64_t); + const uint64_t pure_tb_id = extract_pure_id(table_id); + bret = std::binary_search(inner_table_ids, inner_table_ids+size, pure_tb_id); + } + return bret; +} + +int BackupTableHelper::get_table_ids_on_backup_mode(common::ObIArray &table_ids) +{ + int ret = OB_SUCCESS; + if (! is_backup_mode()) { + // skip + } else { + uint64_t table_id = OB_INVALID_ID; + int32_t size = sizeof(inner_table_ids) / sizeof(uint64_t); + for (int32_t i = 0; i < size && OB_SUCC(ret); i++) { + table_id = extract_pure_id(inner_table_ids[i]); + if (OB_LIKELY(is_sys_table(table_id))) { + if (OB_FAIL(table_ids.push_back(table_id))) { + LOG_ERROR("failed to push_back table_id into table_ids array", KR(ret), K(table_id)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("invalid sys table id", K(i), K(table_id)); + } + } + } + return ret; +} + +bool is_backup_mode() +{ + return (TCONF.enable_backup_mode != 0); +} + +char *lbt_oblog() +{ + int ret = OB_SUCCESS; + //As lbt used when print error log, can not print error log + //in this function and functions called. + static __thread void *addrs[100]; + static __thread char buf[LBT_BUFFER_LENGTH]; + int size = backtrace(addrs, 100); + char **res = backtrace_symbols(addrs, 100); + int64_t pos = 0; + + for (int idx = 0; OB_SUCC(ret) && idx < size; ++idx) { + char *res_idx = res[idx]; + + if (NULL != res_idx) { + if (OB_FAIL(databuff_printf(buf, LBT_BUFFER_LENGTH, pos, "%s", res_idx))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(buf), K(pos), K(LBT_BUFFER_LENGTH)); + } + } + } + + if (NULL != res) { + free(res); + } + + return buf; +} + +int get_br_value(ILogRecord *br, + ObArray &new_values) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br)) { + LOG_ERROR("invalid argument"); + ret = OB_INVALID_ARGUMENT; + } else { + StrArray *new_cols = br->parsedNewCols(); + int64_t new_cols_count = 0; + if (NULL != new_cols) { + new_cols_count = new_cols->size(); + } + + const char *new_col_value = NULL; + size_t new_col_value_len = 0; + int64_t index = 0; + + while (OB_SUCC(ret) && index < new_cols_count) { + ret = new_cols->elementAt(index, new_col_value, new_col_value_len); + BRColElem new_col_elem(new_col_value, new_col_value_len); + + if (OB_FAIL(new_values.push_back(new_col_elem))) { + LOG_ERROR("new_values push_back fail", KR(ret)); + } else { + ++index; + } + } + } + + return ret; +} + +int get_mem_br_value(ILogRecord *br, + ObArray &new_values) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br)) { + LOG_ERROR("invalid argument"); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t new_cols_count = 0; + BinLogBuf *new_cols = br->newCols((unsigned int &)new_cols_count); + int64_t index = 0; + + while (OB_SUCC(ret) && index < new_cols_count) { + const char *new_col_value = new_cols[index].buf; + size_t new_col_value_len = static_cast(new_cols[index].buf_used_size); + + BRColElem new_col_elem(new_col_value, new_col_value_len); + + if (OB_FAIL(new_values.push_back(new_col_elem))) { + LOG_ERROR("new_values push_back fail", KR(ret)); + } else { + ++index; + } + } + } + return ret; +} + +int c_str_to_int(const char* str, int64_t &num) +{ + int ret = OB_SUCCESS; + errno = 0; + char *end_str = NULL; + if (OB_ISNULL(str) || OB_UNLIKELY(0 == strlen(str))) { + LOG_ERROR("c_str_to_int str should not null"); + ret = OB_INVALID_ARGUMENT; + } else { + num = strtoll(str, &end_str, 10); + if (errno != 0 || (NULL != end_str && *end_str != '\0')) { + LOG_ERROR("strtoll convert string to int 
value fail", K(str), K(num), + "error", strerror(errno), K(end_str)); + ret = OB_INVALID_DATA; + } + } + return ret; +} + +////////////////////////////////////////////////////////////////// + +} /* liboblog */ +} /* oceanbase */ diff --git a/src/liboblog/src/ob_log_utils.h b/src/liboblog/src/ob_log_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..06eee91d39220636ef2cbcb334bb95ce26f7c11c --- /dev/null +++ b/src/liboblog/src/ob_log_utils.h @@ -0,0 +1,533 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_UTILS_H__ +#define OCEANBASE_LIBOBLOG_UTILS_H__ + +#include // RecordType + +#include "lib/allocator/ob_allocator.h" // ObIAllocator +#include "lib/allocator/ob_malloc.h" // ob_malloc +#include "lib/allocator/ob_mod_define.h" // ObModIds +#include "lib/container/ob_iarray.h" // ObIArray +#include "lib/container/ob_array.h" // ObArray +#include "lib/container/ob_array_iterator.h" //ObArrayIterator +#include "lib/container/ob_se_array_iterator.h"//ObSEArrayIterator +#include "common/object/ob_object.h" // ObObj +#include "share/schema/ob_column_schema.h" // ObColumnSchemaV2 +#include "clog/ob_log_entry.h" // ObLogEntry +#include "storage/ob_storage_log_type.h" // ObStorageLogType +#include "storage/ob_i_store.h" // ObRowDml +#include "ob_log_schema_cache_info.h" // ColumnSchemaInfo +#include "share/schema/ob_schema_service.h" // ObSchemaService +#include "share/inner_table/ob_inner_table_schema.h" // OB_ALL_SEQUENCE_VALUE_TID +#include "share/ob_get_compat_mode.h" // ObCompatModeGetter + +using namespace oceanbase::logmessage; + +namespace oceanbase +{ +namespace share +{ +namespace schema +{ +class ObTableSchema; +} +} + +namespace common +{ +class ObString; +} + +namespace liboblog +{ +/* + * Memory size. + */ +static const int64_t _K_ = (1L << 10); +static const int64_t _M_ = (1L << 20); +static const int64_t _G_ = (1L << 30); +static const int64_t _T_ = (1L << 40); + +/* + * Time utils. + * Microsecond Timestamp Generator. + * Time Constants. + * Stop Watch. + * Time Marker. 
+*/ +inline void usec_sleep(const int64_t u) { usleep(static_cast<__useconds_t>(u)); } + +typedef common::ObSEArray ObLogIdArray; +#define TS_TO_STR(tstamp) HumanTstampConverter(tstamp).str() +#define TVAL_TO_STR(tval) HumanTimevalConverter(tval).str() + +const int64_t _MSEC_ = 1000L; +const int64_t _SEC_ = 1000L * _MSEC_; +const int64_t _MIN_ = 60L * _SEC_; +const int64_t _HOUR_ = 60L * _MIN_; +const int64_t _DAY_ = 24L * _HOUR_; +const int64_t _YEAR_ = 365L * _DAY_; + +int print_human_tstamp(char *buf, const int64_t buf_len, int64_t &pos, + const int64_t usec_tstamp); + +int print_human_timeval(char *buf, const int64_t buf_len, int64_t &pos, + const int64_t usec_tval); + +class HumanTstampConverter +{ +public: + explicit HumanTstampConverter(const int64_t usec_tstamp) + { + buf_[0] = '\0'; + int64_t pos = 0; + (void)print_human_tstamp(buf_, BufLen, pos, usec_tstamp); + } + virtual ~HumanTstampConverter() + { + buf_[0] = '\0'; + } + const char* str() const + { + return buf_; + } +private: + const static int64_t BufLen = 64; + char buf_[BufLen]; +}; + +class HumanTimevalConverter +{ +public: + explicit HumanTimevalConverter(const int64_t usec_tval) + { + buf_[0] = '\0'; + int64_t pos = 0; + (void)print_human_timeval(buf_, BufLen, pos, usec_tval); + } + virtual ~HumanTimevalConverter() + { + buf_[0] = '\0'; + } + const char *str() const + { + return buf_; + } +private: + const static int64_t BufLen = 64; + char buf_[BufLen]; +}; + +inline int64_t get_timestamp() { return ::oceanbase::common::ObTimeUtility::current_time(); } + +class HumanDataSizeConverter +{ + static const int64_t BufSize = 128; +public: + explicit HumanDataSizeConverter(const int64_t bytes) : bytes_(bytes) {} + ~HumanDataSizeConverter() {} + const char* to_data_size_cstr() + { + double val = 0; + int64_t pos = 0; + const char *unit = ""; + if (bytes_ < _K_) { + val = (double)bytes_; + unit = "B"; + } + else if (bytes_ < _M_) { + val = (double)bytes_ / (double)_K_; + unit = "KB"; + } + else if (bytes_ < _G_) { + val = (double)bytes_ / (double)_M_; + unit = "MB"; + } + else { + val = (double)bytes_ / (double)_G_; + unit = "GB"; + } + + (void)common::databuff_printf(buf_, BufSize, pos, "%.2f%s", val, unit); + + return buf_; + } + const char *str() + { + return to_data_size_cstr(); + } +private: + int64_t bytes_; + char buf_[BufSize]; +}; + +// Converting data sizes to strings +#define SIZE_TO_STR(size) HumanDataSizeConverter(size).str() + +class TstampToDelay +{ +public: + explicit TstampToDelay(const int64_t tstamp) + { + if (common::OB_INVALID_TIMESTAMP == tstamp) { + (void)snprintf(buf_, sizeof(buf_), "[INVALID]"); + } else { + int64_t cur_time = get_timestamp(); + int64_t delay_us = (cur_time - tstamp) % _SEC_; + int64_t delay_sec = (cur_time - tstamp) / _SEC_; + + buf_[0] = '\0'; + + (void)snprintf(buf_, sizeof(buf_), "[%ld.%.06ld sec]", delay_sec, delay_us); + } + } + ~TstampToDelay() { buf_[0] = '\0'; } + const char *str() const + { + return buf_; + } +private: + const static int64_t BufLen = 64; + char buf_[BufLen]; +}; + +// Converting timestamps to DELAY strings.[1000.0001 sec] +#define TS_TO_DELAY(tstamp) TstampToDelay(tstamp).str() + +class StopWatch +{ +public: + StopWatch() : start_(0), elapsed_(0) { } + virtual ~StopWatch() { } +public: + void start() { start_ = get_timestamp(); } + void pause() { elapsed_ += (get_timestamp() - start_); } + void reset() { start_ = 0; elapsed_ = 0; } + double elapsed_sec() const { return static_cast( + elapsed_msec()) / 1000.0; } + int64_t elapsed_msec() const { return 
elapsed_usec() / 1000; } + int64_t elapsed_usec() const { return elapsed_; } + +private: + int64_t start_; + int64_t elapsed_; +}; + +int get_local_ip(common::ObString &local_ip); + +RecordType get_record_type(const storage::ObRowDml &dml_type); +const char *print_dml_type(const storage::ObRowDml &dml_type); +const char *print_record_type(int type); +const char *print_src_category(int src_category); +const char *print_record_src_type(int type); +const char *print_table_status(int status); +// Print compatible mode +const char *print_compat_mode(const share::ObWorker::CompatMode &compat_mode); +const char *get_ctype_string(int ctype); +bool is_lob_type(const int ctype); +int64_t get_non_hidden_column_count(const oceanbase::share::schema::ObTableSchema &table_schema); + +double get_delay_sec(const int64_t tstamp); + +bool is_ddl_partition(const common::ObPartitionKey &pkey); +bool is_ddl_table(const uint64_t table_id); +int64_t get_ddl_table_id(); + +// Is MySQL Client Error Code +bool is_mysql_client_errno(int err); +// Is MySQL Server Error Code +bool is_mysql_server_errno(int err); + +// Is ERROR CODE of OB SQL +bool is_ob_sql_errno(int err); + +// Is ERROR CODE of OB TRANS +bool is_ob_trans_errno(int err); + +// Is ERROR CODE of OB ELECTION +bool is_ob_election_errno(int err); + +// Encapsulated temporary memory allocator +void *ob_log_malloc(const int64_t nbyte); +void ob_log_free(void *ptr); + +class ObLogBufAllocator : public common::ObIAllocator +{ +public: + ObLogBufAllocator(char *buf, const int64_t buf_size, int64_t &used_buf_len) : + buf_(buf), + buf_size_(buf_size), + used_buf_len_(used_buf_len) + { + }; + void *alloc(const int64_t size) + { + char *ret = NULL; + if (NULL != buf_ && (used_buf_len_ + size) <= buf_size_) { + ret = buf_ + used_buf_len_; + used_buf_len_ += size; + } + return ret; + }; +private: + char *const buf_; + const int64_t buf_size_; + int64_t &used_buf_len_; +}; + +void column_cast(common::ObObj &obj, const share::schema::ObColumnSchemaV2 &column_schema); +void column_cast(common::ObObj &obj, const ColumnSchemaInfo &column_schema_info); + +// 1. Filter non-user columns +// User columns column id from OB_APP_MIN_COLUMN_ID, less than OB_MIN_SHADOW_COLUMN_ID +// 2. Backup link: enable_output_hidden_primary_key=true No primary key table column_id=1 No filtering +// Scenario: create table without primary key, first column is self-incrementing, then hidden primary key is less than 16, and is a hidden column +// 3. DRC link: default enable_output_hidden_primary_key=false filter no_primary_key_table +int filter_non_user_column(const bool is_hidden_pk_table, + const bool enable_output_hidden_primary_key, + const uint64_t column_id, + bool &is_non_user_column, + bool &is_hidden_pk_table_pk_increment_column); + +/* + * Runnable. + * Call create() to run a thread, join() to wait till it dies. + * Write code running in thread in routine(). Its error code is returned + * from join(). 
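+ *
+ * A minimal illustrative sketch (the Worker class below is hypothetical and
+ * not part of this patch):
+ *
+ *   class Worker : public Runnable
+ *   {
+ *   protected:
+ *     int routine() { return common::OB_SUCCESS; }   // body runs in the new thread
+ *   };
+ *
+ *   Worker w;
+ *   if (common::OB_SUCCESS == w.create()) {   // spawn the thread
+ *     int err = w.join();                     // wait for exit; returns routine()'s error code
+ *   }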
+ */ +class Runnable +{ + typedef Runnable MyType; +public: + Runnable() : thread_(), joinable_(false) { } + virtual ~Runnable() { } + int create(); + int join(); + bool is_joinable() const { return joinable_; } +protected: + virtual int routine() = 0; +private: + static void* pthread_routine(void* arg); +private: + pthread_t thread_; + bool joinable_; +private: + DISALLOW_COPY_AND_ASSIGN(Runnable); +}; + +// filter inner table + +using namespace oceanbase::share; +class BackupTableHelper +{ +private: + // Add table by TID increment + static constexpr uint64_t inner_table_ids[] = { + OB_ALL_SEQUENCE_VALUE_TID // 215 + }; + +private: + BackupTableHelper() { } + virtual ~BackupTableHelper() { } +public: + static bool is_sys_table_exist_on_backup_mode(const bool is_sys_table, const uint64_t table_id); + static int get_table_ids_on_backup_mode(common::ObIArray &table_ids); +}; + +// key-value collection +// key1${sp1}val1${sp2}key2${sp1}val2${sp2}key3${sp1}val3 +// +// currently memory of key/value managed by user! +// input kv_str will be modified by serialize/deserialize of kv pair! +class ObLogKVCollection +{ +public: + // key-value pair + class KVPair + { + public: + KVPair() { reset(); } + virtual ~KVPair() { reset(); } + + int init(const char* delimiter); + void reset() + { + key_ = NULL; + value_ = NULL; + delimiter_ = NULL; + inited_ = false; + } + int set_key_and_value(const char* key, const char* value); + const char* get_key() const { return key_; } + const char* get_value() const { return value_; } + int length() const; + bool is_valid() const; + // set_key_and_value before use this + // output to a k-v string, linked by provider splitor + // + // @param [out] buf buf store serialized kv_str(key${delimiter}value) + // @param [in] buf_len total buf size + // @param [out] pos current modified buf position + int serialize(char* buf, int64_t buf_len, int64_t &pos); + // deserialize string to KV Pair, with split + int deserialize(char* buf); + TO_STRING_KV(K_(inited), K_(delimiter), KP_(key), K_(key), KP_(value), K_(value)); + + private: + bool inited_; + const char* key_; + const char* value_; + const char* delimiter_; + }; +public: + ObLogKVCollection() {reset();} + virtual ~ObLogKVCollection() { reset(); } + +public: + int init(const char* kv_delimiter, const char* pair_delimiter); + void reset() + { + kv_pairs_.reset(); + kv_delimiter_ = NULL; + pair_delimiter_ = NULL; + inited_ = false; + } + // @param [input] kv_pair + int append_kv_pair(KVPair &kv_pair); + bool is_valid() const; + // return number of kv pairs + int64_t size() const { return kv_pairs_.size(); } + // return length of kv_str(prediction) + int length() const; + // serialize this collection to a kv-string + int serialize(char* kv_str_output, const int64_t kv_str_len, int64_t &pos); + // deserialize string to KV Pair, with split + int deserialize(char* buf); + int contains_key(const char* key, bool &contain); + int get_value_of_key(const char *key, const char *&value); + TO_STRING_KV(K_(inited), K_(kv_delimiter), K_(pair_delimiter), K_(kv_pairs)); + +private: + common::ObArray kv_pairs_; + bool inited_; + const char* kv_delimiter_; + const char* pair_delimiter_; +}; + +/// split string by separator +/// +/// @param [in] str str to split +/// @param [in] delimiter delimiter/separator +/// @param [int] expect_res_cnt expected res count +/// @param [out] res split result array +/// @param [out] res_cnt count of split result +/// +/// @retval OB_SUCCESS split success +/// @retval other_error_code Fail +int split(char *str, 
const char *delimiter, + const int64_t expect_res_cnt, const char **res, int64_t &res_cnt); + +int split_int64(const common::ObString &str, const char delimiter, common::ObIArray &ret_array); + +int decode_storage_log_type(const clog::ObLogEntry& log_entry, int64_t &pos, + storage::ObStorageLogType &log_type); +const char *calc_md5_cstr(const char *buf, const int64_t length); + +template +int top_k(const common::ObArray &in_array, + const int64_t k_num, + common::ObArray &out_array, + CompareFunc &compare_func) +{ + int ret = common::OB_SUCCESS; + int64_t array_cnt = in_array.count(); + int64_t cnt = std::min(k_num, array_cnt); + + for (int64_t idx = 0; common::OB_SUCCESS == ret && idx < cnt; ++idx) { + if (OB_FAIL(out_array.push_back(in_array.at(idx)))) { + OBLOG_LOG(ERROR, "push back into slow array fail", KR(ret), K(idx)); + } else { + // do nothing + } + } + + if (common::OB_SUCCESS == ret && array_cnt > 0) { + if (array_cnt <= k_num) { + // Construct a big top heap, with the top of the heap being the maximum value in the current out_array + std::make_heap(out_array.begin(), out_array.end(), compare_func); + } else { + // Construct a big top heap, with the top of the heap being the maximum value in the current out_array + std::make_heap(out_array.begin(), out_array.end(), compare_func); + + for (int64_t idx = k_num; common::OB_SUCCESS == ret && idx < array_cnt; ++idx) { + // If the current element is smaller than the heap top element, replace the heap top element and re-std::make_heap + if (compare_func(in_array.at(idx), out_array.at(0))) { + out_array[0] = in_array.at(idx); + std::make_heap(out_array.begin(), out_array.end(), compare_func); + } else { + // do nothing + } + } // for + } + + if (common::OB_SUCCESS == ret) { + std::sort_heap(out_array.begin(), out_array.end(), compare_func); + } + } + + return ret; +} + +int deep_copy_str(const ObString &src, + ObString &dest, + common::ObIAllocator &allocator); + +int get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + volatile bool &stop_flag); + +int get_tenant_compat_mode(const uint64_t tenant_id, + share::ObWorker::CompatMode &compat_mode, + const int64_t timeout); + +char *lbt_oblog(); + +bool is_backup_mode(); + +struct BRColElem +{ + BRColElem(const char *col_value, size_t col_value_len) : col_value_(col_value), col_value_len_(col_value_len) {} + BRColElem() { reset(); } + ~BRColElem() { reset(); } + + void reset() + { + col_value_ = NULL; + col_value_len_ = 0; + } + + TO_STRING_KV(K_(col_value), K_(col_value_len)); + + const char *col_value_; + size_t col_value_len_; +}; +int get_br_value(ILogRecord *br, + ObArray &new_values); +int get_mem_br_value(ILogRecord *br, + ObArray &new_values); + +int c_str_to_int(const char* str, int64_t &num); +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_UTILS_H__ */ diff --git a/src/liboblog/src/ob_log_work_mode.cpp b/src/liboblog/src/ob_log_work_mode.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4d59705c2f692652231cbf4c2d5de8d33f99da4a --- /dev/null +++ b/src/liboblog/src/ob_log_work_mode.cpp @@ -0,0 +1,91 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_log_work_mode.h" + +namespace oceanbase +{ +namespace liboblog +{ +using namespace oceanbase::common; + +const char *print_working_mode(const WorkingMode mode) +{ + const char *mode_str = "INVALID"; + + switch (mode) { + case MEMORY_MODE: { + mode_str = "Memory Working Mode"; + break; + } + case STORAGER_MODE: { + mode_str = "Storager Working Mode"; + break; + } + case AUTO_MODE: { + mode_str = "Auto Working Mode"; + break; + } + default: { + mode_str = "INVALID"; + break; + } + } + + return mode_str; +} + +// TODO support auto mode +WorkingMode get_working_mode(const char *working_mode_str) +{ + WorkingMode ret_mode = UNKNOWN_MODE; + + if (OB_ISNULL(working_mode_str)) { + } else { + if (0 == strcmp("memory", working_mode_str)) { + ret_mode = MEMORY_MODE; + } else if (0 == strcmp("storage", working_mode_str)) { + ret_mode = STORAGER_MODE; + } else { + } + } + + return ret_mode; +} + +bool is_working_mode_valid(WorkingMode mode) +{ + bool bool_ret = false; + + bool_ret = (mode > WorkingMode::UNKNOWN_MODE) + && (mode < WorkingMode::MAX_MODE); + + return bool_ret; +} + +bool is_memory_working_mode(const WorkingMode mode) +{ + return WorkingMode::MEMORY_MODE == mode; +} + +bool is_storage_working_mode(const WorkingMode mode) +{ + return WorkingMode::STORAGER_MODE == mode; +} + +bool is_auto_working_mode(const WorkingMode mode) +{ + return WorkingMode::AUTO_MODE == mode; +} + +} +} diff --git a/src/liboblog/src/ob_log_work_mode.h b/src/liboblog/src/ob_log_work_mode.h new file mode 100644 index 0000000000000000000000000000000000000000..fd38568f436214564bdaf664c8bddc2c10813677 --- /dev/null +++ b/src/liboblog/src/ob_log_work_mode.h @@ -0,0 +1,44 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_OB_LOG_WORK_MODE_H_ +#define OCEANBASE_LIBOBLOG_OB_LOG_WORK_MODE_H_ + +#include "share/ob_define.h" +#include "share/ob_errno.h" + +namespace oceanbase +{ +namespace liboblog +{ +enum WorkingMode +{ + UNKNOWN_MODE = 0, + + MEMORY_MODE = 1, + STORAGER_MODE = 2, + AUTO_MODE = 3, + + MAX_MODE +}; +const char *print_working_mode(const WorkingMode mode); +WorkingMode get_working_mode(const char *working_mode_str); + +bool is_working_mode_valid(WorkingMode mode); +bool is_memory_working_mode(const WorkingMode mode); +bool is_storage_working_mode(const WorkingMode mode); +bool is_auto_working_mode(const WorkingMode mode); + +} +} + +#endif diff --git a/src/liboblog/src/ob_map_queue.h b/src/liboblog/src/ob_map_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..c1098340d62e0be3da5e17fbcd77eecb012842d2 --- /dev/null +++ b/src/liboblog/src/ob_map_queue.h @@ -0,0 +1,212 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_MAP_QUEUE_H__ +#define OCEANBASE_MAP_QUEUE_H__ + +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/oblog/ob_log_module.h" // LIB_LOG +#include "lib/utility/ob_macro_utils.h" // OB_FAIL + +namespace oceanbase +{ +namespace common +{ + +template +class ObMapQueue +{ +public: + ObMapQueue() : inited_(false), map_(), head_(0), tail_(0), dummy_tail_(0) {} + virtual ~ObMapQueue() { destroy(); } + int init(const char *label); + void destroy(); + bool is_inited() const { return inited_; } + int64_t count() const { return map_.count(); } +public: + /// A non-blocking push operation + /// Should definitely succeed unless memory problems are encountered + int push(const T &val); + + /// non-blocking pop operation + /// error code: + /// - OB_EAGAIN: empty queue + int pop(T &val); + + /// reset + /// non-thread safe + int reset(); + +private: + // Key. + struct Key + { + int64_t idx_; + void reset(const int64_t idx) { idx_ = idx; } + uint64_t hash() const { return static_cast(idx_); } + bool operator==(const Key &other) const { return idx_ == other.idx_; } + }; + // Pop contidion. + class PopCond + { + public: + explicit PopCond(T &val) : val_(val) {} + ~PopCond() {} + bool operator()(const Key &key, const T &val) + { + UNUSED(key); + val_ = val; + return true; + } + private: + T &val_; + }; + +private: + bool inited_; + // Map. + common::ObLinearHashMap map_; + // Sn. 
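+  // (Meaning of the sequence numbers, based on the push()/pop() implementations below:
+  //  dummy_tail_ - reservation counter; push() first claims a slot number (sn) here atomically;
+  //  tail_       - publish counter; advanced to sn + 1 once the value has been inserted into map_;
+  //  head_       - consume counter; pop() claims sn via CAS and erases the value from map_.)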
+ int64_t head_ CACHE_ALIGNED; + int64_t tail_ CACHE_ALIGNED; + int64_t dummy_tail_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObMapQueue); +}; + +////////////////////////////////////////////////////////////////////////////////// + +template +int ObMapQueue::init(const char *label) +{ + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LIB_LOG(ERROR, "init twice"); + ret = common::OB_INIT_TWICE; + } else if (OB_FAIL(map_.init(label))) { + LIB_LOG(ERROR, "init map fail", KR(ret), K(label)); + } else { + head_ = 0; + tail_ = 0; + dummy_tail_ = 0; + inited_ = true; + } + + return ret; +} + +template +void ObMapQueue::destroy() +{ + inited_ = false; + (void)map_.destroy(); + head_ = 0; + tail_ = 0; + dummy_tail_ = 0; +} + +template +int ObMapQueue::push(const T &val) +{ + int ret = common::OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not init"); + ret = common::OB_NOT_INIT; + } else { + // Get sn. + int64_t sn = ATOMIC_LOAD(&dummy_tail_); + while (!ATOMIC_BCAS(&dummy_tail_, sn, sn + 1)) { + sn = ATOMIC_LOAD(&dummy_tail_); + } + + // Save val. + Key key; + key.reset(sn); + if (OB_FAIL(map_.insert(key, val))) { + LIB_LOG(ERROR, "err insert map", KR(ret), K(sn)); + } + + // Update tail. + if (OB_SUCCESS == ret) { + while (!ATOMIC_BCAS(&tail_, sn, sn + 1)) { PAUSE(); } + } + } + + return ret; +} + +template +int ObMapQueue::pop(T &val) +{ + int ret = common::OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not init"); + ret = common::OB_NOT_INIT; + } else { + int64_t head = 0; + int64_t tail = 0; + bool done = false; + while (OB_SUCCESS == ret + && !done + && (head = ATOMIC_LOAD(&head_)) < (tail = ATOMIC_LOAD(&tail_))) { + int64_t sn = head; + if (ATOMIC_BCAS(&head_, sn, sn + 1)) { + Key key; + key.reset(sn); + PopCond cond(val); + if (OB_FAIL(map_.erase_if(key, cond))) { + LIB_LOG(ERROR, "err erase map", KR(ret), K(sn)); + } + else { + done = true; + } + } + } + // Empty queue. + if (OB_SUCCESS == ret && (head == tail)) { + ret = common::OB_EAGAIN; + } + } + + return ret; +} + +template +int ObMapQueue::reset() +{ + int ret = common::OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not init"); + ret = common::OB_NOT_INIT; + } else { + // non-thread safe + head_ = 0; + tail_ = 0; + dummy_tail_ = 0; + if (OB_FAIL(map_.reset())) { + LIB_LOG(ERROR, "err reset map", KR(ret)); + } + } + + return ret; +} + +} +} + +#endif diff --git a/src/liboblog/src/ob_map_queue_thread.h b/src/liboblog/src/ob_map_queue_thread.h new file mode 100644 index 0000000000000000000000000000000000000000..fbeed6efe626cb389da75d7800284584eaee3056 --- /dev/null +++ b/src/liboblog/src/ob_map_queue_thread.h @@ -0,0 +1,405 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIB_MAP_QUEUE_THREAD_H__ +#define OCEANBASE_LIB_MAP_QUEUE_THREAD_H__ + +#include "ob_map_queue.h" // ObMapQueue + +#include "share/ob_errno.h" // OB_SUCCESS +#include "lib/utility/ob_macro_utils.h" // UNUSED +#include "lib/oblog/ob_log_module.h" // LIB_LOG +#include "lib/atomic/ob_atomic.h" // ATOMIC_* +#include "common/ob_queue_thread.h" // ObCond + +namespace oceanbase +{ +namespace common +{ + +// Thread pool +// +// One ObMapQueue per thread +// Since ObMapQueue is scalable, push operations do not block +template +class ObMapQueueThread +{ + typedef ObMapQueue QueueType; + static const int64_t DATA_OP_TIMEOUT = 1L * 1000L * 1000L; + +public: + ObMapQueueThread(); + virtual ~ObMapQueueThread(); + +public: + // Inserting data + // Non-blocking + // + // @retval OB_SUCCESS Success + // @retval Other_return_values Fail + int push(void *data, const uint64_t hash_val); + + // Thread execution function + // Users can override this function to customize the thread execution + virtual void run(const int64_t thread_index); + + // Data handling function + // Users can also override this function to process data directly while keeping the run() function + virtual int handle(void *data, const int64_t thread_index, volatile bool &stop_flag) + { + UNUSED(data); + UNUSED(thread_index); + stop_flag = stop_flag; + return 0; + } + +protected: + /// pop data from a thread-specific queue + /// + /// @param thread_index Thread number + /// @param data The data returned + /// + /// @retval OB_SUCCESS success + /// @retval OB_EAGAIN empty queue + /// @retval other_error_code Fail + int pop(const int64_t thread_index, void *&data); + + /// Execute cond timedwait on a specific thread's queue + void cond_timedwait(const int64_t thread_index, const int64_t wait_time); + +public: + int init(const int64_t thread_num, const char *label); + void destroy(); + int start(); + void stop(); + void mark_stop_flag() { ATOMIC_STORE(&stop_flag_, false); } + bool is_stoped() const { return ATOMIC_LOAD(&stop_flag_); } + int64_t get_thread_num() const { return thread_num_; } + +public: + typedef ObMapQueueThread HostType; + struct ThreadConf + { + pthread_t tid_; + HostType *host_; + int64_t thread_index_; + QueueType queue_; + ObCond cond_; + + ThreadConf(); + virtual ~ThreadConf(); + + int init(const char *label, const int64_t thread_index, HostType *host); + void destroy(); + }; + +private: + static void *thread_func_(void *arg); + int next_task_(const int64_t thread_index, void *&task); + +private: + bool inited_; + int64_t thread_num_; + ThreadConf tc_[MAX_THREAD_NUM]; + +// Valid for inherited classes +protected: + volatile bool stop_flag_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObMapQueueThread); +}; + +///////////////////////////////////////////////////////////////////////////////////////////////////////// + +template +ObMapQueueThread::ObMapQueueThread() : + inited_(false), + thread_num_(0), + stop_flag_(true) +{ +} + +template +ObMapQueueThread::~ObMapQueueThread() +{ + destroy(); +} + +template +int ObMapQueueThread::init(const int64_t thread_num, const char *label) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LIB_LOG(ERROR, "init twice"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(0 >= thread_num) || OB_UNLIKELY(thread_num > MAX_THREAD_NUM)) { + LIB_LOG(ERROR, "invalid argument", K(thread_num)); + ret = OB_INVALID_ARGUMENT; + } else { + for (int64_t index = 0; OB_SUCCESS == ret && index < thread_num; index++) { + if (OB_FAIL(tc_[index].init(label, index, 
this))) { + LIB_LOG(ERROR, "init queue fail", KR(ret), K(index), K(label)); + } + } + + thread_num_ = thread_num; + stop_flag_ = true; + inited_ = true; + } + + return ret; +} + +template +void ObMapQueueThread::destroy() +{ + stop(); + + inited_ = false; + + for (int64_t index = 0; index < thread_num_; index++) { + tc_[index].destroy(); + } + + thread_num_ = 0; + stop_flag_ = true; +} + +template +int ObMapQueueThread::start() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not inited"); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + stop_flag_ = false; + + for (int64_t index = 0; OB_SUCCESS == ret && index < thread_num_; index++) { + int pthread_ret = 0; + ThreadConf &tc = tc_[index]; + + if (0 != (pthread_ret = pthread_create(&(tc.tid_), NULL, thread_func_, &tc))) { + LIB_LOG(ERROR, "pthread_create fail", K(pthread_ret), KERRNOMSG(pthread_ret), K(index)); + ret = OB_ERR_UNEXPECTED; + } + } + } + return ret; +} + +template +void ObMapQueueThread::stop() +{ + if (inited_) { + stop_flag_ = true; + + for (int64_t index = 0; index < thread_num_; index++) { + ThreadConf &tc = tc_[index]; + + if (0 != tc.tid_) { + int pthread_ret = pthread_join(tc.tid_, NULL); + + if (0 != pthread_ret) { + LIB_LOG(ERROR, "pthread_join fail", "thread_id", tc.tid_, K(pthread_ret)); + } + + tc.tid_ = 0; + } + } + } +} + +template +void *ObMapQueueThread::thread_func_(void *arg) +{ + ThreadConf *tc = static_cast(arg); + if (NULL != tc && NULL != tc->host_) { + tc->host_->run(tc->thread_index_); + } + return NULL; +} + +template +void ObMapQueueThread::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not init"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + while (! stop_flag_ && OB_SUCCESS == ret) { + void *task = NULL; + + if (OB_FAIL(next_task_(thread_index, task))) { + if (OB_IN_STOP_STATE != ret) { + LIB_LOG(ERROR, "next_task_ fail", KR(ret), K(thread_index)); + } + } else if (OB_FAIL(handle(task, thread_index, stop_flag_))) { + if (OB_IN_STOP_STATE != ret) { + LIB_LOG(ERROR, "handle task fail", KR(ret), "task", (int64_t)task, K(thread_index)); + } + } else { + // do nothing + } + } + } + + // NOTE: One thread exits, others exit at the same time + stop_flag_ = true; +} + +template +int ObMapQueueThread::pop(const int64_t thread_index, void *&data) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not init"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + ret = tc_[thread_index].queue_.pop(data); + } + return ret; +} + +template +void ObMapQueueThread::cond_timedwait(const int64_t thread_index, + const int64_t wait_time) +{ + if (OB_UNLIKELY(! 
inited_)) { + LIB_LOG(ERROR, "not init"); + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); + } else { + tc_[thread_index].cond_.timedwait(wait_time); + } +} + +template +int ObMapQueueThread::next_task_(const int64_t index, void *&task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(index < 0) || OB_UNLIKELY(index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(index), K(thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + ThreadConf &tc = tc_[index]; + while (! stop_flag_ && OB_SUCCESS == ret) { + task = NULL; + + if (OB_FAIL(tc.queue_.pop(task))) { + if (OB_EAGAIN == ret) { + // empty queue + ret = OB_SUCCESS; + tc.cond_.timedwait(DATA_OP_TIMEOUT); + } else { + LIB_LOG(ERROR, "pop task from queue fail", KR(ret)); + } + } else if (OB_ISNULL(task)) { + LIB_LOG(ERROR, "pop invalid task", K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + break; + } + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + + return ret; +} + +template +int ObMapQueueThread::push(void *data, const uint64_t hash_val) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(data)) { + LIB_LOG(ERROR, "invalid argument", K(data)); + ret = OB_INVALID_ARGUMENT; + } else { + int64_t target_index = static_cast(hash_val % thread_num_); + ThreadConf &tc = tc_[target_index]; + + if (OB_FAIL(tc.queue_.push(data))) { + LIB_LOG(ERROR, "push data fail", KR(ret), K(data), K(target_index)); + } else { + tc.cond_.signal(); + } + } + + return ret; +} + +///////////////////////////////////////////// ThreadConf ///////////////////////////////////////////// + +template +ObMapQueueThread::ThreadConf::ThreadConf() : + tid_(0), + host_(NULL), + thread_index_(0), + queue_(), + cond_() +{} + +template +ObMapQueueThread::ThreadConf::~ThreadConf() +{ + destroy(); +} + +template +int ObMapQueueThread::ThreadConf::init(const char *label, + const int64_t thread_index, + HostType *host) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(thread_index < 0) || OB_ISNULL(host)) { + LIB_LOG(ERROR, "invalid argument", K(thread_index), K(host)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(queue_.init(label))) { + LIB_LOG(ERROR, "init queue fail", KR(ret), K(label)); + } else { + tid_ = 0; + host_ = host; + thread_index_ = thread_index; + } + return ret; +} + +template +void ObMapQueueThread::ThreadConf::destroy() +{ + queue_.destroy(); + tid_ = 0; + host_ = NULL; + thread_index_ = 0; +} + +} // namespace common +} // namespace oceanbase +#endif /* OCEANBASE_LIB_QUEUE_M_FIXED_QUEUE_H_ */ diff --git a/src/liboblog/src/ob_ms_queue_thread.h b/src/liboblog/src/ob_ms_queue_thread.h new file mode 100644 index 0000000000000000000000000000000000000000..17dff273ea37a2b7348ba71750ac2f431755a900 --- /dev/null +++ b/src/liboblog/src/ob_ms_queue_thread.h @@ -0,0 +1,473 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OCEANBASE_LIB_MS_QUEUE_THREAD_H__
+#define OCEANBASE_LIB_MS_QUEUE_THREAD_H__
+
+#include "lib/queue/ob_ms_queue.h"       // ObMsQueue
+#include "lib/allocator/page_arena.h"    // ObArenaAllocator
+
+#include "share/ob_errno.h"              // OB_SUCCESS
+#include "lib/oblog/ob_log_module.h"     // LIB_LOG
+#include "lib/container/ob_bit_set.h"    // ObFixedBitSet
+#include "common/ob_queue_thread.h"      // ObCond
+
+namespace oceanbase
+{
+namespace common
+{
+
+// One global ObMsQueue; each thread owns one seq queue of that ObMsQueue
+// ModuleClass: identifies the target module that uses this thread pool
+template
+class ObMsQueueThread
+{
+  static const int64_t PRODUCER_TIMEWAIT = 1 * 1000;    // 1ms
+  static const int64_t CONSUMER_TIMEWAIT = 100 * 1000;  // 100ms
+  static const int64_t PAGE_SIZE = OB_MALLOC_NORMAL_BLOCK_SIZE;
+
+public:
+  ObMsQueueThread();
+  virtual ~ObMsQueueThread();
+
+public:
+  typedef ObFixedBitSet BitSet;
+  typedef ObLink Task;
+  virtual int handle(Task *task, const int64_t thread_index, volatile bool &stop_flag) = 0;
+
+public:
+  int init(const int64_t thread_num, const int64_t queue_size);
+  void destroy();
+  int start();
+  void stop();
+  void run(const int64_t thread_index);
+  void mark_stop_flag() { stop_flag_ = true; }
+
+  bool is_stoped() const { return ATOMIC_LOAD(&stop_flag_); }
+
+  // bit_set records which sub-queues of the ObMsQueue the producer has operated on, used for signalling
+  int push(Task *task,
+      const int64_t seq,
+      const uint64_t hash_value,
+      BitSet &bit_set,
+      const int64_t timeout);
+  int end_batch(const int64_t seq,
+      const int64_t count,
+      const BitSet &bit_set);
+
+  int64_t get_thread_num() const { return thread_num_; }
+
+private:
+  typedef ObMsQueueThread HostType;
+  struct ThreadConf
+  {
+    pthread_t tid_;
+    HostType *host_;
+    int64_t thread_index_;
+    // 1. Each sub-queue of the MsQueue follows a multi-producer / single-consumer model (multiple
+    //    threads concurrently push tasks to the sub-queue; a single thread consumes them)
+    // 2. FIXME: when the consumer takes a task and signals via producer_cond_, it does not wake up
+    //    exactly the producer thread that needs to produce.
+    // 3. 
The consumer wakes up exactly by recording the subqueue it has operated on + + // Producer cond + ObCond producer_cond_; + // Consumer cond + ObCond consumer_cond_; + + ThreadConf(); + virtual ~ThreadConf(); + + int init(const int64_t thread_index, HostType *host); + void destroy(); + }; + + static void *thread_func_(void *arg); + int next_task_(int64_t queue_index, Task *&task); + int push_(Task *task, + const int64_t seq, + const uint64_t hash_value, + BitSet &bit_set, + const int64_t timeout); + +private: + bool inited_; + int64_t thread_num_; + + volatile bool stop_flag_ CACHE_ALIGNED; + + ThreadConf tc_[MAX_THREAD_NUM]; + ObMsQueue queue_; + ObArenaAllocator allocator_; // 分配器 + +private: + DISALLOW_COPY_AND_ASSIGN(ObMsQueueThread); +}; + +template +ObMsQueueThread::ObMsQueueThread() : + inited_(false), + thread_num_(0), + stop_flag_(true), + queue_(), + allocator_(ObModIds::OB_EXT_MS_QUEUE_QITEM, PAGE_SIZE) +{ +} + +template +ObMsQueueThread::~ObMsQueueThread() +{ + destroy(); +} + +template +int ObMsQueueThread::init(const int64_t thread_num, const int64_t queue_size) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LIB_LOG(ERROR, "ObMsQueueThread has been initialized"); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(thread_num > MAX_THREAD_NUM) + || OB_UNLIKELY(queue_size <= 0)) { + LIB_LOG(ERROR, "invalid arguments", K(thread_num), K(MAX_THREAD_NUM), K(queue_size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(queue_.init(thread_num, queue_size, &allocator_))) { + LIB_LOG(ERROR, "initialize queue fail", KR(ret), K(thread_num), K(queue_size)); + } else { + for (int64_t index = 0; OB_SUCCESS == ret && index < thread_num; index++) { + if (OB_FAIL(tc_[index].init(index, this))) { + LIB_LOG(ERROR, "init queue fail", KR(ret), K(index)); + } + } + + thread_num_ = thread_num; + stop_flag_ = true; + + inited_ = true; + } + + return ret; +} + +template +void ObMsQueueThread::destroy() +{ + stop(); + + inited_ = false; + + for (int64_t index = 0; index < thread_num_; index++) { + tc_[index].destroy(); + } + + thread_num_ = 0; + stop_flag_ = true; + + (void)queue_.destroy(); +} + +template +int ObMsQueueThread::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LIB_LOG(ERROR, "ObMsQueueThread has not been initialized"); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + stop_flag_ = false; + + for (int64_t index = 0; OB_SUCC(ret) && index < thread_num_; index++) { + int pthread_ret = 0; + ThreadConf &tc = tc_[index]; + + if (0 != (pthread_ret = pthread_create(&(tc.tid_), NULL, thread_func_, &tc))) { + LIB_LOG(ERROR, "pthread_create fail", K(pthread_ret), + KERRNOMSG(pthread_ret), K(index)); + ret = OB_ERR_UNEXPECTED; + } + } + } + + return ret; +} + +template +void ObMsQueueThread::stop() +{ + if (inited_) { + stop_flag_ = true; + + for (int64_t index = 0; index < thread_num_; index++) { + ThreadConf &tc = tc_[index]; + + if (0 != tc.tid_) { + int pthread_ret = pthread_join(tc.tid_, NULL); + + if (0 != pthread_ret) { + LIB_LOG(ERROR, "pthread_join fail", "thread_id", tc.tid_, K(pthread_ret)); + } else { + // do nothing + } + + // finally reset to 0, to ensure that stop is called multiple times without problems + // Because ObLogInstance may call stop and destroy externally, and destroy includes stop + tc.tid_ = 0; + } + } + } +} + +template +void *ObMsQueueThread::thread_func_(void *arg) +{ + if (NULL != arg) { + ThreadConf *tc = static_cast(arg); + + if (NULL != tc && NULL != tc->host_) { + tc->host_->run(tc->thread_index_); + } + } + + return NULL; +} + +template +void ObMsQueueThread::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "ObMQThread not initialized", K(thread_index)); + } else { + while (! stop_flag_ && OB_SUCCESS == ret) { + Task *task = NULL; + + if (OB_FAIL(next_task_(thread_index, task))) { + if (OB_IN_STOP_STATE != ret) { + LIB_LOG(ERROR, "next_task_ fail", KR(ret), K(thread_index)); + } + } else if (OB_FAIL(handle(task, thread_index, stop_flag_))) { + if (OB_IN_STOP_STATE != ret) { + LIB_LOG(ERROR, "handle task fail", KR(ret), K(task), K(thread_index)); + } + } else { + // do nothing + } + } + } + + // NOTE: One thread exits, others exit at the same time + stop_flag_ = true; +} + +// int get(Task*& task, const int64_t idx); +template +int ObMsQueueThread::next_task_(int64_t queue_index, Task *&task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(queue_index < 0) || OB_UNLIKELY(queue_index >= thread_num_)) { + LIB_LOG(ERROR, "invalid thread index", K(queue_index), K(thread_num_)); + ret = OB_ERR_UNEXPECTED; + } else { + ThreadConf &tc = tc_[queue_index]; + + while (! stop_flag_ && OB_SUCCESS == ret) { + task = NULL; + + if (OB_FAIL(queue_.get(task, queue_index))) { + if (OB_EAGAIN == ret) { + // Data not ready at this time + ret = OB_SUCCESS; + tc.consumer_cond_.timedwait(CONSUMER_TIMEWAIT); + } else { + LIB_LOG(ERROR, "pop task from queue fail", KR(ret)); + } + } else if (OB_ISNULL(task)) { + LIB_LOG(ERROR, "get invalid task", K(task)); + ret = OB_ERR_UNEXPECTED; + } else { + break; + } + } + + if (OB_SUCC(ret)) { + tc.producer_cond_.signal(); + } + + if (stop_flag_) { + ret = OB_IN_STOP_STATE; + } + } + + return ret; +} + +template +int ObMsQueueThread::push(Task *task, + const int64_t seq, + const uint64_t hash_value, + BitSet &bit_set, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LIB_LOG(ERROR, "ObMsQueueThread not initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task)) { + LIB_LOG(ERROR, "invalid argument", K(task)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(stop_flag_)) { + ret = OB_IN_STOP_STATE; + } else if (OB_FAIL(push_(task, seq, hash_value, bit_set, timeout))) { + if (OB_TIMEOUT != ret) { + LIB_LOG(ERROR, "push queue fail", KR(ret), KP(task), K(hash_value), K(bit_set)); + } + } else { + // succ + } + + return ret; +} + +template +int ObMsQueueThread::push_(Task *task, + const int64_t seq, + const uint64_t hash, + BitSet &bit_set, + const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "ObMsQueueThread not initialized"); + ret = OB_NOT_INIT; + } else { + int64_t target_index = static_cast(hash % thread_num_); + ThreadConf &tc = tc_[target_index]; + int64_t end_time = ObTimeUtility::current_time() + timeout; + + while (true) { + ret = queue_.push(task, seq, hash); + + if (OB_EAGAIN != ret) { + break; + } + + int64_t left_time = end_time - ObTimeUtility::current_time(); + + if (left_time <= 0) { + ret = OB_TIMEOUT; + break; + } else { + const int64_t producer_timewait = PRODUCER_TIMEWAIT; + tc.producer_cond_.timedwait(std::min(left_time, producer_timewait)); + } + } // while + + if (OB_SUCC(ret)) { + if (OB_FAIL(bit_set.add_member(target_index))) { + LIB_LOG(ERROR, "bit_set add_member fail", KR(ret), K(task), K(seq), K(hash), + K(bit_set), K(target_index)); + } + } + } + + return ret; +} + +template +int ObMsQueueThread::end_batch(const int64_t seq, + const int64_t count, + const BitSet &bit_set) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "ObMsQueueThread not initialized"); + ret = OB_NOT_INIT; + } else { + ret = queue_.end_batch(seq, count); + + // Number of sub-queues operated on + const int64_t handle_queue_total_cnt = bit_set.num_members(); + int64_t handle_queue_cnt = 0; + + // Iterate through all threads, trying to signal if it has operated before + for (int64_t index = 0; OB_SUCC(ret) && index < thread_num_; index++) { + if (bit_set.has_member(index)) { + // Try to wake up + ThreadConf &tc = tc_[index]; + if (queue_.next_is_ready(index)) { + tc.consumer_cond_.signal(); + } + + // If all operated sub-queues are found, stop traversal + ++handle_queue_cnt; + if (handle_queue_cnt == handle_queue_total_cnt) { + break; + } + } + } + } + + return ret; +} + +///////////////////////////////////////////// ThreadConf ///////////////////////////////////////////// + +template +ObMsQueueThread::ThreadConf::ThreadConf() : + tid_(0), + host_(NULL), + thread_index_(0), + producer_cond_(), + consumer_cond_() +{} + +template +ObMsQueueThread::ThreadConf::~ThreadConf() +{ + destroy(); +} + +template +int ObMsQueueThread::ThreadConf::init(const int64_t thread_index, + HostType *host) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(thread_index < 0) || OB_ISNULL(host)) { + LIB_LOG(ERROR, "invalid argument", K(thread_index), K(host)); + ret = OB_INVALID_ARGUMENT; + } else { + tid_ = 0; + host_ = host; + thread_index_ = thread_index; + } + + return ret; +} + +template +void ObMsQueueThread::ThreadConf::destroy() +{ + tid_ = 0; + host_ = NULL; + thread_index_ = 0; +} + +} // namespace common +} // namespace oceanbase +#endif /* OCEANBASE_LIB_MS_QUEUE_THREAD_H__ */ diff --git a/src/liboblog/src/ob_obj2str_helper.cpp b/src/liboblog/src/ob_obj2str_helper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..167392631c0380c61ebb4c6b3492d9fa3a9b7d8e 
--- /dev/null +++ b/src/liboblog/src/ob_obj2str_helper.cpp @@ -0,0 +1,487 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_obj2str_helper.h" +#include "ob_log_timezone_info_getter.h" +#include "lib/timezone/ob_timezone_info.h" +#include "lib/string/ob_sql_string.h" +#include "sql/engine/expr/ob_datum_cast.h" // padding_char_for_cast +#include "lib/alloc/ob_malloc_allocator.h" +#include "sql/engine/expr/ob_expr_uuid.h" +#include "sql/engine/expr/ob_expr_operator.h" +#include "sql/engine/expr/ob_expr_res_type_map.h" + +#include "ob_log_utils.h" // _M_ + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ +const char* ObObj2strHelper::EMPTY_STRING = ""; + +ObObj2strHelper::ObObj2strHelper() : inited_(false), + timezone_info_getter_(NULL), + hbase_util_(NULL), + enable_hbase_mode_(false), + enable_convert_timestamp_to_unix_timestamp_(false), + enable_backup_mode_(false), + tenant_mgr_(NULL) +{ +} + +ObObj2strHelper::~ObObj2strHelper() +{ + destroy(); +} + +int ObObj2strHelper::init(IObLogTimeZoneInfoGetter &timezone_info_getter, + ObLogHbaseUtil &hbase_util, + const bool enable_hbase_mode, + const bool enable_convert_timestamp_to_unix_timestamp, + const bool enable_backup_mode, + IObLogTenantMgr &tenant_mgr) +{ + int ret = OB_SUCCESS; + + if (inited_) { + ret = OB_INIT_TWICE; + } else if (OB_FAIL(init_ob_charset_utils())) { + OBLOG_LOG(ERROR, "failed to init ob charset util!", KR(ret)); + } else { + timezone_info_getter_ = &timezone_info_getter; + hbase_util_ = &hbase_util; + enable_hbase_mode_ = enable_hbase_mode; + enable_convert_timestamp_to_unix_timestamp_ = enable_convert_timestamp_to_unix_timestamp; + enable_backup_mode_ = enable_backup_mode; + tenant_mgr_ = &tenant_mgr; + inited_ = true; + } + return ret; +} + +int ObObj2strHelper::init_ob_charset_utils() +{ + int ret = common::OB_SUCCESS; + lib::ObMallocAllocator *allocator = NULL; + const lib::ObMemAttr attr(common::OB_SYS_TENANT_ID, ObModIds::OB_NUMBER); + if (OB_FAIL(sql::ObExprTRDateFormat::init())) { + OBLOG_LOG(ERROR, "failed to init vars in oracle trunc", KR(ret)); + } else if (OB_FAIL(sql::ObExprUuid::init())) { + OBLOG_LOG(ERROR, "failed to init vars in uuid", KR(ret)); + } else if (OB_ISNULL(allocator = lib::ObMallocAllocator::get_instance())) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "allocator is null", KR(ret)); + } else if (OB_FAIL(common::ObNumberConstValue::init(*allocator, attr))) { + OBLOG_LOG(ERROR, "failed to init ObNumberConstValue", KR(ret)); + } else if (OB_FAIL(sql::ARITH_RESULT_TYPE_ORACLE.init())) { + OBLOG_LOG(ERROR, "failed to init ORACLE_ARITH_RESULT_TYPE", KR(ret)); + } else if (OB_FAIL(ObCharsetUtils::init(*allocator))) { + OBLOG_LOG(ERROR, "fail to init ObCharsetUtils", KR(ret)); + } + return ret; +} + +void ObObj2strHelper::destroy() +{ + inited_ = false; + timezone_info_getter_ = NULL; + hbase_util_ = NULL; + enable_hbase_mode_ = false; + enable_convert_timestamp_to_unix_timestamp_ = false; + enable_backup_mode_ = false; + tenant_mgr_ = NULL; +} + + +//extended_type_info 
used for enum/set +int ObObj2strHelper::obj2str(const uint64_t tenant_id, + const uint64_t table_id, + const uint64_t column_id, + const common::ObObj &obj, + common::ObString &str, + common::ObIAllocator &allocator, + const bool string_deep_copy, + const common::ObIArray &extended_type_info, + const common::ObAccuracy &accuracy, + const common::ObCollationType &collation_type) +{ + int ret = OB_SUCCESS; + ObObjType obj_type = obj.get_type(); + common::ObObjTypeClass obj_tc = common::ob_obj_type_class(obj_type); + ObWorker::CompatMode compat_mode = THIS_WORKER.get_compatibility_mode(); + + // Configure allowed conversions: mysql timestamp column -> UTC integer time + if (ObTimestampType == obj_type && enable_convert_timestamp_to_unix_timestamp_) { + if (OB_FAIL(convert_mysql_timestamp_to_utc_(obj, str, allocator))) { + OBLOG_LOG(ERROR, "convert_mysql_timestamp_to_utc_ fail", KR(ret), K(table_id), K(column_id), K(obj), K(obj_type), + K(str)); + } + } else if (common::ObNullTC == obj_tc) { + str.assign_ptr(NULL, 0); + } else if (common::ObExtendTC == obj_tc) { + static const int64_t MAX_EXT_PRINT_LEN = 1 << 10; + char BUFFER[MAX_EXT_PRINT_LEN]; + int64_t pos = 0; + char *ptr = NULL; + + if (OB_FAIL(obj.print_sql_literal(BUFFER, sizeof(BUFFER), pos)) + || pos <= 0) { + OBLOG_LOG(ERROR, "obj print_sql_literal fail", KR(ret), K(obj), K(MAX_EXT_PRINT_LEN), K(pos)); + ret = common::OB_SUCCESS == ret ? common::OB_ERR_UNEXPECTED : ret; + } else if (NULL == (ptr = (char *)allocator.alloc(pos))) { + OBLOG_LOG(ERROR, "allocate memory fail", "size", pos); + ret = common::OB_ALLOCATE_MEMORY_FAILED; + } else { + (void)MEMCPY(ptr, BUFFER, pos); + str.assign_ptr(ptr, (int32_t)pos); + OBLOG_LOG(DEBUG, "obj2str cast extend type", K(obj), "cast_str", str); + } + // This should be before is_string_type, because for char/nchar it is also ObStringTC, so is_string_type=true + } else if (need_padding_(compat_mode, obj)) { + if (OB_FAIL(convert_char_obj_to_padding_obj_(compat_mode, obj, accuracy, collation_type, allocator, str))) { + OBLOG_LOG(ERROR, "convert_char_obj_to_padding_obj_ fail", KR(ret), K(obj), K(accuracy), K(collation_type), + K(str), K(compat_mode), "compat_mode_str", print_compat_mode(compat_mode)); + } + } else if (obj.is_string_type()) { + if (string_deep_copy) { + // need deep-copy + void *dst_buf = NULL; + ObString src_str = obj.get_string(); + int64_t str_len = obj.get_val_len(); + + if (str_len > 0) { + if (OB_ISNULL(dst_buf = allocator.alloc(str_len))) { + OBLOG_LOG(ERROR, "allocate memory fail", K(str_len)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMCPY(dst_buf, src_str.ptr(), src_str.length()); + } + } + + if (OB_SUCCESS == ret) { + str.assign_ptr(static_cast(dst_buf), + static_cast(str_len)); + } + } else { + // No deep copy required, direct reference to original string memory + if (OB_FAIL(obj.get_string(str))) { + OBLOG_LOG(ERROR, "get_string from ObObj fail", KR(ret), K(obj)); + } else { + // success + } + } + + if (OB_SUCC(ret)) { + // For a varchar with a default value of '', str_len=0, the empty string should be synchronised and not output as NULL + if (0 == obj.get_val_len()) { + str.assign_ptr(EMPTY_STRING, static_cast(obj.get_val_len())); + } + } + } else { + common::ObObj tmp_inner_obj; + const common::ObObj *in_obj = &obj; + ObObjMeta inner_meta; + inner_meta.set_collation_level(CS_LEVEL_NUMERIC); + inner_meta.set_collation_type(CS_TYPE_BINARY); + if (obj.is_enum() || obj.is_set()) { + if (OB_FAIL(ObObjCaster::enumset_to_inner(inner_meta, obj, tmp_inner_obj, + 
allocator, extended_type_info))) { + OBLOG_LOG(ERROR, "fail to enumset_to_inner", KR(ret)); + } else { + in_obj = &tmp_inner_obj; + } + } + + if (OB_SUCC(ret)) { + common::ObObj str_obj; + common::ObObjType target_type = common::ObMaxType; + + //liboblog need use_standard_format + ObTimeZoneInfoWrap *tz_info_wrap = nullptr; + const common::ObTimeZoneInfo *tz_info = nullptr; + if (OB_FAIL(tenant_mgr_->get_tenant_tz_wrap(tenant_id, tz_info_wrap))) { + OBLOG_LOG(ERROR, "get_tenant_tz_wrap failed", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tz_info_wrap)) { + ret = OB_ERR_UNEXPECTED; + OBLOG_LOG(ERROR, "tenant not exist", KR(ret), K(tenant_id)); + } else { + tz_info = tz_info_wrap->get_time_zone_info(); + const ObDataTypeCastParams dtc_params(tz_info); + ObObjCastParams cast_param(&allocator, &dtc_params, CM_NONE, collation_type); + cast_param.format_number_with_limit_ = false;//here need no limit format number for liboblog + + if (in_obj->is_bit()) { + target_type = common::ObUInt64Type; + } else { + target_type = common::ObVarcharType; + } + + if (OB_FAIL(ObObjCaster::to_type(target_type, cast_param, *in_obj, str_obj))) { + OBLOG_LOG(ERROR, "cast obj to varchar type fail", KR(ret), KPC(in_obj), K(target_type)); + if (OB_ERR_INVALID_TIMEZONE_REGION_ID == ret) { + // Refresh timezone until successful and convert again + ret = OB_SUCCESS; + + if (OB_FAIL(convert_timestamp_with_timezone_data_util_succ_(target_type, cast_param, + *in_obj, str_obj, str, tenant_id))) { + OBLOG_LOG(ERROR, "convert_timestamp_with_timezone_data_util_succ_ fail", KR(ret), KPC(in_obj), K(target_type)); + } + } + } else { + if (in_obj->is_bit()) { + if (OB_FAIL(convert_bit_obj_to_decimal_str_(obj, str_obj, str, allocator))) { + OBLOG_LOG(ERROR, "convert_bit_obj_to_decimal_str_ fail", KR(ret), K(obj), K(target_type)); + } + } else { + if (OB_FAIL(str_obj.get_string(str))) { + OBLOG_LOG(ERROR, "get_string from ObObj fail", KR(ret), K(str_obj)); + } else { + // For a varchar with a default value of '', str_len=0, the empty string should be synchronised and not output as NULL + if ((obj.is_enum() || obj.is_set()) && 0 == str_obj.get_val_len()) { + str.assign_ptr(EMPTY_STRING, static_cast(str_obj.get_val_len())); + } + } + } + } + } + + // 1. hbase table T column timestamp type should be converted to positive if it is negative + // 2. not converted in backup mode + if (OB_SUCC(ret)) { + bool is_hbase_table_T_column = false; + + if (obj.is_int() && enable_hbase_mode_ && ! enable_backup_mode_) { + if (OB_ISNULL(hbase_util_)) { + OBLOG_LOG(ERROR, "hbase_util_ is null", K(hbase_util_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(hbase_util_->judge_hbase_T_column(table_id, column_id, is_hbase_table_T_column))) { + OBLOG_LOG(ERROR, "hbase_util_ judge_hbase_T_column fail", KR(ret), K(table_id), K(column_id), K(is_hbase_table_T_column)); + } else if (! 
is_hbase_table_T_column) { + // do nothing + } else { + if (OB_FAIL(convert_hbase_bit_obj_to_positive_bit_str_(obj, str_obj, str, allocator))) { + OBLOG_LOG(ERROR, "convert_hbase_bit_obj_to_positive_bit_str_ fail", KR(ret), K(obj), K(target_type)); + } + } + } + OBLOG_LOG(DEBUG, "[HBASE]", KR(ret), K(obj), K(obj_type), K(enable_hbase_mode_), K(enable_backup_mode_), + K(str_obj), K(is_hbase_table_T_column), K(table_id)); + } + } // OB_SUCC(ret) + } + + // If it is a LOB, larger than 2M, do not print the contents, print the address and length + // Avoid printing the log taking too long + if (str.length() > 2 * _M_) { + OBLOG_LOG(DEBUG, "obj2str", KR(ret), K(obj_type), K(obj.get_scale()), K(obj.get_meta()), K(obj_tc), K(accuracy), K(collation_type), + KP(obj.get_string().ptr()), K(obj.get_string().length()), KP(str.ptr()), K(str.length())); + } else { + OBLOG_LOG(DEBUG, "obj2str", KR(ret), K(obj_type), K(obj.get_scale()), K(obj.get_meta()), K(obj_tc), K(accuracy), K(collation_type), + K(obj), K(str), K(str.length())); + } + + return ret; +} + +int ObObj2strHelper::convert_timestamp_with_timezone_data_util_succ_(const common::ObObjType &target_type, + common::ObObjCastParams &cast_param, + const common::ObObj &in_obj, + common::ObObj &str_obj, + common::ObString &str, + const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + bool done = false; + ObTZInfoMap *tz_info_map = NULL; + + if (OB_ISNULL(timezone_info_getter_)) { + OBLOG_LOG(ERROR, "timezone_info_getter_ is null", K(timezone_info_getter_)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(tenant_mgr_->get_tenant_tz_map(tenant_id, tz_info_map))) { + OBLOG_LOG(ERROR, "get_tenant_tz_map failed", KR(ret), K(tenant_id)); + } else { + while (! done && OB_SUCCESS == ret) { + if (OB_FAIL(timezone_info_getter_->fetch_tenant_timezone_info_util_succ(tenant_id, tz_info_map))) { + OBLOG_LOG(ERROR, "fetch_tenant_timezone_info_util_succ fail", KR(ret), K(tenant_id)); + } else if (OB_FAIL(ObObjCaster::to_type(target_type, cast_param, in_obj, str_obj))) { + if (OB_ERR_INVALID_TIMEZONE_REGION_ID == ret) { + OBLOG_LOG(WARN, "cast obj to varchar type fail, try again", KR(ret), K(in_obj), K(target_type)); + } else { + OBLOG_LOG(ERROR, "cast obj to varchar type fail", KR(ret), K(in_obj), K(target_type)); + } + } else { + done = true; + if (OB_FAIL(str_obj.get_string(str))) { + OBLOG_LOG(ERROR, "get_string from ObObj fail", KR(ret), K(str_obj)); + } + } + + if (OB_ERR_INVALID_TIMEZONE_REGION_ID == ret) { + ret = OB_SUCCESS; + usleep(10L * 1000L); + } + } + } + + return ret; +} + +// bit type output decimal string +int ObObj2strHelper::convert_bit_obj_to_decimal_str_(const common::ObObj &obj, + const common::ObObj &str_obj, + common::ObString &str, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + uint64_t value = 0; + + if (OB_FAIL(str_obj.get_uint64(value))) { + OBLOG_LOG(ERROR, "get_uint64 from ObObj fail", KR(ret), K(obj), K(str_obj), K(value)); + } else { + char buf[MAX_BIT_DECIMAL_STR_LENGTH]; + int64_t pos = 0; + char *ptr = NULL; + + if (OB_FAIL(common::databuff_printf(buf, MAX_BIT_DECIMAL_STR_LENGTH, pos, "%lu", value))) { + OBLOG_LOG(ERROR, "databuff_printf fail", K(pos), K(value)); + } else if (OB_ISNULL(ptr = (char *)allocator.alloc(pos))) { + OBLOG_LOG(ERROR, "allocate memory fail", "size", pos); + ret = common::OB_ALLOCATE_MEMORY_FAILED; + } else { + (void)MEMCPY(ptr, buf, pos); + str.assign_ptr(ptr, (int32_t)pos); + OBLOG_LOG(DEBUG, "obj2str cast bit type", K(obj), "cast_str", str, K(value)); + } + } + + return ret; +} + +int 
ObObj2strHelper::convert_hbase_bit_obj_to_positive_bit_str_(const common::ObObj &obj, + const common::ObObj &current_str_obj, + common::ObString &str, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + UNUSED(allocator); + + ObString current_str; + const char *current_str_ptr = NULL; + ObString::obstr_size_t current_str_len = 0; + + if (OB_FAIL(current_str_obj.get_string(current_str))) { + OBLOG_LOG(ERROR, "get_string from ObObj fail", KR(ret), K(obj), K(current_str_obj), K(current_str)); + } else if (OB_ISNULL(current_str_ptr = current_str.ptr())) { + OBLOG_LOG(ERROR, "current_str_ptr is null", K(obj), K(current_str_obj), K(current_str), K(current_str_ptr)); + ret = OB_ERR_UNEXPECTED; + } else { + current_str_len = current_str.length(); + OBLOG_LOG(DEBUG, "[HBASE]", K(obj), K(current_str_obj), K(current_str), K(current_str_ptr), K(current_str_len)); + + if ('-' == current_str_ptr[0]) { + str.assign_ptr(current_str_ptr + 1, (int32_t)(current_str_len - 1)); + } + } + + return ret; +} + +int ObObj2strHelper::convert_mysql_timestamp_to_utc_(const common::ObObj &obj, + common::ObString &str, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + const int64_t utc_time = obj.get_timestamp(); + char buf[MAX_TIMESTAMP_UTC_LONG_STR_LENGTH]; + int64_t pos = 0; + char *ptr = NULL; + // external output of utc integer time, compatible with mysql, splitting seconds and microseconds with a decimal point + // Microsecond precision length of 6 + const int64_t usec_mod_val = 1000000; + + if (OB_FAIL(common::databuff_printf(buf, MAX_TIMESTAMP_UTC_LONG_STR_LENGTH, pos, "%ld.%06ld", + utc_time / usec_mod_val, utc_time % usec_mod_val))) { + OBLOG_LOG(ERROR, "databuff_printf fail", K(pos), K(utc_time), K(usec_mod_val)); + } else if (OB_ISNULL(ptr = (char *)allocator.alloc(pos))) { + OBLOG_LOG(ERROR, "allocate memory fail", "size", pos); + ret = common::OB_ALLOCATE_MEMORY_FAILED; + } else { + (void)MEMCPY(ptr, buf, pos); + str.assign_ptr(ptr, (int32_t)pos); + OBLOG_LOG(DEBUG, "obj2str cast timestamp type to utc long", K(obj), "cast_str", str, K(utc_time)); + } + + return ret; +} + +bool ObObj2strHelper::need_padding_(const ObWorker::CompatMode &compat_mode, + const common::ObObj &obj) const +{ + bool bool_ret = false; + + bool_ret = (ObWorker::CompatMode::ORACLE == compat_mode) + && (obj.is_char() || obj.is_nchar()); + + return bool_ret; +} + +int ObObj2strHelper::convert_char_obj_to_padding_obj_(const ObWorker::CompatMode &compat_mode, + const common::ObObj &obj, + const common::ObAccuracy &accuracy, + const common::ObCollationType &collation_type, + common::ObIAllocator &allocator, + common::ObString &str) +{ + int ret = OB_SUCCESS; + int32_t char_len = 0; + + if (OB_FAIL(obj.get_string(str))) { + OBLOG_LOG(ERROR, "get_string from ObObj fail", KR(ret), K(obj)); + } else if (OB_FAIL(obj.get_char_length(accuracy, char_len, ObWorker::CompatMode::ORACLE == compat_mode))) { + OBLOG_LOG(ERROR, "obj get_char_length fail", KR(ret), K(accuracy), K(char_len)); + } else { + // The calculation of padding here needs to be based on char_len, not str.length + // e.g. 
nchar, 'a', str,length=2, not 1 + const int64_t padding_cnt = accuracy.get_length() - char_len; + // need pad + if (padding_cnt > 0) { + ObString padding_res; + + if (OB_FAIL(sql::padding_char_for_cast(padding_cnt, collation_type, allocator, padding_res))) { + OBLOG_LOG(ERROR, "padding_char_for_cast fail", KR(ret), K(obj), K(accuracy), K(collation_type), + K(padding_res)); + } else { + int64_t all_size = padding_res.length() + str.length(); + char *res_ptr = static_cast(allocator.alloc(all_size)); + + if (OB_ISNULL(res_ptr)) { + OBLOG_LOG(ERROR, "allocate memory failed", KR(ret)); + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + MEMMOVE(res_ptr, str.ptr(), str.length()); + MEMMOVE(res_ptr + str.length(), padding_res.ptr(), padding_res.length()); + str.assign_ptr(res_ptr, static_cast(all_size)); + + OBLOG_LOG(DEBUG, "obj2str cast char/nchar type", K(obj), "cast_str", str, "cast_str_len", str.length(), + K(padding_cnt), K(padding_res), "padding_res_len", padding_res.length(), + K(accuracy), K(collation_type), K(char_len)); + } + } + } // if (padding_cnt > 0) + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/src/ob_obj2str_helper.h b/src/liboblog/src/ob_obj2str_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..47a1e56b7197a3e44da6f1e790c3639f0f4be14c --- /dev/null +++ b/src/liboblog/src/ob_obj2str_helper.h @@ -0,0 +1,129 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_OBJ2STR_HELPER_H__ +#define OCEANBASE_OBJ2STR_HELPER_H__ + +#include "common/object/ob_object.h" // ObObj +#include "lib/allocator/ob_allocator.h" // ObIAllocator +#include "lib/string/ob_string.h" // ObString +#include "share/ob_worker.h" // ObWorker +#include "common/object/ob_obj_type.h" // ObObjTypeClass +#include "share/object/ob_obj_cast.h" // ObObjCastParams, ObObjCaster +#include "ob_log_hbase_mode.h" // ObLogHbaseUtil +#include "ob_log_tenant_mgr.h" // ObLogTenantMgr + +namespace oceanbase +{ +namespace common +{ +class ObTimeZoneInfo; +} +namespace liboblog +{ + +class IObLogTimeZoneInfoGetter; +class ObObj2strHelper +{ +public: + ObObj2strHelper(); + virtual ~ObObj2strHelper(); + +public: + // Converting objects to strings + // NOTE: + // 1. If the object is of string type ObStringTC (including: varchar, char, varbinary, binary) + // 1) string_deep_copy == false + // the string points directly to the content of the original object + // 2) string_deep_copy == true + // deep copy of the string + // 2. 
otherwise use allocator to allocate memory and print the object into memory + int obj2str(const uint64_t tenant_id, + const uint64_t table_id, + const uint64_t column_id, + const common::ObObj &obj, + common::ObString &str, + common::ObIAllocator &allocator, + const bool string_deep_copy, + const common::ObIArray &extended_type_info, + const common::ObAccuracy &accuracy, + const common::ObCollationType &collation_type); + +public: + int init(IObLogTimeZoneInfoGetter &timezone_info_getter, + ObLogHbaseUtil &hbase_util, + const bool enable_hbase_mode, + const bool enable_convert_timestamp_to_unix_timestamp, + const bool enable_backup_mode, + IObLogTenantMgr &tenant_mgr); + void destroy(); + +public: + static const char *EMPTY_STRING; + +private: + // initialize ObCharsetUtils (refer to ob_sql_init.h #init_sql_expr_static_var()) + // enum,set was developed at the stage when ob only supported utf8, and did not handle enum,set types when supporting other character sets, + // resulting in incorrect charset when converting enum,set to string. This can lead to garbled data and problems such as compare hang. (Corresponding to server-side modifications. + int init_ob_charset_utils(); + + int convert_timestamp_with_timezone_data_util_succ_(const common::ObObjType &target_type, + common::ObObjCastParams &cast_param, + const common::ObObj &in_obj, + common::ObObj &str_obj, + common::ObString &str, + const uint64_t tenant_id); + + // max length of uint64_t + static const int64_t MAX_BIT_DECIMAL_STR_LENGTH = 30; + int convert_bit_obj_to_decimal_str_(const common::ObObj &obj, + const common::ObObj &str_obj, + common::ObString &str, + common::ObIAllocator &allocator); + int convert_hbase_bit_obj_to_positive_bit_str_(const common::ObObj &obj, + const common::ObObj &str_obj, + common::ObString &str, + common::ObIAllocator &allocator); + + // max length of int64_t + static const int64_t MAX_TIMESTAMP_UTC_LONG_STR_LENGTH = 30; + int convert_mysql_timestamp_to_utc_(const common::ObObj &obj, + common::ObString &str, + common::ObIAllocator &allocator); + + // Oracle schema: char/nchar with automatic padding support + // TODO MySQL schema: char/binary supports padding based on specific requirments + bool need_padding_(const ObWorker::CompatMode &compat_mode, + const common::ObObj &obj) const; + int convert_char_obj_to_padding_obj_(const ObWorker::CompatMode &compat_mode, + const common::ObObj &obj, + const common::ObAccuracy &accuracy, + const common::ObCollationType &collation_type, + common::ObIAllocator &allocator, + common::ObString &str); + +private: + bool inited_; + IObLogTimeZoneInfoGetter *timezone_info_getter_; + ObLogHbaseUtil *hbase_util_; + bool enable_hbase_mode_; + bool enable_convert_timestamp_to_unix_timestamp_; + bool enable_backup_mode_; + IObLogTenantMgr *tenant_mgr_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObObj2strHelper); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_OBJ2STR_HELPER_H__ */ diff --git a/src/liboblog/src/ob_seq_thread.h b/src/liboblog/src/ob_seq_thread.h new file mode 100644 index 0000000000000000000000000000000000000000..fe9f824923dd7bb0869ed2c27920e95f707b1022 --- /dev/null +++ b/src/liboblog/src/ob_seq_thread.h @@ -0,0 +1,276 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_OB_SEQ_QUEUE_THREAD_H__ +#define OCEANBASE_OB_SEQ_QUEUE_THREAD_H__ + +#include "lib/ob_define.h" // RETRY_FUNC +#include "lib/allocator/ob_malloc.h" // ObMemAttr +#include "share/ob_errno.h" // KR + +#include "ob_concurrent_seq_queue.h" // ObConcurrentSeqQueue + +namespace oceanbase +{ +namespace common +{ + +// MAX_THREAD_NUM: Specifies the maximum number of threads supported +// ModuleClass: The module type of the thread class used, to distinguish between different modules for easy debugging +template +class ObSeqThread +{ + enum { DATA_OP_TIMEOUT = 1 * 1000 * 1000 }; + typedef ObConcurrentSeqQueue QueueType; + +public: + ObSeqThread(); + virtual ~ObSeqThread(); + +public: + virtual int handle(void *task, const int64_t task_seq, const int64_t thread_index, volatile bool &stop_flag) = 0; + +public: + int push(void *task, const int64_t task_seq, const int64_t timeout); + int start(); + void stop(); + void mark_stop_flag() { stop_flag_ = true; } + bool is_stoped() const { return ATOMIC_LOAD(&stop_flag_); } + int64_t get_thread_num() const { return thread_num_; } + int64_t get_task_num() const { return queue_.size(); } + +public: + int init(const int64_t thread_num, + const int64_t queue_size, + const ObMemAttr &memattr = default_memattr); + void destroy(); + +public: + void run(); + +private: + static void *thread_func_(void *arg); + int next_task_(const int64_t task_seq, void *&task); + int64_t next_seq_(); + +private: + bool inited_; + int64_t thread_num_; + int64_t thread_counter_; + + volatile bool stop_flag_ CACHE_ALIGNED; + int64_t task_seq_ CACHE_ALIGNED; + + QueueType queue_; + + pthread_t tids_[MAX_THREAD_NUM]; + +private: + DISALLOW_COPY_AND_ASSIGN(ObSeqThread); +}; + +template +ObSeqThread::ObSeqThread() : + inited_(false), + thread_num_(0), + thread_counter_(0), + stop_flag_(true), + task_seq_(0), + queue_() +{ + (void)memset(tids_, 0, sizeof(tids_)); +} + +template +ObSeqThread::~ObSeqThread() +{ + destroy(); +} + +template +int ObSeqThread::init(const int64_t thread_num, + const int64_t queue_size, + const ObMemAttr &memattr) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + LIB_LOG(ERROR, "ObSeqThread has been initialized", K(inited_)); + ret = OB_INIT_TWICE; + } else if (OB_UNLIKELY(thread_num <= 0) + || OB_UNLIKELY(thread_num > MAX_THREAD_NUM) + || OB_UNLIKELY(queue_size <= 0)) { + LIB_LOG(ERROR, "invalid arguments", K(thread_num), K(MAX_THREAD_NUM), K(queue_size)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(queue_.init(queue_size, memattr))) { + LIB_LOG(ERROR, "initialize queue fail", KR(ret), K(queue_size)); + } else { + thread_num_ = thread_num; + thread_counter_ = 0; + stop_flag_ = true; + task_seq_ = 0; + (void)memset(tids_, 0, sizeof(tids_)); + + inited_ = true; + } + + return ret; +} + +template +void ObSeqThread::destroy() +{ + stop(); + + inited_ = false; + thread_num_ = 0; + thread_counter_ = 0; + stop_flag_ = true; + task_seq_ = 0; + queue_.destroy(); + + (void)memset(tids_, 0, sizeof(tids_)); +} + +template +int ObSeqThread::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! 
inited_)) { + LIB_LOG(ERROR, "ObSeqThread has not been initialized"); + ret = OB_NOT_INIT; + } else if (stop_flag_) { + stop_flag_ = false; + + for (int64_t index = 0; OB_SUCC(ret) && index < thread_num_; index++) { + int pthread_ret = 0; + + if (0 != (pthread_ret = pthread_create(tids_ + index, NULL, thread_func_, this))) { + LIB_LOG(ERROR, "pthread_create fail", K(pthread_ret), KERRNOMSG(pthread_ret), K(index)); + ret = OB_ERR_UNEXPECTED; + } + } + } + + return ret; +} + +template +void ObSeqThread::stop() +{ + if (inited_) { + stop_flag_ = true; + + for (int64_t index = 0; index < thread_num_; index++) { + if (0 != tids_[index]) { + int pthread_ret = pthread_join(tids_[index], NULL); + + if (0 != pthread_ret) { + LIB_LOG(ERROR, "pthread_join fail", "thread_id", tids_[index], K(pthread_ret)); + } else { + // do nothing + } + } + } + + (void)memset(tids_, 0, sizeof(tids_)); + } +} + +template +void *ObSeqThread::thread_func_(void *arg) +{ + if (NULL != arg) { + ObSeqThread *td = static_cast *>(arg); + td->run(); + } + + return NULL; +} + +template +void ObSeqThread::run() +{ + int ret = OB_SUCCESS; + int64_t thread_index = ATOMIC_FAA(&thread_counter_, 1); + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "ObSeqThread not initialized", K(thread_index)); + ret = OB_NOT_INIT; + } else { + while (! stop_flag_ && OB_SUCCESS == ret) { + void *task = NULL; + // Get the next sequence number to be consumed + int64_t task_seq = next_seq_(); + if (OB_FAIL(next_task_(task_seq, task))) { + if (OB_IN_STOP_STATE != ret) { + LIB_LOG(ERROR, "next_task_ fail", KR(ret), K(task_seq)); + } + } else if (OB_FAIL(handle(task, task_seq, thread_index, stop_flag_))) { + if (OB_IN_STOP_STATE != ret) { + LIB_LOG(ERROR, "handle task fail", KR(ret), "task", (int64_t)task, K(task_seq), + K(thread_index)); + } + } else { + // do nothing + } + } + } + + // NOTE: One thread exits, others exit at the same time + stop_flag_ = true; +} + +template +int64_t ObSeqThread::next_seq_() +{ + return ATOMIC_FAA(&task_seq_, 1); +} + +template +int ObSeqThread::next_task_(const int64_t task_seq, void *&task) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "ObSeqThread not initialized"); + ret = OB_NOT_INIT; + } else { + RETRY_FUNC(stop_flag_, queue_, pop, task, task_seq, DATA_OP_TIMEOUT); + } + + return ret; +} + +template +int ObSeqThread::push(void *task, const int64_t task_seq, const int64_t timeout) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(! inited_)) { + LIB_LOG(ERROR, "ObSeqThread not initialized"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(task) || OB_UNLIKELY(task_seq < 0)) { + LIB_LOG(ERROR, "invalid argument", K(task), K(task_seq)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(stop_flag_)) { + ret = OB_IN_STOP_STATE; + } else { + ret = queue_.push(task, task_seq, timeout); + } + + return ret; +} + +} // namespace common +} // namespace oceanbase +#endif /* OCEANBASE_MULTI_FIXED_QUEUE_THREAD_H__ */ diff --git a/src/liboblog/src/ob_small_arena.cpp b/src/liboblog/src/ob_small_arena.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a969d9ced134e2770aee6106eb91de89effcd878 --- /dev/null +++ b/src/liboblog/src/ob_small_arena.cpp @@ -0,0 +1,264 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_small_arena.h" + +#include "lib/utility/utility.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/allocator/ob_malloc.h" + +namespace oceanbase +{ +using namespace common; +namespace liboblog +{ +ObSmallArena::ObSmallArena() : + large_allocator_(NULL), + page_size_(0), + local_page_(NULL), + small_page_list_(NULL), + large_page_list_(NULL), + small_alloc_count_(0), + large_alloc_count_(0), + lock_() +{ +} + +ObSmallArena::~ObSmallArena() +{ + do_reset_small_pages_(); + do_reset_large_pages_(); + + large_allocator_ = NULL; + page_size_ = 0; + local_page_ = NULL; + small_page_list_ = NULL; + large_page_list_ = NULL; + small_alloc_count_ = 0; + large_alloc_count_ = 0; +} + +void ObSmallArena::reset() +{ + ObSmallSpinLockGuard guard(lock_); + do_reset_small_pages_(); + do_reset_large_pages_(); + + // Require external local cache pages to be reclaimed before resetting + if (NULL != local_page_) { + local_page_->reset(); + } +} + +void ObSmallArena::set_allocator(const int64_t page_size, + common::ObIAllocator &large_allocator) +{ + large_allocator_ = &large_allocator; + page_size_ = page_size; +} + +void ObSmallArena::set_prealloc_page(void *page) +{ + if (NULL != local_page_) { + LOG_ERROR("prealloc page has been set", K(local_page_), K(page)); + } else if (NULL != page) { + local_page_ = new(page) SmallPage(); + } +} + +void ObSmallArena::revert_prealloc_page(void *&page) +{ + page = local_page_; + + if (NULL != local_page_) { + local_page_->~SmallPage(); + } + + local_page_ = NULL; +} + +void* ObSmallArena::alloc(const int64_t size) +{ + const int64_t default_align = sizeof(void*); + return alloc_aligned(size, default_align); +} + +bool ObSmallArena::is_valid_() const +{ + return NULL != large_allocator_ && page_size_ > 0; +} + +void* ObSmallArena::alloc_aligned(const int64_t size, const int64_t align) +{ + int tmp_ret = OB_SUCCESS; + void *ret_ptr = NULL; + ObSmallSpinLockGuard guard(lock_); + if (OB_UNLIKELY(!is_valid_())) { + tmp_ret = OB_ERR_UNEXPECTED; + LOG_ERROR("small arena is not valid", K(large_allocator_), K(page_size_)); + } else if (OB_UNLIKELY(0 >= size) + || OB_UNLIKELY(0 != (align & (align - 1))) + || OB_UNLIKELY(align > (page_size_ / 2))) { + tmp_ret = OB_INVALID_ARGUMENT; + LOG_ERROR("small arena alloc error, invalid argument", "ret", tmp_ret, K(size), + K(align), K(page_size_)); + } else if (need_large_page_(size, align)) { + ret_ptr = do_alloc_large_(size, align); + ATOMIC_INC(&large_alloc_count_); + } else { + ret_ptr = do_alloc_normal_(size, align); + ATOMIC_INC(&small_alloc_count_); + } + return ret_ptr; +} + +inline bool ObSmallArena::need_large_page_(const int64_t size, const int64_t align) +{ + return (size + SMALL_PAGE_HEADER_SIZE + (align - 1) > page_size_); +} + +// alloc large page from large_arena +void* ObSmallArena::do_alloc_large_(const int64_t size, const int64_t align) +{ + void *ret_ptr = NULL; + if (OB_ISNULL(large_allocator_)) { + LOG_ERROR("invalid large allocator", K(large_allocator_)); + } else { + int64_t alloc_size = size + LARGE_PAGE_HEADER_SIZE + align - 1; + LargePage *large_page = static_cast(large_allocator_->alloc(alloc_size)); + if 
(OB_ISNULL(large_page)) { + LOG_ERROR("alloc large page fail", K(alloc_size)); + } else { + int64_t start_addr = reinterpret_cast(large_page->addr_); + ret_ptr = reinterpret_cast(upper_align(start_addr, align)); + large_page->next_ = large_page_list_; + large_page_list_ = large_page; + } + } + return ret_ptr; +} + +void ObSmallArena::alloc_small_page_() +{ + SmallPage *new_cur_page = NULL; + void *ptr = NULL; + ObMemAttr mem_attr; + mem_attr.label_ = common::ObModIds::OB_LOG_PART_TRANS_TASK_SMALL; + + if (OB_ISNULL(ptr = ob_malloc(page_size_, mem_attr))) { + LOG_ERROR("alloc small page error", K(ptr), K(page_size_)); + } else { + new_cur_page = new (ptr) SmallPage(); + new_cur_page->next_ = small_page_list_; + small_page_list_ = new_cur_page; + } +} + +void *ObSmallArena::alloc_from_page_(SmallPage &page, const int64_t size, const int64_t align) +{ + void *ptr = NULL; + int64_t start_addr = reinterpret_cast(page.addr_); + int64_t cur_addr = start_addr + page.offset_; + int64_t aligned_addr = upper_align(cur_addr, align); + int64_t avail_size = page_size_ - (aligned_addr - start_addr + SMALL_PAGE_HEADER_SIZE); + + // Find pages with more free space than the requested size + if (avail_size >= size) { + ptr = reinterpret_cast(aligned_addr); + page.offset_ = aligned_addr + size - start_addr; + } + + return ptr; +} + +void* ObSmallArena::try_alloc_(const int64_t size, const int64_t align) +{ + void* ret_ptr = NULL; + + if (NULL != small_page_list_) { + int64_t depth = 0; + SmallPage *page = small_page_list_; + + // Iterate through the list of small pages to find pages with enough free space + // The purpose is to avoid having too many empty pages + while (NULL == ret_ptr && NULL != page && depth++ < MAX_FIND_PAGE_DEPTH) { + ret_ptr = alloc_from_page_(*page, size, align); + page = page->next_; + } + } + + // If no suitable page is found from the small page list, check if there is enough space on the local cache page + if (NULL == ret_ptr && NULL != local_page_) { + ret_ptr = alloc_from_page_(*local_page_, size, align); + } + + return ret_ptr; +} + +void* ObSmallArena::do_alloc_normal_(const int64_t size, const int64_t align) +{ + void *ret_ptr = NULL; + ret_ptr = try_alloc_(size, align); + if (NULL == ret_ptr) { + alloc_small_page_(); + ret_ptr = try_alloc_(size, align); + } + return ret_ptr; +} + +void ObSmallArena::do_reset_small_pages_() +{ + SmallPage *iter = NULL; + SmallPage *next = NULL; + iter = small_page_list_; + while (NULL != iter) { + next = iter->next_; + iter->~SmallPage(); + ob_free(iter); + iter = next; + } + + small_page_list_ = NULL; + small_alloc_count_ = 0; +} + +void ObSmallArena::do_reset_large_pages_() +{ + if (NULL != large_allocator_) { + LargePage *iter = NULL; + LargePage *next = NULL; + iter = large_page_list_; + while (NULL != iter) { + next = iter->next_; + iter->~LargePage(); + large_allocator_->free(iter); + iter = next; + } + large_page_list_ = NULL; + large_alloc_count_ = 0; + } +} + +int64_t ObSmallArena::get_small_alloc_count() const +{ + return ATOMIC_LOAD(&small_alloc_count_); +} + +int64_t ObSmallArena::get_large_alloc_count() const +{ + return ATOMIC_LOAD(&large_alloc_count_); +} + +} // ns liboblog +} // ns oceanbase diff --git a/src/liboblog/src/ob_small_arena.h b/src/liboblog/src/ob_small_arena.h new file mode 100644 index 0000000000000000000000000000000000000000..529147efae56df97d23567866f06921477b0abc2 --- /dev/null +++ b/src/liboblog/src/ob_small_arena.h @@ -0,0 +1,112 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan 
PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_SRC_OB_SMALL_ARENA_ +#define OCEANBASE_LIBOBLOG_SRC_OB_SMALL_ARENA_ + +#include "lib/allocator/ob_allocator.h" +#include "lib/lock/ob_small_spin_lock.h" + +namespace oceanbase +{ +namespace liboblog +{ + +/* + Allocator for liboblog specific scenarios. + Note: The user needs to ensure that the parameter @sa passed in remains valid for the lifetime of this SmallArena. +*/ +class ObSmallArena : public common::ObIAllocator +{ + struct SmallPage + { + SmallPage() : offset_(0), next_(NULL) {} + ~SmallPage() { reset(); } + + void reset() { offset_ = 0; next_ = NULL; } + + int64_t offset_; + SmallPage *next_; + char addr_[0]; + }; + + struct LargePage + { + LargePage() : next_(NULL) {} + ~LargePage() { next_ = NULL; } + + LargePage *next_; + char addr_[0]; + }; + +public: + static const int64_t SMALL_PAGE_HEADER_SIZE = sizeof(SmallPage); + static const int64_t LARGE_PAGE_HEADER_SIZE = sizeof(LargePage); + static const int64_t MAX_FIND_PAGE_DEPTH = 10; + +public: + ObSmallArena(); + ~ObSmallArena(); + void *alloc_aligned(const int64_t size, const int64_t align); + void *alloc(const int64_t size, const common::ObMemAttr &attr) + { + UNUSEDx(attr); + return alloc(size); + } + void *alloc(const int64_t size); + void reset(); + int64_t get_small_alloc_count() const; + int64_t get_large_alloc_count() const; + + void set_allocator(const int64_t page_size, common::ObIAllocator &large_allocator); + + // Set pre-assigned pages + void set_prealloc_page(void *page); + + // Recycle pre-allocated pages + void revert_prealloc_page(void *&page); + +private: + bool is_valid_() const; + bool need_large_page_(const int64_t size, const int64_t align); + void *do_alloc_large_(const int64_t size, const int64_t align); + void *try_alloc_(const int64_t size, const int64_t align); + void alloc_small_page_(); + void *do_alloc_normal_(const int64_t size, const int64_t align); + void do_reset_small_pages_(); + void do_reset_large_pages_(); + void *alloc_from_page_(SmallPage &page, const int64_t size, const int64_t align); + +private: + common::ObIAllocator *large_allocator_; // large allocator + int64_t page_size_; // size of page + + // Local cache pages are only used to allocate small blocks of memory + // Local cache pages are not considered when determining whether a large page needs to be allocated + // + // Allow local cache to be empty + SmallPage *local_page_; // page cache in local + SmallPage *small_page_list_ CACHE_ALIGNED; // page list for small page + LargePage *large_page_list_ CACHE_ALIGNED; // page list for large page + + int64_t small_alloc_count_ CACHE_ALIGNED; + int64_t large_alloc_count_ CACHE_ALIGNED; + + mutable common::ObByteLock lock_; + + DISALLOW_COPY_AND_ASSIGN(ObSmallArena); +}; + +} // ns liboblog +} // ns oceanbase + +#endif diff --git a/src/liboblog/tests/CMakeLists.txt b/src/liboblog/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab88ed53be059fd9c38fa1b91872072921faa4f2 --- /dev/null +++ b/src/liboblog/tests/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(oblog_tailf + 
main.cpp + oblog_main.h oblog_main.cpp + ob_binlog_record_printer.h ob_binlog_record_printer.cpp + ) +target_link_libraries(oblog_tailf PRIVATE oblog) +target_include_directories(oblog_tailf PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src) diff --git a/src/liboblog/tests/conf/liboblog.conf.tpl b/src/liboblog/tests/conf/liboblog.conf.tpl new file mode 100644 index 0000000000000000000000000000000000000000..4660f3879af45022f96cbf4db4766e65d99c4b60 --- /dev/null +++ b/src/liboblog/tests/conf/liboblog.conf.tpl @@ -0,0 +1,31 @@ +cluster_db_name=oceanbase +log_level=ALL.*:INFO;SHARE.SCHEMA:WARN +#log_level=ALL.*:DEBUG;SHARE.SCHEMA:WARN +#log_level=ALL.*:DEBUG;TLOG.FETCHER:INFO;TLOG.FORMATTER:INFO;SHARE.SCHEMA:WARN + +tb_white_list=*.*.* +tb_white_list=sys.oblog*.*|oblog_tt.*.* +tb_black_list=*.*.*_t|*.*.*_[0-9][a-z] +#tablegroup_white_list=tpch.TPCH_TG_1000G_LINEITEM_ORDER_GROUP +start_log_id_locator_locate_count=1 + +cluster_password= +cluster_user= + +need_verify_ob_trace_id=0 +ob_trace_id=obstress_trace_id1 +instance_index=0 +instance_num=1 +formatter_thread_num=25 +formatter_batch_stmt_count=30000 +storager_thread_num=20 +reader_thread_num=20 +stream_max_partition_count=500 +#skip_ob_version_compat_check=1 +sort_trans_participants=1 +#drc_message_factory_binlog_record_type=BinlogRecordImpl +#test_mode_on=1 +#test_mode_ignore_redo_count=10 +#enable_verify_mode=1 +#print_participant_not_serve_info=1 +#enable_output_hidden_primary_key=1 diff --git a/src/liboblog/tests/copy_oblog.sh b/src/liboblog/tests/copy_oblog.sh new file mode 100755 index 0000000000000000000000000000000000000000..e5ee6a54b474f3e1076a7af0644fc686aba2b9c0 --- /dev/null +++ b/src/liboblog/tests/copy_oblog.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +OBLOG_DIR=`pwd`/lib +OBLOG_TAILF_DIR=`pwd` + +if [ $# -lt 1 ] +then +# echo "Usage ./copy_oblog.sh [oceanbase_dev_dir]" +# echo "Eg; ./copy_oblog.sh 1 that means copy from build_debug" +# echo "Eg: ./copy_oblog.sh 2 that means copy from build_release" + + if [ -d "../../../build_debug" ] + then + OCEANBASE_DIR="../../../build_debug" + elif [ -d "../../../build_release" ] + then + OCEANBASE_DIR="../../../build_release" + fi + +else + #echo $1 + ver_flag=0 + if [ $1 -eq 1 ] + then + OCEANBASE_DIR="../../../build_debug" + ver_flag=1 + elif [ $1 -eq 2 ] + then + OCEANBASE_DIR="../../../build_release" + ver_flag=1 + else + ver_flag=0 + echo "parameter is invalid" + fi +fi + +echo "copy liboblog.so, oblog_tailf from "$OCEANBASE_DIR + +OBLOG_SO="$OCEANBASE_DIR/src/liboblog/src/liboblog.so.1" +OBLOG_TAILF="$OCEANBASE_DIR/src/liboblog/tests/oblog_tailf" + +mkdir -p $OBLOG_DIR +[ -f $OBLOG_SO ] && libtool --mode=install cp $OBLOG_SO $OBLOG_DIR/ +[ -f $OBLOG_TAILF ] && libtool --mode=install cp $OBLOG_TAILF $OBLOG_TAILF_DIR + diff --git a/src/liboblog/tests/fetcher_integration_test_add_dispatch_discard.cpp b/src/liboblog/tests/fetcher_integration_test_add_dispatch_discard.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dec8c57e39e6d26185ff19e12fd83fa97a665bc8 --- /dev/null +++ b/src/liboblog/tests/fetcher_integration_test_add_dispatch_discard.cpp @@ -0,0 +1,524 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include // fprintf +#include // getopt_long +#include // strtoull + +#include "share/ob_define.h" +#include "lib/file/file_directory_utils.h" +#include "liboblog/src/ob_log_fetcher_impl.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; + +#define OB_LOGGER ::oceanbase::common::ObLogger::get_logger() + +#define EXPECT_EQ(EXP, VAL) \ + do { \ + if ((EXP) != (VAL)) { _E_("assert failed", #EXP, (EXP), #VAL, (VAL)); exit(1); } \ + } while(0) + +namespace oceanbase +{ +namespace liboblog +{ +namespace integrationtesting +{ + +/* + * Add&DiscardTest: + * - add n partitions, find m servers for each one, let it dispatch and create new workers + * - no log, no heartbeat + * - discard all, quit + */ +class AddDiscardTest +{ +public: + int64_t partition_cnt_; + int64_t server_cnt_; + int64_t runtime_; // usec +public: + /* + * Mock systable helper. + * - provide all m servers for each request, 127.0.0.[1-m] + */ + class MockSystableHelper : public ObILogSysTableHelper + { + public: + int64_t server_cnt_; + int64_t now_; + void init(const int64_t svr_cnt) + { + server_cnt_ = svr_cnt; + now_ = get_timestamp(); + } + public: + virtual int query_all_clog_history_info_by_log_id_1( + const common::ObPartitionKey &pkey, const uint64_t log_id, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)pkey.get_partition_id(); + rec.partition_cnt_ = pkey.get_partition_cnt();//partitoin cnt + rec.start_log_id_ = log_id; + rec.end_log_id_ = log_id + 10000; + rec.start_log_timestamp_ = now_; + rec.end_log_timestamp_ = now_ + 1 * _HOUR_; + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + + virtual int query_all_clog_history_info_by_timestamp_1( + const common::ObPartitionKey &pkey, const int64_t timestamp, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)pkey.get_partition_id(); + rec.partition_cnt_ = 0;//partition cnt + rec.start_log_id_ = 0; + rec.end_log_id_ = 65536; + rec.start_log_timestamp_ = timestamp; + rec.end_log_timestamp_ = timestamp + (1 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + virtual int query_all_meta_table_1( + const common::ObPartitionKey &pkey, AllMetaTableRecords &records) { + // Generate random results. 
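+      // Mock behaviour: report every server 127.0.0.[1..server_cnt_] on port 8888, marking the first record LEADER and the rest FOLLOWER.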
+ int ret = OB_SUCCESS; + UNUSED(pkey); + records.reset(); + AllMetaTableRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + rec.role_ = (0 == idx) ? LEADER : FOLLOWER; + records.push_back(rec); + } + return ret; + } + virtual int query_all_meta_table_for_leader( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + has_leader = true; + leader.set_ip_addr("127.0.0.1", 8888); + return OB_SUCCESS; + } + virtual int query_all_server_table_1( + AllServerTableRecords &records) + { + records.reset(); + AllServerTableRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return OB_SUCCESS; + } + }; + /* + * Rpc. + * - return start log id as 1 + * - no heartbeat + * - can open stream + * - no log + */ + class MockRpcInterface : public IFetcherRpcInterface + { + private: + common::ObAddr svr_; + + public: + ~MockRpcInterface() {} + virtual void set_svr(const common::ObAddr& svr) { svr_ = svr; } + virtual const ObAddr& get_svr() const { return svr_; } + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result; + result.reset(); + result.err_ = OB_SUCCESS; + result.start_log_id_ = 1; + res.append_result(result); + } + _D_(">>> req start log id", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqHeartbeatInfoResponse::Result result; + result.reset(); + result.err_ = OB_NEED_RETRY; + result.tstamp_ = OB_INVALID_TIMESTAMP; + res.append_result(result); + } + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const 
obrpc::ObLogLeaderHeartbeatReq::Param &param = req.get_params().at(idx); + + // When table_id is even, assume the leader observer is being queried and return normal heartbeat info + // When table_id is odd, assume a follower observer is being queried and return a stalled heartbeat + bool asking_leader = ((param.pkey_.get_table_id() % 2) == 0); + + result.reset(); + result.err_ = asking_leader ? OB_SUCCESS : OB_NOT_MASTER; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = asking_leader ? get_timestamp() : 1; + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + int ret = OB_SUCCESS; + UNUSED(req); + obrpc::ObStreamSeq seq; + seq.reset(); + seq.self_.set_ip_addr("127.0.0.1", 8888); + seq.seq_ts_ = get_timestamp(); + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + res.set_stream_seq(seq); + _D_(">>> open stream", K(req), K(res)); + return ret; + } + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + _D_(">>> fetch log", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } + }; + /* + * Factory. + */ + class MockRpcInterfaceFactory : public IFetcherRpcInterfaceFactory + { + public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } + }; + + /* + * Mock parser. + * - swallow everything + */ + class MockParser : public IObLogParser + { + public: + MockParser() : trans_cnt_(0) { } + virtual ~MockParser() { } + virtual int start() { return OB_SUCCESS; } + virtual void stop() { } + virtual void mark_stop_flag() { } + virtual int push(PartTransTask* task, const int64_t timeout) + { + UNUSED(timeout); + if (NULL != task) { + task->revert(); + + if (task->is_normal_trans()) { + trans_cnt_ += 1; + } + } + return OB_SUCCESS; + } + int64_t get_trans_cnt() const { return trans_cnt_; } + private: + int64_t trans_cnt_; + }; + /* + * Err handler. + * - exit on error + */ + class MockFetcherErrHandler : public IErrHandler + { + public: + virtual ~MockFetcherErrHandler() { } + public: + virtual void handle_err(int err_no, const char* fmt, ...) + { + UNUSED(err_no); + va_list ap; + va_start(ap, fmt); + __E__(fmt, ap); + va_end(ap); + exit(1); + } + }; + +public: + void run() + { + int err = OB_SUCCESS; + + // Task Pool. + ObLogTransTaskPool task_pool; + ObConcurrentFIFOAllocator task_pool_alloc; + err = task_pool_alloc.init(128 * _G_, 8 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, err); + err = task_pool.init(&task_pool_alloc, 10240, 1024, 4 * 1024 * 1024, true); + EXPECT_EQ(OB_SUCCESS, err); + + // Parser. + MockParser parser; + + // Err Handler. + MockFetcherErrHandler err_handler; + + // Rpc. + MockRpcInterfaceFactory rpc_factory; + + // Worker Pool. + FixedJobPerWorkerPool worker_pool; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + // StartLogIdLocator. + ::oceanbase::liboblog::fetcher::StartLogIdLocator locator; + err = locator.init(&rpc_factory, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Heartbeater. 
+ Heartbeater heartbeater; + err = heartbeater.init(&rpc_factory, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // SvrFinder. + MockSystableHelper systable_helper; + systable_helper.init(server_cnt_); + ::oceanbase::liboblog::fetcher::SvrFinder svrfinder; + err = svrfinder.init(&systable_helper, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Fetcher Config. + FetcherConfig cfg; + cfg.reset(); + + // Init. + ::oceanbase::liboblog::fetcher::Fetcher fetcher; + err = fetcher.init(&task_pool, &parser, &err_handler, &rpc_factory, + &worker_pool, &svrfinder, &locator, &heartbeater, &cfg); + EXPECT_EQ(OB_SUCCESS, err); + + // Add partition. + // partition cnt.need rewrite + for (int64_t idx = 0, cnt = partition_cnt_; (idx < cnt); ++idx) { + ObPartitionKey p1(1001 + idx, 0, partition_cnt_); + err = fetcher.fetch_partition(p1, 1, OB_INVALID_ID); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Run. + err = fetcher.start(); + EXPECT_EQ(OB_SUCCESS, err); + + // Runtime. + int64_t start = get_timestamp(); + while ((get_timestamp() - start) < runtime_) { + usec_sleep(500 * _MSEC_); + } + + // Discard partition. + // partition cnt.may need rewrite + for (int64_t idx = 0, cnt = partition_cnt_; (idx < cnt); ++idx) { + ObPartitionKey p1(1001 + idx, 0, partition_cnt_); + err = fetcher.discard_partition(p1); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Stop. + err = fetcher.stop(true); + EXPECT_EQ(OB_SUCCESS, err); + + // Destroy. + err = fetcher.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = locator.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = svrfinder.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + task_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + } +}; + +} +} +} + +void print_usage(const char *prog_name) +{ + printf("USAGE: %s\n" + " -p, --partition partition count\n" + " -s, --server server count\n" + " -r, --runtime run time in seconds, default -1, means to run forever\n", + prog_name); +} +int main(const int argc, char **argv) +{ + // option variables + int opt = -1; + const char *opt_string = "p:s:r:"; + struct option long_opts[] = + { + {"partition", 1, NULL, 'p'}, + {"server", 1, NULL, 's'}, + {"runtime", 1, NULL, 'r'}, + {0, 0, 0, 0} + }; + + if (argc <= 1) { + print_usage(argv[0]); + return 1; + } + + // Params. + int64_t partition_cnt = 0; + int64_t server_cnt = 0; + int64_t runtime = 1 * ::oceanbase::liboblog::_YEAR_; + + // Parse command line + while ((opt = getopt_long(argc, argv, opt_string, long_opts, NULL)) != -1) { + switch (opt) { + case 'p': { + partition_cnt = strtoll(optarg, NULL, 10); + break; + } + case 's': { + server_cnt = strtoll(optarg, NULL, 10); + break; + } + case 'r': { + runtime = strtoll(optarg, NULL, 10); + break; + } + default: + print_usage(argv[0]); + break; + } // end switch + } // end while + + printf("partition_cnt:%ld server_cnt:%ld runtime:%ld sec\n", partition_cnt, server_cnt, runtime); + + // Logger. + ::oceanbase::liboblog::fetcher::FetcherLogLevelSetter::get_instance().set_mod_log_levels("INFO"); + OB_LOGGER.set_log_level("INFO"); + // Run test. 
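+  // Build the test from the parsed options; runtime_ is held in microseconds, hence the _SEC_ conversion below.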
+ ::oceanbase::liboblog::integrationtesting::AddDiscardTest test; + test.partition_cnt_ = partition_cnt; + test.server_cnt_ = server_cnt; + test.runtime_ = ::oceanbase::liboblog::_SEC_ * runtime; + test.run(); + return 0; +} diff --git a/src/liboblog/tests/fetcher_integration_test_fetch_log.cpp b/src/liboblog/tests/fetcher_integration_test_fetch_log.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9c53b3b3df7ee49cadef69561409e35e47b38c7d --- /dev/null +++ b/src/liboblog/tests/fetcher_integration_test_fetch_log.cpp @@ -0,0 +1,910 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include // fprintf +#include // getopt_long +#include // strtoull + +#include "share/ob_define.h" +#include "lib/file/file_directory_utils.h" +#include "liboblog/src/ob_log_fetcher_impl.h" +#include "ob_log_trans_log.h" + +using namespace oceanbase; +using namespace common; +using namespace transaction; +using namespace storage; +using namespace liboblog; +using namespace fetcher; +using namespace clog; + +#define OB_LOGGER ::oceanbase::common::ObLogger::get_logger() + +#define EXPECT_EQ(EXP, VAL) \ + do { \ + if ((EXP) != (VAL)) { _E_("assert failed", #EXP, (EXP), #VAL, (VAL)); exit(1); } \ + } while(0) + +namespace oceanbase +{ +namespace liboblog +{ +namespace integrationtesting +{ + +/* + * FetchLogTest: + * - add n partitions, find m servers for each one, let it dispatch and create new workers + * - in time log, no heartbeat + * - parser prints current min and max process + * - discard all, quit + */ +class FetchLogTest +{ +public: + int64_t partition_cnt_; + int64_t server_cnt_; + int64_t runtime_; // usec +public: + /* + * Mock systable helper. + * - provide all m servers for each request, 127.0.0.[1-m] + */ + class MockSystableHelper : public ObILogSysTableHelper + { + public: + int64_t server_cnt_; + int64_t now_; + void init(const int64_t svr_cnt) + { + server_cnt_ = svr_cnt; + now_ = get_timestamp(); + } + public: + virtual int query_all_clog_history_info_by_log_id_1( + const common::ObPartitionKey &pkey, const uint64_t log_id, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = 0;//partition cnt + rec.start_log_id_ = log_id; + rec.end_log_id_ = log_id + 100000000; + rec.start_log_timestamp_ = now_; + rec.end_log_timestamp_ = now_ + 100 * _HOUR_; + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + + virtual int query_all_clog_history_info_by_timestamp_1( + const common::ObPartitionKey &pkey, const int64_t timestamp, + AllClogHistoryInfos &records) { + // Generate random results. 
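+      // Mock behaviour: every server 127.0.0.[1..server_cnt_]:8888 claims to serve log ids [0, 100000000] over [timestamp, timestamp + 100 hours].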
+ int ret = OB_SUCCESS; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = 0; + rec.start_log_id_ = 0; + rec.end_log_id_ = 100000000; + rec.start_log_timestamp_ = timestamp; + rec.end_log_timestamp_ = timestamp + (100 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + virtual int query_all_meta_table_1( + const common::ObPartitionKey &pkey, AllMetaTableRecords &records) { + // Generate random results. + int ret = OB_SUCCESS; + UNUSED(pkey); + records.reset(); + AllMetaTableRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + rec.role_ = (0 == idx) ? LEADER : FOLLOWER; + records.push_back(rec); + } + return ret; + } + virtual int query_all_meta_table_for_leader( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + has_leader = true; + leader.set_ip_addr("127.0.0.1", 8888); + return OB_SUCCESS; + } + virtual int query_all_server_table_1( + AllServerTableRecords &records) + { + UNUSED(records); + // Generate random results. + int ret = OB_SUCCESS; + records.reset(); + AllServerTableRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + }; + /* + * Rpc. + * - return start log id as 1 + * - in time log + * - can open stream + * - no heartbeat + * + * ********** + * Update: 1. to all partition, redo logs id 10-17, etc, prepare log id 18, etc, commit log id 19, etc transactions are all consecutive; + * 2. to all partition, start log id is always 7, so 7 redo, 8 prepare, then fetch 0-6 as missing redo logs. + * Result: check process, if process keeps going, it works. + * Impl: call gen_log(id), it returns log entry. It judges the log type by id, generates the log data, serialize it into buffer, + * and you get it. + * Update: 1. set feedback randomly: 1/100 rpc. + * Todo... all logs use the same trans id. 
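+ * Layout actually produced by gen_log() below: for a given log id,
+ *   id % 10 in [0, 7] -> redo log (log_no = id % 10)
+ *   id % 10 == 8      -> combined redo + prepare log, redo id list = [id - 8, id - 1]
+ *   id % 10 == 9      -> commit log referencing prepare log id (id - 1)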
+ * ********** + */ + class MockRpcInterface : public IFetcherRpcInterface + { + struct PInfo + { + ObPartitionKey pkey_; + uint64_t next_log_id_; + ObTransID current_trans_id_; + TO_STRING_KV(K(pkey_), K(next_log_id_), K(current_trans_id_)); + }; + typedef ObArray PInfoArray; + struct StreamInfo + { + PInfoArray pinfo_array_; + int64_t last_partition_; + }; + typedef ObLinearHashMap StreamMap; + StreamMap stream_map_; + + int64_t log_pos_; + static const int64_t LOG_BUFFER_SIZE_ = 10240; + char log_buffer_[LOG_BUFFER_SIZE_]; + + static const int64_t MOCK_LOAD_SIZE_ = 8; + char mock_load_[MOCK_LOAD_SIZE_]; + + public: + MockRpcInterface() { EXPECT_EQ(OB_SUCCESS, stream_map_.init()); log_pos_ = 0;} + ~MockRpcInterface() { stream_map_.reset(); stream_map_.destroy();} + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result; + result.reset(); + result.err_ = OB_SUCCESS; + result.start_log_id_ = 16; + res.append_result(result); + } + _D_(">>> req start log id", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqHeartbeatInfoResponse::Result result; + result.reset(); + result.err_ = OB_NEED_RETRY; + result.tstamp_ = OB_INVALID_TIMESTAMP; + res.append_result(result); + } + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const obrpc::ObLogLeaderHeartbeatReq::Param ¶m = req.get_params().at(idx); + + result.reset(); + result.err_ = OB_NOT_MASTER; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = 1; + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + int ret = 
OB_SUCCESS; + UNUSED(req); + // Build stream. + obrpc::ObStreamSeq seq; + seq.reset(); + seq.self_.set_ip_addr("127.0.0.1", 8888); + seq.seq_ts_ = get_timestamp(); + PInfo pinfo; + StreamInfo *stream_info = new StreamInfo(); + for (int64_t idx = 0, cnt = req.get_params().count(); (idx < cnt); ++idx) { + pinfo.pkey_ = req.get_params().at(idx).pkey_; + pinfo.next_log_id_ = req.get_params().at(idx).start_log_id_; + EXPECT_EQ(OB_SUCCESS, stream_info->pinfo_array_.push_back(pinfo)); + } + stream_info->last_partition_ = 0; + EXPECT_EQ(OB_SUCCESS, stream_map_.insert(seq, stream_info)); + // Response. + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + res.set_stream_seq(seq); + _D_(">>> open stream", K(req), K(res)); + return ret; + } + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + // Get stream info. + StreamInfo *stream_info = NULL; + EXPECT_EQ(OB_SUCCESS, stream_map_.get(req.get_stream_seq(), stream_info)); + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + // Build logs. + int ret = OB_SUCCESS; + int64_t reach_upper_limit_cnt = 0; + while (_SUCC_(ret) && (reach_upper_limit_cnt < stream_info->pinfo_array_.count())) { + for (int64_t idx = 0, cnt = req.get_log_cnt_per_part_per_round(); + (idx < cnt) && _SUCC_(ret); ++idx) { + int64_t &pidx = stream_info->last_partition_; + pidx += 1; + if (stream_info->pinfo_array_.count() <= pidx) { + pidx = 0; + } + const ObPartitionKey &pkey = stream_info->pinfo_array_.at(pidx).pkey_; + uint64_t &log_id = stream_info->pinfo_array_.at(pidx).next_log_id_; + ObTransID &trans_id = stream_info->pinfo_array_.at(pidx).current_trans_id_; + int64_t ts = get_timestamp(); + if (ts < req.get_upper_limit_ts()) { + // Gen log. + gen_log(pkey, log_id, trans_id); + ObProposalID proposal_id; + proposal_id.addr_ = ObAddr(ObAddr::IPV4, "127.0.0.1", 8888); + proposal_id.ts_ = ts; + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, pkey, + log_id, log_buffer_, log_pos_, + ts, ts, proposal_id, ts, ObVersion(1)); + ObLogEntry log_entry; + log_entry.generate_entry(header, log_buffer_); + ret = res.append_clog_entry(log_entry); // May buf not enough. + if (_SUCC_(ret)) { + log_id += 1; + } + } + else { + reach_upper_limit_cnt += 1; + obrpc::ObLogStreamFetchLogResp::FetchLogHeartbeatItem hb; + hb.reset(); + hb.pkey_ = pkey; + hb.next_log_id_ = log_id; + hb.heartbeat_ts_ = get_timestamp() - 1; + ret = res.append_hb(hb); + break; + } + } + } + if (OB_BUF_NOT_ENOUGH == ret) { + ret = OB_SUCCESS; + } + // Do some feedback randomly. + if ((get_timestamp() % 1000000) < 10000) { + // 1 / 100. 
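+ // (get_timestamp() % 1000000) < 10000 holds for 10000 of every 1000000 microseconds,
+ // i.e. roughly 1% of calls, matching the 1/100 note above.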
+ const ObPartitionKey &pkey = stream_info->pinfo_array_.at(stream_info->last_partition_).pkey_; + obrpc::ObLogStreamFetchLogResp::FeedbackPartition feedback; + feedback.pkey_ = pkey; + feedback.feedback_type_ = obrpc::ObLogStreamFetchLogResp::LAGGED_FOLLOWER; + EXPECT_EQ(OB_SUCCESS, res.append_feedback(feedback)); + } + _D_(">>> fetch log", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } + + void gen_header(const int64_t log_type, char *buf, const int64_t size, int64_t &pos) + { + EXPECT_EQ(OB_SUCCESS, serialization::encode_i64(buf, size, pos, log_type)); + EXPECT_EQ(OB_SUCCESS, serialization::encode_i64(buf, size, pos, 0)); + } + + void init_redo(ObTransRedoLog &redo_log, const ObPartitionKey &pkey, + const uint64_t log_id, const ObTransID &trans_id) + { + int64_t log_type = OB_LOG_TRANS_REDO; + ObPartitionKey partition_key(pkey); + ObAddr observer; + observer.set_ip_addr("127.0.0.1", 8888); + const int64_t log_no = (int64_t)(log_id % 10); + const uint64_t tenant_id = 100; + ObAddr scheduler; + ObPartitionKey coordinator; + ObPartitionArray participants; + ObStartTransParam parms; + parms.set_access_mode(ObTransAccessMode::READ_ONLY); + parms.set_type(ObTransType::TRANS_USER); + parms.set_isolation(ObTransIsolation::READ_COMMITED); + coordinator = partition_key; + participants.push_back(partition_key); + scheduler = observer; + const uint64_t cluster_id = 1000; + + ObVersion active_memstore_version(1); + EXPECT_EQ(OB_SUCCESS, redo_log.init(log_type, partition_key, trans_id, tenant_id, log_no, + scheduler, coordinator, participants, parms, cluster_id, + active_memstore_version)); + redo_log.get_mutator().set_data(mock_load_, MOCK_LOAD_SIZE_); + redo_log.get_mutator().get_position() += MOCK_LOAD_SIZE_; + EXPECT_EQ(true, redo_log.is_valid()); + } + + void init_prepare(ObTransPrepareLog &prepare_log, + const ObPartitionKey &pkey, const uint64_t log_id, + const ObRedoLogIdArray &redo_log_ids, + const ObTransID &trans_id) + { + UNUSED(log_id); + int64_t log_type = OB_LOG_TRANS_PREPARE; + const uint64_t cluster_id = 1000; + ObPartitionKey partition_key(pkey); + ObAddr observer; + observer.set_ip_addr("127.0.0.1", 8888); + ObAddr &scheduler = observer; + ObPartitionKey &coordinator = partition_key; + ObPartitionArray participants; + participants.push_back(partition_key); + + ObStartTransParam trans_param; + trans_param.set_access_mode(ObTransAccessMode::READ_WRITE); + trans_param.set_type(ObTransType::TRANS_USER); + trans_param.set_isolation(ObTransIsolation::READ_COMMITED); + + const int prepare_status = true; + const int64_t local_trans_version = 1000; + const uint64_t tenant_id = 100; + + ObVersion active_memstore_version(1); + EXPECT_EQ(OB_SUCCESS, prepare_log.init(log_type, partition_key, trans_id, tenant_id, + scheduler, coordinator, participants, trans_param, prepare_status, + redo_log_ids, local_trans_version, cluster_id, active_memstore_version)); + EXPECT_EQ(true, prepare_log.is_valid()); + } + + void init_commit(ObTransCommitLog &commit_log, + const ObPartitionKey &pkey, const uint64_t log_id, const uint64_t prepare_id, + const int64_t prepare_tstamp, + const ObTransID &trans_id) + { + UNUSED(log_id); + int64_t log_type = OB_LOG_TRANS_COMMIT; + const uint64_t cluster_id = 1000; + const int64_t global_trans_version = 1000; + ObPartitionKey partition_key(pkey); + ObAddr observer; + observer.set_ip_addr("127.0.0.1", 8888); + PartitionLogInfoArray array; + ObPartitionLogInfo 
pidinfo(pkey, prepare_id, prepare_tstamp); + EXPECT_EQ(OB_SUCCESS, array.push_back(pidinfo)); + EXPECT_EQ(OB_SUCCESS, commit_log.init(log_type, partition_key, trans_id, array, + global_trans_version, 0, cluster_id)); + EXPECT_EQ(true, commit_log.is_valid()); + } + + // Log generators. + void gen_redo(const ObPartitionKey &pkey, const uint64_t log_id, + ObTransID &trans_id, char *buf, const int64_t size, int64_t &pos) + { + ObTransRedoLog redo_log; + init_redo(redo_log, pkey, log_id, trans_id); + + gen_header(OB_LOG_TRANS_REDO, buf, size, pos); + EXPECT_EQ(OB_SUCCESS, redo_log.serialize(buf, size, pos)); + } + + void gen_prepare(const ObPartitionKey &pkey, const uint64_t log_id, + const ObRedoLogIdArray &redo_log_ids, + const ObTransID &trans_id, + char *buf, const int64_t size, int64_t &pos) + { + ObTransPrepareLog prepare_log; + init_prepare(prepare_log, pkey, log_id, redo_log_ids, trans_id); + + gen_header(OB_LOG_TRANS_PREPARE, buf, size, pos); + EXPECT_EQ(OB_SUCCESS, prepare_log.serialize(buf, size, pos)); + } + + void gen_commit(const ObPartitionKey &pkey, const uint64_t log_id, const uint64_t prepare_id, + const int64_t prepare_tstamp, + const ObTransID &trans_id, + char *buf, const int64_t size, int64_t &pos) + { + ObTransCommitLog commit_log; + init_commit(commit_log, pkey, log_id, prepare_id, prepare_tstamp, trans_id); + + gen_header(OB_LOG_TRANS_COMMIT, buf, size, pos); + EXPECT_EQ(OB_SUCCESS, commit_log.serialize(buf, size, pos)); + } + + void gen_redo_with_prepare(const ObPartitionKey &pkey, const uint64_t log_id, + const ObRedoLogIdArray &redo_log_ids, + const ObTransID &trans_id, + char *buf, const int64_t size, int64_t &pos) + { + ObTransRedoLog redo_log; + ObTransPrepareLog prepare_log; + + init_redo(redo_log, pkey, log_id, trans_id); + init_prepare(prepare_log, pkey, log_id, redo_log_ids, trans_id); + + gen_header(OB_LOG_TRANS_REDO | OB_LOG_TRANS_PREPARE, buf, size, pos); + EXPECT_EQ(OB_SUCCESS, redo_log.serialize(buf, size, pos)); + EXPECT_EQ(OB_SUCCESS, prepare_log.serialize(buf, size, pos)); + } + + void gen_log(const ObPartitionKey &pkey, const uint64_t id, ObTransID &trans_id) + { + // redo: id % 10 range [0, 7] + // prepare: id % 10 == 8 + // commit: id % 10 == 9 + log_pos_ = 0; + uint64_t mod = (id % 10); + // All logs uses a same trans id. TODO... + static const ObTransID trans_id_2(ObTransID(ObAddr(ObAddr::IPV4, "127.0.0.1", 8888))); + trans_id = trans_id_2; + if (mod <= 7) { + gen_redo(pkey, id, trans_id, log_buffer_, LOG_BUFFER_SIZE_, log_pos_); + } + + if (8 == mod) { + ObRedoLogIdArray redo_log_id_array; + for (int64_t idx = 0, cnt = 8; (idx < cnt); ++idx) { + EXPECT_EQ(OB_SUCCESS, redo_log_id_array.push_back((int64_t)(id - 8 + idx))); + } + gen_redo_with_prepare(pkey, id, redo_log_id_array, trans_id, log_buffer_, LOG_BUFFER_SIZE_, log_pos_); + } + + if (9 == mod) { + // Prepare tstamp is incorrect. + gen_commit(pkey, id, (id - 1), get_timestamp(),trans_id, log_buffer_, LOG_BUFFER_SIZE_, log_pos_); + } + } + }; + /* + * Factory. + */ + class MockRpcInterfaceFactory : public IFetcherRpcInterfaceFactory + { + public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } + }; + + /* + * Mock parser. 
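+ * - count pushed tasks and, for normal transactions, check that redo log_no values arrive as 0, 1, 2, ...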
+ * - track process + * - print min & max process + */ + class MockParser : public IObLogParser + { + typedef common::ObLinearHashMap ProcessMap; + struct Updater + { + int64_t tstamp_; + bool operator()(const common::ObPartitionKey &pkey, int64_t &val) + { + UNUSED(pkey); + if (val < tstamp_) { val = tstamp_; } + return true; + } + }; + struct ProcessGetter + { + int64_t min_process_; + int64_t max_process_; + int64_t partition_cnt_; + bool operator()(const common::ObPartitionKey &pkey, const int64_t &val) + { + UNUSED(pkey); + if (OB_INVALID_TIMESTAMP == min_process_ || val < min_process_) { + min_process_ = val; + } + if (OB_INVALID_TIMESTAMP == max_process_ || max_process_ < val) { + max_process_ = val; + } + partition_cnt_ += 1; + return true; + } + void reset() { min_process_ = OB_INVALID_TIMESTAMP; max_process_ = OB_INVALID_TIMESTAMP; partition_cnt_ = 0;} + }; + public: + MockParser() : trans_cnt_(0) { EXPECT_EQ(OB_SUCCESS, process_map_.init()); } + virtual ~MockParser() { process_map_.reset(); process_map_.destroy(); } + virtual int start() { return OB_SUCCESS; } + virtual void stop() { } + virtual void mark_stop_flag() { } + virtual int push(PartTransTask* task, const int64_t timeout) + { + UNUSED(timeout); + if (NULL != task) { + if (task->is_heartbeat() || task->is_normal_trans()) { + const common::ObPartitionKey &pkey = task->get_partition(); + const int64_t tstamp = task->get_timestamp(); + Updater updater; + updater.tstamp_ = tstamp; + EXPECT_EQ(OB_SUCCESS, process_map_.operate(pkey, updater)); + + if (task->is_normal_trans()) { + EXPECT_EQ(OB_SUCCESS, handle_normal_trans_(task)); + } + } + task->revert(); + ATOMIC_INC(&trans_cnt_); + // Debug. + // _I_(">>> push parser", "req", task->get_seq()); + } + return OB_SUCCESS; + } + void add_partition(const common::ObPartitionKey &pkey) + { + EXPECT_EQ(OB_SUCCESS, process_map_.insert(pkey, 0)); + } + void print_process() + { + ProcessGetter process_getter; + process_getter.reset(); + EXPECT_EQ(OB_SUCCESS, process_map_.for_each(process_getter)); + fprintf(stderr, ">>> parser process: %s-%s partition count: %ld trans count: %ld\n", + TS_TO_STR(process_getter.min_process_), + TS_TO_STR(process_getter.max_process_), + process_getter.partition_cnt_, + ATOMIC_LOAD(&trans_cnt_)); + } + private: + int handle_normal_trans_(PartTransTask *task) + { + int ret = OB_SUCCESS; + if (NULL == task) { + ret = OB_INVALID_ARGUMENT; + } else { + RedoLogList &redo_list = task->get_redo_list(); + + // 如果存在Redo日志,则解析Redo日志 + if (redo_list.num_ > 0 && OB_FAIL(parse_redo_log_(task))) { + _E_("parse_redo_log_ fail", K(ret), "task", *task); + } + + } + return ret; + } + int parse_redo_log_(PartTransTask *task) + { + int ret = OB_SUCCESS; + + if (OB_ISNULL(task)) { + ret = OB_INVALID_ARGUMENT; + } else { + int64_t redo_num = 0; + RedoLogList &redo_list = task->get_redo_list(); + RedoLogNode *redo_node = redo_list.head_; + + if (OB_UNLIKELY(! redo_list.is_valid())) { + _E_("redo log list is invalid", K(redo_list), K(*task)); + ret = OB_ERR_UNEXPECTED; + } else { + while (OB_SUCCESS == ret && NULL != redo_node) { + _D_("parse redo log", "redo_node", *redo_node); + + if (OB_UNLIKELY(! 
redo_node->is_valid())) { + _E_("redo_node is invalid", "redo_node", *redo_node, "redo_index", redo_num); + ret = OB_INVALID_DATA; + // 校验Redo日志序号是否准确 + } else if (OB_UNLIKELY(redo_node->log_no_ != redo_num)) { + _E_("redo log_no is incorrect", "redo_no", redo_node->log_no_, + "expected_redo_no", redo_num, "redo_node", *redo_node); + ret = OB_INVALID_DATA; + } else { + redo_num++; + redo_node = redo_node->next_; + } + } + } + } + + return ret; + } + private: + int64_t trans_cnt_; + ProcessMap process_map_; + }; + /* + * Err handler. + * - exit on error + */ + class MockFetcherErrHandler : public IErrHandler + { + public: + virtual ~MockFetcherErrHandler() { } + public: + virtual void handle_err(int err_no, const char* fmt, ...) + { + UNUSED(err_no); + va_list ap; + va_start(ap, fmt); + __E__(fmt, ap); + va_end(ap); + exit(1); + } + }; + +public: + void run() + { + int err = OB_SUCCESS; + + // Init clock generator. + ObClockGenerator::init(); + + // Task Pool. + ObLogTransTaskPool task_pool; + ObConcurrentFIFOAllocator task_pool_alloc; + err = task_pool_alloc.init(128 * _G_, 8 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, err); + err = task_pool.init(&task_pool_alloc, 10240, 1024, 4 * 1024 * 1024, true); + EXPECT_EQ(OB_SUCCESS, err); + + // Parser. + MockParser parser; + + // Err Handler. + MockFetcherErrHandler err_handler; + + // Rpc. + MockRpcInterfaceFactory rpc_factory; + + // Worker Pool. + FixedJobPerWorkerPool worker_pool; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + // StartLogIdLocator. + ::oceanbase::liboblog::fetcher::StartLogIdLocator locator; + err = locator.init(&rpc_factory, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Heartbeater. + Heartbeater heartbeater; + err = heartbeater.init(&rpc_factory, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // SvrFinder. + MockSystableHelper systable_helper; + systable_helper.init(server_cnt_); + ::oceanbase::liboblog::fetcher::SvrFinder svrfinder; + err = svrfinder.init(&systable_helper, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Fetcher Config. + FetcherConfig cfg; + cfg.reset(); + + // Init. + ::oceanbase::liboblog::fetcher::Fetcher fetcher; + err = fetcher.init(&task_pool, &parser, &err_handler, &rpc_factory, + &worker_pool, &svrfinder, &locator, &heartbeater, &cfg); + EXPECT_EQ(OB_SUCCESS, err); + + // Add partition. + for (int64_t idx = 0, cnt = partition_cnt_; (idx < cnt); ++idx) { + ObPartitionKey p1(1001 + idx, 1, partition_cnt_); + err = fetcher.fetch_partition(p1, 1, OB_INVALID_ID); + EXPECT_EQ(OB_SUCCESS, err); + parser.add_partition(p1); + } + + // Run. + err = fetcher.start(); + EXPECT_EQ(OB_SUCCESS, err); + + // Runtime. + int64_t start = get_timestamp(); + int64_t last_print_process = start; + while ((get_timestamp() - start) < runtime_) { + usec_sleep(500 * _MSEC_); + if (1 * _SEC_ < get_timestamp() - last_print_process) { + last_print_process = get_timestamp(); + parser.print_process(); + } + } + + // Discard partition. + for (int64_t idx = 0, cnt = partition_cnt_; (idx < cnt); ++idx) { + ObPartitionKey p1(1001 + idx, 1, partition_cnt_); + err = fetcher.discard_partition(p1); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Stop. + err = fetcher.stop(true); + EXPECT_EQ(OB_SUCCESS, err); + + // Destroy. 
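+ // Note: worker_pool.destroy() and task_pool.destroy() are not assigned to err, so the two
+ // trailing EXPECT_EQ checks only re-verify the err left by heartbeater.destroy().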
+ err = fetcher.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = locator.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = svrfinder.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + task_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + } +}; + +} +} +} + +void print_usage(const char *prog_name) +{ + printf("USAGE: %s\n" + " -p, --partition partition count\n" + " -s, --server server count\n" + " -r, --runtime run time in seconds, default -1, means to run forever\n", + prog_name); +} +int main(const int argc, char **argv) +{ + // option variables + int opt = -1; + const char *opt_string = "p:s:r:"; + struct option long_opts[] = + { + {"partition", 1, NULL, 'p'}, + {"server", 1, NULL, 's'}, + {"runtime", 1, NULL, 'r'}, + {0, 0, 0, 0} + }; + + if (argc <= 1) { + print_usage(argv[0]); + return 1; + } + + // Params. + int64_t partition_cnt = 0; + int64_t server_cnt = 0; + int64_t runtime = 1 * ::oceanbase::liboblog::_YEAR_; + + // Parse command line + while ((opt = getopt_long(argc, argv, opt_string, long_opts, NULL)) != -1) { + switch (opt) { + case 'p': { + partition_cnt = strtoll(optarg, NULL, 10); + break; + } + case 's': { + server_cnt = strtoll(optarg, NULL, 10); + break; + } + case 'r': { + runtime = strtoll(optarg, NULL, 10); + break; + } + default: + print_usage(argv[0]); + break; + } // end switch + } // end while + + printf("partition_cnt:%ld server_cnt:%ld runtime:%ld sec\n", partition_cnt, server_cnt, runtime); + + // Logger. + ::oceanbase::liboblog::fetcher::FetcherLogLevelSetter::get_instance().set_mod_log_levels("TLOG.*:INFO"); + OB_LOGGER.set_log_level("INFO"); + + // Run test. + ::oceanbase::liboblog::integrationtesting::FetchLogTest test; + test.partition_cnt_ = partition_cnt; + test.server_cnt_ = server_cnt; + test.runtime_ = ::oceanbase::liboblog::_SEC_ * runtime; + test.run(); + return 0; +} diff --git a/src/liboblog/tests/fetcher_integration_test_heartbeater.cpp b/src/liboblog/tests/fetcher_integration_test_heartbeater.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bd7950f3a2837d899c4dce91997003c8eedf8f6c --- /dev/null +++ b/src/liboblog/tests/fetcher_integration_test_heartbeater.cpp @@ -0,0 +1,580 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include // fprintf +#include // getopt_long +#include // strtoull + +#include "share/ob_define.h" +#include "lib/file/file_directory_utils.h" +#include "liboblog/src/ob_log_fetcher_impl.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; + +#define OB_LOGGER ::oceanbase::common::ObLogger::get_logger() + +#define EXPECT_EQ(EXP, VAL) \ + do { \ + if ((EXP) != (VAL)) { _E_("assert failed", #EXP, (EXP), #VAL, (VAL)); exit(1); } \ + } while(0) + +namespace oceanbase +{ +namespace liboblog +{ +namespace integrationtesting +{ + +/* + * HeartbeatTest: + * - add n partitions, find m servers for each one, let it dispatch and create new workers + * - no log, in time heartbeat + * - parser prints current min and max process + * - discard all, quit + */ +class HeartbeatTest +{ +public: + int64_t partition_cnt_; + int64_t server_cnt_; + int64_t runtime_; // usec +public: + /* + * Mock systable helper. + * - provide all m servers for each request, 127.0.0.[1-m] + */ + class MockSystableHelper : public ObILogSysTableHelper + { + public: + int64_t server_cnt_; + int64_t now_; + void init(const int64_t svr_cnt) + { + server_cnt_ = svr_cnt; + now_ = get_timestamp(); + } + public: + virtual int query_all_clog_history_info_by_log_id_1( + const common::ObPartitionKey &pkey, const uint64_t log_id, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = static_cast(pkey.get_partition_id());//int64_t + rec.partition_cnt_ = pkey.get_partition_cnt();//partition cnt + rec.start_log_id_ = log_id; + rec.end_log_id_ = log_id + 10000; + rec.start_log_timestamp_ = now_; + rec.end_log_timestamp_ = now_ + 1 * _HOUR_; + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + + virtual int query_all_clog_history_info_by_timestamp_1( + const common::ObPartitionKey &pkey, const int64_t timestamp, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = pkey.get_partition_cnt(); + rec.start_log_id_ = 0; + rec.end_log_id_ = 65536; + rec.start_log_timestamp_ = timestamp; + rec.end_log_timestamp_ = timestamp + (1 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + virtual int query_all_meta_table_1( + const common::ObPartitionKey &pkey, AllMetaTableRecords &records) { + // Generate random results. + int ret = OB_SUCCESS; + UNUSED(pkey); + records.reset(); + AllMetaTableRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + rec.role_ = (0 == idx) ? 
LEADER : FOLLOWER; + records.push_back(rec); + } + return ret; + } + + virtual int query_all_meta_table_for_leader( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + has_leader = true; + leader.set_ip_addr("127.0.0.1", 8888); + return OB_SUCCESS; + } + + virtual int query_all_server_table_1( + AllServerTableRecords &records) + { + int ret = OB_SUCCESS; + records.reset(); + AllServerTableRecord rec; + const int64_t cnt = server_cnt_; + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", 1 + idx); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } + }; + /* + * Rpc. + * - return start log id as 1 + * - in time heartbeat + * - can open stream + * - no log + */ + class MockRpcInterface : public IFetcherRpcInterface + { + public: + ~MockRpcInterface() {} + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result; + result.reset(); + result.err_ = OB_SUCCESS; + result.start_log_id_ = 1; + res.append_result(result); + } + _D_(">>> req start log id", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqHeartbeatInfoResponse::Result result; + result.reset(); + result.err_ = OB_SUCCESS; + result.tstamp_ = get_timestamp(); + res.append_result(result); + } + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const obrpc::ObLogLeaderHeartbeatReq::Param ¶m = req.get_params().at(idx); + + result.reset(); + result.err_ = OB_SUCCESS; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = get_timestamp(); + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + + _D_(">>> heartbeat", K(req), K(res)); + 
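+ // Leader heartbeats succeed with next_served_ts_ = get_timestamp(), so per-partition
+ // progress advances on heartbeats alone; fetch_stream_log() below returns no log entries.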
return OB_SUCCESS; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + int ret = OB_SUCCESS; + UNUSED(req); + obrpc::ObStreamSeq seq; + seq.reset(); + seq.self_.set_ip_addr("127.0.0.1", 8888); + seq.seq_ts_ = get_timestamp(); + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + res.set_stream_seq(seq); + _D_(">>> open stream", K(req), K(res)); + return ret; + } + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + _D_(">>> fetch log", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } + }; + /* + * Factory. + */ + class MockRpcInterfaceFactory : public IFetcherRpcInterfaceFactory + { + public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } + }; + + /* + * Mock parser. + * - track process + * - print min & max process + */ + class MockParser : public IObLogParser + { + typedef common::ObLinearHashMap ProcessMap; + struct Updater + { + int64_t tstamp_; + bool operator()(const common::ObPartitionKey &pkey, int64_t &val) + { + UNUSED(pkey); + if (val < tstamp_) { val = tstamp_; } + return true; + } + }; + struct ProcessGetter + { + int64_t min_process_; + int64_t max_process_; + bool operator()(const common::ObPartitionKey &pkey, const int64_t &val) + { + UNUSED(pkey); + if (OB_INVALID_TIMESTAMP == min_process_ || val < min_process_) { + min_process_ = val; + } + if (OB_INVALID_TIMESTAMP == max_process_ || max_process_ < val) { + max_process_ = val; + } + return true; + } + void reset() { min_process_ = OB_INVALID_TIMESTAMP; max_process_ = OB_INVALID_TIMESTAMP; } + }; + public: + MockParser() : trans_cnt_(0) { EXPECT_EQ(OB_SUCCESS, process_map_.init()); } + virtual ~MockParser() { process_map_.reset(); process_map_.destroy(); } + virtual int start() { return OB_SUCCESS; } + virtual void stop() { } + virtual void mark_stop_flag() { } + virtual int push(PartTransTask* task, const int64_t timeout) + { + UNUSED(timeout); + if (NULL != task) { + if (task->is_heartbeat()) { + const common::ObPartitionKey &pkey = task->get_partition(); + const int64_t tstamp = task->get_timestamp(); + Updater updater; + updater.tstamp_ = tstamp; + EXPECT_EQ(OB_SUCCESS, process_map_.operate(pkey, updater)); + } + task->revert(); + trans_cnt_ += 1; + // Debug. 
+ // _I_(">>> push parser", "req", task->get_seq()); + } + return OB_SUCCESS; + } + int64_t get_trans_cnt() const { return trans_cnt_; } + void add_partition(const common::ObPartitionKey &pkey) + { + EXPECT_EQ(OB_SUCCESS, process_map_.insert(pkey, 0)); + } + void print_process() + { + int64_t now = get_timestamp(); + ProcessGetter process_getter; + process_getter.reset(); + EXPECT_EQ(OB_SUCCESS, process_map_.for_each(process_getter)); + int64_t max_delay_sec = (now - process_getter.min_process_) / 1000000; + int64_t max_delay_us = (now - process_getter.min_process_) % 1000000; + int64_t min_delay_sec = (now - process_getter.max_process_) / 1000000; + int64_t min_delay_us = (now - process_getter.max_process_) % 1000000; + + fprintf(stderr, ">>> parser process: %s-%s DELAY=[%ld.%06ld, %ld.%06ld] sec\n", + TS_TO_STR(process_getter.min_process_), + TS_TO_STR(process_getter.max_process_), + min_delay_sec, min_delay_us, max_delay_sec, max_delay_us); + } + private: + int64_t trans_cnt_; + ProcessMap process_map_; + }; + /* + * Err handler. + * - exit on error + */ + class MockFetcherErrHandler : public IErrHandler + { + public: + virtual ~MockFetcherErrHandler() { } + public: + virtual void handle_err(int err_no, const char* fmt, ...) + { + UNUSED(err_no); + va_list ap; + va_start(ap, fmt); + __E__(fmt, ap); + va_end(ap); + exit(1); + } + }; + +public: + void run() + { + int err = OB_SUCCESS; + + // Task Pool. + ObLogTransTaskPool task_pool; + ObConcurrentFIFOAllocator task_pool_alloc; + err = task_pool_alloc.init(128 * _G_, 8 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, err); + err = task_pool.init(&task_pool_alloc, 10240, 1024, 4 * 1024 * 1024, true); + EXPECT_EQ(OB_SUCCESS, err); + + // Parser. + MockParser parser; + + // Err Handler. + MockFetcherErrHandler err_handler; + + // Rpc. + MockRpcInterfaceFactory rpc_factory; + + // Worker Pool. + FixedJobPerWorkerPool worker_pool; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + // StartLogIdLocator. + ::oceanbase::liboblog::fetcher::StartLogIdLocator locator; + err = locator.init(&rpc_factory, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Heartbeater. + Heartbeater heartbeater; + err = heartbeater.init(&rpc_factory, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // SvrFinder. + MockSystableHelper systable_helper; + systable_helper.init(server_cnt_); + ::oceanbase::liboblog::fetcher::SvrFinder svrfinder; + err = svrfinder.init(&systable_helper, &err_handler, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Fetcher Config. + FetcherConfig cfg; + cfg.reset(); + + // Init. + ::oceanbase::liboblog::fetcher::Fetcher fetcher; + err = fetcher.init(&task_pool, &parser, &err_handler, &rpc_factory, + &worker_pool, &svrfinder, &locator, &heartbeater, &cfg); + EXPECT_EQ(OB_SUCCESS, err); + + // Add partition. + for (int64_t idx = 0, cnt = partition_cnt_; (idx < cnt); ++idx) { + ObPartitionKey p1(1001 + idx, 1, partition_cnt_); + err = fetcher.fetch_partition(p1, 1, OB_INVALID_ID); + EXPECT_EQ(OB_SUCCESS, err); + parser.add_partition(p1); + } + + // Run. + err = fetcher.start(); + EXPECT_EQ(OB_SUCCESS, err); + + // Runtime. + int64_t start = get_timestamp(); + int64_t last_print_process = start; + while ((get_timestamp() - start) < runtime_) { + usec_sleep(500 * _MSEC_); + if (1 * _SEC_ < get_timestamp() - last_print_process) { + last_print_process = get_timestamp(); + parser.print_process(); + } + } + + // Discard partition. 
+ for (int64_t idx = 0, cnt = partition_cnt_; (idx < cnt); ++idx) { + ObPartitionKey p1(1001 + idx, 1, partition_cnt_); + err = fetcher.discard_partition(p1); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Stop. + err = fetcher.stop(true); + EXPECT_EQ(OB_SUCCESS, err); + + // Destroy. + err = fetcher.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = locator.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = svrfinder.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + task_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + } +}; + +} +} +} + +void print_usage(const char *prog_name) +{ + printf("USAGE: %s\n" + " -p, --partition partition count\n" + " -s, --server server count\n" + " -r, --runtime run time in seconds, default -1, means to run forever\n", + prog_name); +} +int main(const int argc, char **argv) +{ + // option variables + int opt = -1; + const char *opt_string = "p:s:r:"; + struct option long_opts[] = + { + {"partition", 1, NULL, 'p'}, + {"server", 1, NULL, 's'}, + {"runtime", 1, NULL, 'r'}, + {0, 0, 0, 0} + }; + + if (argc <= 1) { + print_usage(argv[0]); + return 1; + } + + // Params. + int64_t partition_cnt = 0; + int64_t server_cnt = 0; + int64_t runtime = 1 * ::oceanbase::liboblog::_YEAR_; + + // Parse command line + while ((opt = getopt_long(argc, argv, opt_string, long_opts, NULL)) != -1) { + switch (opt) { + case 'p': { + partition_cnt = strtoll(optarg, NULL, 10); + break; + } + case 's': { + server_cnt = strtoll(optarg, NULL, 10); + break; + } + case 'r': { + runtime = strtoll(optarg, NULL, 10); + break; + } + default: + print_usage(argv[0]); + break; + } // end switch + } // end while + + printf("partition_cnt:%ld server_cnt:%ld runtime:%ld sec\n", partition_cnt, server_cnt, runtime); + + // Logger. + ::oceanbase::liboblog::fetcher::FetcherLogLevelSetter::get_instance().set_mod_log_levels("TLOG.*:INFO"); + // Run test. + ::oceanbase::liboblog::integrationtesting::HeartbeatTest test; + test.partition_cnt_ = partition_cnt; + test.server_cnt_ = server_cnt; + test.runtime_ = ::oceanbase::liboblog::_SEC_ * runtime; + test.run(); + return 0; +} diff --git a/src/liboblog/tests/kill_oblog.sh b/src/liboblog/tests/kill_oblog.sh new file mode 100755 index 0000000000000000000000000000000000000000..a466758bdb2338902e1b13e1feb45776a15b95bd --- /dev/null +++ b/src/liboblog/tests/kill_oblog.sh @@ -0,0 +1 @@ +cat run/liboblog.pid | xargs kill -9 diff --git a/src/liboblog/tests/main.cpp b/src/liboblog/tests/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c2702a965b4bfc416c4400939379c5914b272f47 --- /dev/null +++ b/src/liboblog/tests/main.cpp @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_TAILF + +#include "oblog_main.h" + +using namespace oceanbase::liboblog; +using namespace oceanbase::common; + +int main(int argc, char **argv) +{ + int ret = OB_SUCCESS; + ObLogMain &oblog_main = ObLogMain::get_instance(); + + if (OB_FAIL(oblog_main.init(argc, argv))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("init oblog main fail", K(argc)); + } + } else if (OB_FAIL(oblog_main.start())) { + LOG_ERROR("start oblog main fail", K(ret)); + } else { + oblog_main.run(); + oblog_main.stop(); + + if (oblog_main.need_reentrant()) { + LOG_INFO("oblog reentrant"); + + if (OB_FAIL(oblog_main.start())) { + LOG_ERROR("start oblog main twice fail", K(ret)); + } else { + oblog_main.run(); + oblog_main.stop(); + } + } + } + + oblog_main.destroy(); + + return 0; +} diff --git a/src/liboblog/tests/ob_binlog_record_printer.cpp b/src/liboblog/tests/ob_binlog_record_printer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..444a5b68864c260e0613557eda34ae14c022ecef --- /dev/null +++ b/src/liboblog/tests/ob_binlog_record_printer.cpp @@ -0,0 +1,860 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_TAILF + +#define DATA_DELEMITER ";" +#define STMT_DELEMITER "$" + +#define LOG_STD(str, ...) \ + do { \ + fprintf(stderr, str, ##__VA_ARGS__); \ + } while (0) + +#define ROW_PRINTF(ptr, size, pos, ri, fmt, ...) \ + do {\ + if (OB_SUCC(ret)) { \ + if (OB_FAIL(databuff_printf((ptr), (size), (pos), "[R%lu] " fmt "%s", ri, ##__VA_ARGS__, DATA_DELEMITER))) {\ + LOG_ERROR("databuff_printf fail", KP(ptr), K(size), K(pos), K(ri), K(ret)); \ + } \ + } \ + } while (0) + +#define DATABUFF_PRINTF(ptr, size, pos, fmt, ...) \ + do {\ + if (OB_SUCC(ret)) { \ + if (OB_FAIL(databuff_printf((ptr), (size), (pos), fmt, ##__VA_ARGS__))) {\ + LOG_ERROR("databuff_printf fail", KP(ptr), K(size), K(pos), K(ret)); \ + } \ + } \ + } while (0) + +#define COL_PRINT_VALUE(val, len) (int)(NULL == val ? sizeof("NULL") : len), (NULL == val ? 
"NULL" : val), len + +#include "ob_binlog_record_printer.h" + +#include // BinLogBuf +#include "rpc/obmysql/ob_mysql_global.h" // MYSQL_TYPE_* +#include "lib/file/file_directory_utils.h" +#include "lib/time/ob_time_utility.h" // ObTimeUtility + +#include "ob_log_utils.h" // calc_md5_cstr +#include "ob_log_binlog_record.h" // ObLogBR +#include "ob_log_part_trans_task.h" // PartTransTask + +using namespace oceanbase::common; +using namespace oceanbase::obmysql; + +namespace oceanbase +{ +namespace liboblog +{ + +ObBinlogRecordPrinter::ObBinlogRecordPrinter() : inited_(false), + data_file_(NULL), + data_file_fd_(-1), + heartbeat_file_fd_(-1), + only_print_hex_(false), + enable_print_hex_(false), + enable_print_console_(false), + enable_print_lob_md5_(false), + enable_verify_mode_(false), + enable_print_detail_(false) +{ +} + +ObBinlogRecordPrinter::~ObBinlogRecordPrinter() +{ + destroy(); +} + +int ObBinlogRecordPrinter::init(const char *data_file, + const char *heartbeat_file, + const bool enable_print_console, + const bool only_print_hex, + const bool enable_print_hex, + const bool enable_print_lob_md5, + const bool enable_verify_mode, + const bool enable_print_detail) +{ + int ret = OB_SUCCESS; + if (inited_) { + ret = OB_INIT_TWICE; + } else if (NULL != data_file && OB_FAIL(open_file_(data_file, data_file_fd_))) { + LOG_ERROR("open data file fail", K(ret), K(data_file)); + } else if (NULL != heartbeat_file && OB_FAIL(open_file_(heartbeat_file, heartbeat_file_fd_))) { + LOG_ERROR("open heartbeat file fail", K(ret), K(heartbeat_file)); + } else { + data_file_ = data_file; + only_print_hex_ = only_print_hex; + enable_print_hex_ = enable_print_hex; + enable_print_console_ = enable_print_console; + enable_print_lob_md5_ = enable_print_lob_md5; + enable_verify_mode_ = enable_verify_mode; + enable_print_detail_ = enable_print_detail; + inited_ = true; + } + return ret; +} + +void ObBinlogRecordPrinter::destroy() +{ + int ret = OB_SUCCESS; + if (data_file_fd_ >= 0) { + if (OB_FAIL(close(data_file_fd_))) { + LOG_WARN("failed to close data_file_fd", K(ret), K_(data_file_fd)); + } + data_file_fd_ = -1; + } + + if (heartbeat_file_fd_ >= 0) { + if (OB_FAIL(close(heartbeat_file_fd_))) { + LOG_WARN("failed to close heartbeat_file_fd", K(ret), K_(heartbeat_file_fd)); + } + heartbeat_file_fd_ = -1; + } + + only_print_hex_ = false; + enable_print_hex_ = false; + enable_print_console_ = false; + enable_print_lob_md5_ = false; + enable_verify_mode_ = false; + enable_print_detail_ = false; + data_file_ = NULL; + inited_ = false; +} + +int ObBinlogRecordPrinter::open_file_(const char *file_name, int &fd) +{ + OB_ASSERT(NULL != file_name); + + int ret = OB_SUCCESS; + char *p = strrchr(const_cast(file_name), '/'); + if (NULL != p) { + char dir_buffer[OB_MAX_FILE_NAME_LENGTH]; + snprintf(dir_buffer, OB_MAX_FILE_NAME_LENGTH, "%.*s", (int)(p - file_name), file_name); + common::FileDirectoryUtils::create_full_path(dir_buffer); + } + + if (OB_SUCC(ret)) { + fd = open(file_name, O_WRONLY | O_APPEND | O_CREAT, S_IRUSR | S_IWUSR); + if (0 > fd) { + LOG_ERROR("open data file fail", K(file_name), K(errno), KERRMSG); + ret = OB_IO_ERROR; + } + } + + return ret; +} + +int ObBinlogRecordPrinter::print_binlog_record(ILogRecord *br) +{ + int ret = OB_SUCCESS; + ObLogBR *oblog_br = NULL; + + if (! 
inited_) { + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("invalid arguments", K(br)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(oblog_br = reinterpret_cast(br->getUserData()))) { + LOG_ERROR("get user data fail", K(br), K(oblog_br)); + ret = OB_INVALID_ARGUMENT; + } else { + if (enable_print_console_) { + console_print(br, oblog_br); + } + int record_type = br->recordType(); + + // Heartbeat timestamp taken directly from the br + if (HEARTBEAT == br->recordType() && heartbeat_file_fd_ >= 0) { + if (OB_FAIL(output_heartbeat_file(heartbeat_file_fd_, get_precise_timestamp_(*br)))) { + LOG_ERROR("output_heartbeat_file fail", K(ret), K(heartbeat_file_fd_), K(oblog_br)); + } + } else if (data_file_fd_ >= 0) { + bool need_rotate_file = false; + + if (OB_FAIL(output_data_file(br, record_type, oblog_br, data_file_fd_, only_print_hex_, enable_print_hex_, + enable_print_lob_md5_, enable_verify_mode_, enable_print_detail_, need_rotate_file))) { + LOG_ERROR("output_data_file fail", K_(data_file_fd), K_(data_file), K(ret)); + } else if (need_rotate_file && OB_FAIL(rotate_data_file())) { + LOG_ERROR("rotate_data_file fail", K(ret)); + } + } + } + + return ret; +} + +int64_t ObBinlogRecordPrinter::get_precise_timestamp_(ILogRecord &br) +{ + int64_t timestamp_sec = br.getTimestamp(); + uint32_t timestamp_usec = br.getRecordUsec(); + int64_t precise_timestamp = timestamp_sec * 1000000 + timestamp_usec; + + return precise_timestamp; +} + +void ObBinlogRecordPrinter::console_print(ILogRecord *br, ObLogBR *oblog_br) +{ + if (NULL != br && NULL != oblog_br) { + if (EBEGIN == br->recordType()) { + console_print_begin(br, oblog_br); + } else if (ECOMMIT == br->recordType()) { + console_print_commit(br, oblog_br); + } else if (HEARTBEAT == br->recordType()) { + console_print_heartbeat(br, oblog_br); + } else { + console_print_statements(br, oblog_br); + } + } +} + +int ObBinlogRecordPrinter::output_heartbeat_file(const int fd, const int64_t heartbeat_timestamp) +{ + OB_ASSERT(fd >= 0); + int ret = OB_SUCCESS; + + const static int64_t BUFFER_SIZE = 64; + char buffer[BUFFER_SIZE]; + int64_t pos = 0; + + DATABUFF_PRINTF(buffer, sizeof(buffer), pos, "%ld\n", heartbeat_timestamp); + + // Empty the file + (void)ftruncate(fd, 0); + + int64_t left_len = pos; + const char *ptr = buffer; + while (OB_SUCCESS == ret && left_len > 0) { + int64_t write_len = write(fd, ptr, left_len); + if (write_len < 0) { + LOG_ERROR("write heartbeat file fail", K(errno), KERRMSG, K(fd), K(left_len)); + ret = OB_ERR_UNEXPECTED; + } else { + left_len -= write_len; + ptr += write_len; + } + } + + return ret; +} + +int ObBinlogRecordPrinter::output_data_file(ILogRecord *br, + const int record_type, + ObLogBR *oblog_br, + const int fd, + const bool only_print_hex, + const bool enable_print_hex, + const bool enable_print_lob_md5, + const bool enable_verify_mode, + const bool enable_print_detail, + bool &need_rotate_file) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(br) || OB_ISNULL(oblog_br) || OB_UNLIKELY(fd < 0)) { + LOG_ERROR("invalid argument", K(br), K(record_type), "record_type", print_record_type(record_type), + K(oblog_br), K(fd)); + ret = OB_INVALID_ARGUMENT; + } else { + static const int64_t DATA_BUFFER_SIZE = 128 * 1024 * 1024; + char *data_buffer = (char *)new char[DATA_BUFFER_SIZE]; + + // Record Index: index of statements within a transaction + static uint64_t ri = 0; + static const int64_t TRANS_ID_BUF_LENGTH = 1024; + static char begin_trans_id[TRANS_ID_BUF_LENGTH]; + + int64_t pos = 0; + char *ptr = 
data_buffer; + int64_t size = DATA_BUFFER_SIZE; + // get filterRuleValue + unsigned int filter_rv_count = 0; + LogRecordImpl *filter_rv_impl = static_cast(br); + const BinLogBuf *filter_rv = filter_rv_impl->filterValues((unsigned int &) filter_rv_count); + common::ObString trace_id; + common::ObString unique_id; + + if (filter_rv != NULL && filter_rv_count > 2) { + unique_id.assign_ptr(filter_rv[1].buf, filter_rv[1].buf_used_size); + trace_id.assign_ptr(filter_rv[2].buf, filter_rv[2].buf_used_size); + } + + if (EBEGIN == record_type) { + ri = 0; + ROW_PRINTF(ptr, size, pos, ri, "BEGIN"); + ROW_PRINTF(ptr, size, pos, ri, "org_cluster_id:%u", br->getThreadId()); + + memset(begin_trans_id, '\0', TRANS_ID_BUF_LENGTH * sizeof(char)); + MEMCPY(begin_trans_id, unique_id.ptr(), unique_id.length()); + begin_trans_id[unique_id.length()] = '\0'; + // The forth slot is major_version + int32_t major_version; + if (OB_FAIL(parse_major_version_(filter_rv, major_version))) { + LOG_ERROR("failed to parse major version", KR(ret), K(oblog_br), K(filter_rv), K(major_version)); + } else if (major_version > 0) { + ROW_PRINTF(ptr, size, pos, ri, "major_version:%d", major_version); + } else { + // do nothing + } + } else if (ECOMMIT == record_type) { + ri++; + ROW_PRINTF(ptr, size, pos, ri, "COMMIT"); + } else if (EDDL == record_type) { + ri = 0; + ITableMeta *table_meta = NULL; + if (0 != br->getTableMeta(table_meta)) { + LOG_ERROR("table_meta is NULL"); + ret = OB_ERR_UNEXPECTED; + } + int64_t column_count = table_meta ? table_meta->getColCount() : -1; + + // TODO + // 2.0.0 DDL binlog record with a new column ddl_schema_version + // To ensure 1.0 compatibility, column_count is first output as 1 + if (2 == column_count) { + column_count = 1; + } + + ROW_PRINTF(ptr, size, pos, ri, "record_type:%s", print_record_type(record_type)); + ROW_PRINTF(ptr, size, pos, ri, "db_name:%s", br->dbname()); + ROW_PRINTF(ptr, size, pos, ri, "table_name:%s", br->tbname()); + ROW_PRINTF(ptr, size, pos, ri, "column_count:%ld", column_count); + // The DDL is in memory, not persistent, and is accessed via the following interface + int64_t new_cols_count = 0; + BinLogBuf *new_cols = br->newCols((unsigned int &)new_cols_count); + + for (int64_t index = 0; index < new_cols_count; index++) { + IColMeta *col_meta = table_meta ? table_meta->getCol((int)index) : NULL; + const char *cname = col_meta ? 
col_meta->getName() : "NULL"; + // DDL binlog only output ddl_stmt + if (0 == index) { + ROW_PRINTF(ptr, size, pos, ri, "column_name:%s", cname); + ROW_PRINTF(ptr, size, pos, ri, "ddl_stmt_str: %.*s", (int)new_cols[index].buf_used_size, new_cols[index].buf); + ROW_PRINTF(ptr, size, pos, ri, "ddl_stmt_len: %ld", new_cols[index].buf_used_size); + } else { + LOG_INFO("DDL binlog record", K(index), "column_name", cname, + "ddl_schema_version", new_cols[index].buf, + "len", new_cols[index].buf_used_size); + } + } + + if (enable_verify_mode) { + if (unique_id.length() > 0) { + ROW_PRINTF(ptr, size, pos, ri, "unique_id:[%.*s](%d)", unique_id.length(), unique_id.ptr(), unique_id.length()); + } + } + } else if (EINSERT == record_type || EUPDATE == record_type || EDELETE == record_type) { + ri++; + const bool is_serilized = oblog_br->is_serilized(); + ITableMeta *table_meta = NULL; + + if (is_serilized) { + if (OB_ISNULL(table_meta = LogMsgFactory::createTableMeta())) { + LOG_ERROR("table_meta is NULL"); + ret = OB_ALLOCATE_MEMORY_FAILED; + } + } + + if (OB_SUCC(ret)) { + if (0 != br->getTableMeta(table_meta)) { + LOG_ERROR("table_meta is NULL", KPC(oblog_br)); + ret = OB_ERR_UNEXPECTED; + } + } + + int64_t column_count = table_meta ? table_meta->getColCount() : -1; + const char *pks = table_meta ? (table_meta->getPKs()) : "NULL"; + const char *uks = table_meta ? (table_meta->getUKs()) : "NULL"; + const char *has_pk = table_meta ? (table_meta->hasPK() ? "true" : "false") : "NULL"; + const char *has_uk = table_meta ? (table_meta->hasUK() ? "true" : "false") : "NULL"; + const char *pk_info = table_meta ? table_meta->getPkinfo() : "NULL"; + const char *uk_info = table_meta ? table_meta->getUkinfo() : "NULL"; + + ROW_PRINTF(ptr, size, pos, ri, "record_type:%s", print_record_type(record_type)); + ROW_PRINTF(ptr, size, pos, ri, "database_name:%s", br->dbname()); + ROW_PRINTF(ptr, size, pos, ri, "table_name:%s", br->tbname()); + ROW_PRINTF(ptr, size, pos, ri, "log_event:%s", br->firstInLogevent() ? 
"true" : "false"); + ROW_PRINTF(ptr, size, pos, ri, "column_count:%ld", column_count); + ROW_PRINTF(ptr, size, pos, ri, "source_category:%s", print_src_category(br->getSrcCategory())); + ROW_PRINTF(ptr, size, pos, ri, "source_type:%s", print_record_src_type(br->getSrcType())); + ROW_PRINTF(ptr, size, pos, ri, "has_pk:%s", has_pk); + ROW_PRINTF(ptr, size, pos, ri, "pk_info:%s", pk_info); + ROW_PRINTF(ptr, size, pos, ri, "pks:%s", pks); + ROW_PRINTF(ptr, size, pos, ri, "has_uk:%s", has_uk); + ROW_PRINTF(ptr, size, pos, ri, "uk_info:%s", uk_info); + ROW_PRINTF(ptr, size, pos, ri, "uks:%s", uks); + + // If trace_id is not empty, then print + if (trace_id.length() > 0) { + ROW_PRINTF(ptr, size, pos, ri, "trace_id:[%.*s](%d)", trace_id.length(), trace_id.ptr(), trace_id.length()); + } + + for (int64_t index = 0; OB_SUCC(ret) && index < column_count; index++) { + ret = output_data_file_column_data(is_serilized, br, table_meta, index, ptr, size, ri, only_print_hex, enable_print_hex, + enable_print_lob_md5, enable_print_detail, pos); + } + + DATABUFF_PRINTF(ptr, size, pos, "%s", STMT_DELEMITER); + + if (OB_SUCC(ret)) { + if (OB_FAIL(verify_begin_trans_id_(*oblog_br, begin_trans_id))) { + LOG_ERROR("verify_begin_trans_id_ fail", KR(ret), K(oblog_br), K(begin_trans_id)); + } + } + + if (is_serilized) { + if (NULL != table_meta) { + LogMsgFactory::destroy(table_meta); + } + } + } + + if (OB_SUCCESS == ret && 0 < pos) { + bool is_line_end = false; + + if (EDDL == record_type || ECOMMIT == record_type) { + is_line_end = true; + } + + if (OB_FAIL(write_data_file(fd, ptr, size, pos, is_line_end, need_rotate_file))) { + LOG_ERROR("write_data_file fail", K(ret), K(fd), K(size), K(pos), KP(ptr)); + } + } + + if (NULL != data_buffer) { + delete []data_buffer; + data_buffer = NULL; + } + } + + return ret; +} + +int ObBinlogRecordPrinter::parse_major_version_(const BinLogBuf *filter_rv, int32_t &major_version) { + int ret = OB_SUCCESS; + major_version = -1; // default -1, invalid value + // Get major version, major version is only output if version 1.x is configured and the corresponding configuration item is configured + bool need_major_version = common::ObClusterVersion::get_instance().get_cluster_version() < CLUSTER_VERSION_2000 + && TCONF.test_mode_on && TCONF.test_output_major_version; + if (need_major_version) { + const BinLogBuf *major_version_buf = filter_rv + 3; + major_version = (int32_t) atoi(major_version_buf->buf); + } + return ret; +} + +int ObBinlogRecordPrinter::verify_begin_trans_id_(ObLogBR &oblog_br, + const char *begin_trans_id) +{ + int ret = OB_SUCCESS; + + static const int64_t TRANS_ID_BUF_LENGTH = 1024; + char trans_id_buf[TRANS_ID_BUF_LENGTH]; + int64_t pos = 0; + ObLogRowDataIndex *row_data_index = NULL; + PartTransTask *task = NULL; + + if (OB_ISNULL(begin_trans_id)) { + LOG_ERROR("begin_trans_id is null"); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(row_data_index = static_cast(oblog_br.get_host()))) { + LOG_ERROR("row_data_index is NULL", KPC(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(task = static_cast(row_data_index->get_host()))) { + LOG_ERROR("part trans task is NULL", KPC(task), KPC(row_data_index)); + ret = OB_ERR_UNEXPECTED; + } else { + const transaction::ObTransID &trans_id = task->get_trans_id(); + + if (OB_FAIL(common::databuff_printf(trans_id_buf, TRANS_ID_BUF_LENGTH, pos, "%s", to_cstring(trans_id)))) { + LOG_ERROR("databuff_printf fail", KR(ret), K(trans_id), K(trans_id_buf), K(TRANS_ID_BUF_LENGTH), K(pos)); + } else { + 
trans_id_buf[pos] = '\0'; + + if (0 == strcmp(begin_trans_id, trans_id_buf)) { + LOG_DEBUG("verify_begin_trans_id_ succ", K(begin_trans_id), K(trans_id_buf)); + } else { + LOG_ERROR("current trans_id is not equal to begin_trans_id", K(begin_trans_id), K(trans_id_buf)); + ret = OB_ERR_UNEXPECTED; + } + } + } + + return ret; +} + +int ObBinlogRecordPrinter::output_data_file_column_data(const bool is_serilized, + ILogRecord *br, + ITableMeta *table_meta, + const int64_t index, + char *ptr, + const int64_t size, + const uint64_t ri, + const bool only_print_hex, + const bool enable_print_hex, + const bool enable_print_lob_md5, + const bool enable_print_detail, + int64_t &pos) +{ + OB_ASSERT(NULL != br && NULL != table_meta && NULL != ptr && size > 0 && index >= 0 && pos >= 0); + + int ret = OB_SUCCESS; + int64_t new_cols_count = 0; + int64_t old_cols_count = 0; + StrArray *new_ser_cols = NULL; + StrArray *old_ser_cols = NULL; + BinLogBuf *new_cols = NULL; + BinLogBuf *old_cols = NULL; + + if (is_serilized) { + // Parsing deserialised data + new_ser_cols = br->parsedNewCols(); + if (NULL != new_ser_cols) { + new_cols_count = new_ser_cols->size(); + } + old_ser_cols = br->parsedOldCols(); + if (NULL != old_ser_cols) { + old_cols_count = old_ser_cols->size(); + } + } else { + new_cols = br->newCols((unsigned int &)new_cols_count); + old_cols = br->oldCols((unsigned int &)old_cols_count); + } + + IColMeta *col_meta = table_meta ? table_meta->getCol((int)index) : NULL; + const char *cname = col_meta ? col_meta->getName() : "NULL"; + int ctype = col_meta ? col_meta->getType() : -1; + const char *is_pk = col_meta ? (col_meta->isPK() ? "true" : "false") : "NULL"; + const char *encoding = col_meta ? col_meta->getEncoding() : "NULL"; + const char *is_not_null = col_meta ? (col_meta->isNotNull() ? "true" : "false") : "NULL"; +// const char *default_val = col_meta ? col_meta->getDefault() : "NULL"; + const char *is_signed = col_meta ? (col_meta->isSigned() ? "true" : "false") : "NULL"; + bool is_generated_column = col_meta ? col_meta->isGenerated() : false; + bool is_hidden_row_key_column = col_meta ? 
col_meta->isHiddenRowKey() : false; + bool is_lob = is_lob_type(ctype); + + int64_t column_index = index + 1; + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_name:%s", column_index, cname); + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_type:%s", column_index, get_ctype_string(ctype)); + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_rowkey:%s", column_index, is_pk); + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_signed:%s", column_index, is_signed); + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_encoding:%s", column_index, encoding); + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_not_null:%s", column_index, is_not_null); + if (enable_print_detail && is_hidden_row_key_column) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_hidden_rowkey:%d", column_index, is_hidden_row_key_column); + } + if (is_generated_column) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] is_generated_column:%d", column_index, is_generated_column); + } + // FIXME: does not check the value of the field until the length of the default value can be obtained +// ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_default_value:%s", column_index, default_val); + + if (OB_SUCC(ret)) { + if (index < new_cols_count) { + const char *new_col_value = NULL; + size_t new_col_value_len = 0; + + if (is_serilized) { + ret = new_ser_cols->elementAt(index, new_col_value, new_col_value_len); + // TODO Deserialization ends with /0, length minus 1 + if (NULL != new_col_value) { + new_col_value_len -= 1; + } + } else { + new_col_value = new_cols[index].buf; + new_col_value_len = new_cols[index].buf_used_size; + } + + if (OB_FAIL(ret)) { + } else if (is_lob && enable_print_lob_md5) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_new_md5:[%s](%ld)", + column_index, calc_md5_cstr(new_col_value, new_col_value_len), new_col_value_len); + } else { + if (! 
only_print_hex) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_new:[%.*s](%ld)", + column_index, COL_PRINT_VALUE(new_col_value, new_col_value_len)); + } + + if (OB_SUCCESS == ret && enable_print_hex && need_print_hex(ctype)) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_new_hex:", column_index); + pos--; + + if (OB_SUCCESS == ret && OB_FAIL(print_hex(new_col_value, new_col_value_len, ptr, size, pos))) { + LOG_ERROR("print_hex fail", K(ret)); + } + } + } + } + + if (OB_SUCCESS == ret && index < old_cols_count) { + const char *old_col_value = NULL; + size_t old_col_value_len = 0; + + if (is_serilized) { + ret = old_ser_cols->elementAt(index, old_col_value, old_col_value_len); + // TODO Deserialization ends with /0, length minus 1 + if (NULL != old_col_value) { + old_col_value_len -= 1; + } + } else { + old_col_value = old_cols[index].buf; + old_col_value_len = old_cols[index].buf_used_size; + } + + if (OB_FAIL(ret)) { + } else if (EMySQLFieldType::MYSQL_TYPE_BIT == ctype) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_old_hex:", column_index); + pos--; + + if (OB_SUCCESS == ret && OB_FAIL(print_hex(old_col_value, old_col_value_len, ptr, size, pos))) { + LOG_ERROR("print_hex fail", K(ret)); + } + } else if (is_lob && enable_print_lob_md5) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_old_md5:[%s](%ld)", + column_index, calc_md5_cstr(old_col_value, old_col_value_len), old_col_value_len); + } else { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_old:[%.*s](%ld)", + column_index, COL_PRINT_VALUE(old_col_value, old_col_value_len)); + + if (OB_SUCCESS == ret && enable_print_hex && need_print_hex(ctype)) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_value_old_hex:", column_index); + pos--; + + if (OB_SUCCESS == ret && OB_FAIL(print_hex(old_col_value, old_col_value_len, ptr, size, pos))) { + LOG_ERROR("print_hex fail", K(ret)); + } + } + } + } + } + + if (is_serilized) { + if (NULL != new_ser_cols) { + delete new_ser_cols; + new_ser_cols = NULL; + } + + if (NULL != old_ser_cols) { + delete old_ser_cols; + old_ser_cols = NULL; + } + } + + return ret; +} + +int ObBinlogRecordPrinter::print_hex(const char *str, int64_t len, char *buf, int64_t size, int64_t &pos) +{ + OB_ASSERT(NULL != buf && size > 0 && pos >= 0); + + int ret = OB_SUCCESS; + int64_t hex_len = 0; + + DATABUFF_PRINTF(buf, size, pos, "["); + + int64_t last_pos = pos; + if (NULL != str && len > 0) + { + for (int64_t i=0; iget_precise_timestamp(); + double delay_sec = static_cast(delta) / 1000000.0; + int64_t timestamp_usec = br->getTimestamp() * 1000000 + br->getRecordUsec(); + int64_t filter_rv_count = 0; + LogRecordImpl *filter_rv_impl = static_cast(br); + const BinLogBuf *filter_rv = filter_rv_impl->filterValues((unsigned int &) filter_rv_count); + LOG_STD("BEGIN TM=[%ld] DELAY=[%.3lf sec] ORG_CLUSTER_ID=%u ", timestamp_usec, delay_sec, br->getThreadId()); + // The forth slot is major_version + int32_t major_version; + if (OB_FAIL(parse_major_version_(filter_rv, major_version))) { + LOG_ERROR("failed to parse major version", KR(ret), K(oblog_br), K(filter_rv), K(major_version)); + } else if (major_version > 0) { + LOG_STD(" major version=[%d]\n\n", major_version); + } else { + LOG_STD("\n\n"); + } + } +} + +void ObBinlogRecordPrinter::console_print_commit(ILogRecord *br, ObLogBR *oblog_br) +{ + if (NULL != br && NULL != oblog_br) { + int64_t delta = ObTimeUtility::current_time() - get_precise_timestamp_(*br); + double delay_sec = static_cast(delta) / 1000000.0; + int64_t timestamp_usec = 
br->getTimestamp() * 1000000 + br->getRecordUsec(); + + LOG_STD("\nCOMMIT TM=[%ld] DELAY=[%.3lf sec]\n\n", timestamp_usec, delay_sec); + } +} + +void ObBinlogRecordPrinter::console_print_heartbeat(ILogRecord *br, ObLogBR *oblog_br) +{ + if (NULL != br && NULL != oblog_br) { + int64_t delta = ObTimeUtility::current_time() - get_precise_timestamp_(*br); + double delay_sec = static_cast(delta) / 1000000.0; + int64_t timestamp_usec = br->getTimestamp() * 1000000 + br->getRecordUsec(); + + LOG_STD("HEARTBEAT TM=[%ld] DELAY=[%.3lf sec]\n\n", timestamp_usec, delay_sec); + } +} + +void ObBinlogRecordPrinter::console_print_statements(ILogRecord *br, ObLogBR *oblog_br) +{ + if (NULL != br && NULL != oblog_br) { + int64_t old_cols_count = 0; + int64_t new_cols_count = 0; + BinLogBuf *old_cols = br->oldCols((unsigned int &)old_cols_count); + BinLogBuf *new_cols = br->newCols((unsigned int &)new_cols_count); + LogRecordImpl *filter_rv_impl = static_cast(br); + unsigned int filter_rv_count = 0; + const BinLogBuf *filter_rv = filter_rv_impl->filterValues((unsigned int &) filter_rv_count); + ObString trace_id; + ObString unique_id; + + if (filter_rv != NULL && filter_rv_count > 2) { + unique_id.assign_ptr(filter_rv[1].buf, filter_rv[1].buf_used_size); + trace_id.assign_ptr(filter_rv[2].buf, filter_rv[2].buf_used_size); + } + + int64_t delta = ObTimeUtility::current_time() - get_precise_timestamp_(*br); + double delay_sec = static_cast(delta) / 1000000.0; + int64_t timestamp_usec = br->getTimestamp() * 1000000 + br->getRecordUsec(); + + const char *padding = (EDDL == br->recordType() ? "" : " "); + + LOG_STD("%s[%s] DB=[%s] TB=[%s] TM=[%ld] CHKP=[%s] DELAY=[%.3lf sec] SRC_CAT=[%s] TRACE_ID=[%.*s](%d)\n" + "%s UNIQUE_ID=[%.*s](%d)\n", + padding, print_record_type(br->recordType()), br->dbname(), br->tbname(), + timestamp_usec, br->getCheckpoint(), delay_sec, print_src_category(br->getSrcCategory()), + trace_id.length(), trace_id.ptr(), trace_id.length(), + padding, unique_id.length(), unique_id.ptr(), unique_id.length()); + + LOG_STD("%s NewCols[%ld] ", padding, new_cols_count); + for (int64_t index = 0; NULL != new_cols && index < new_cols_count; index++) { + LOG_STD(" [%.*s](%ld)", COL_PRINT_VALUE(new_cols[index].buf, new_cols[index].buf_used_size)); + } + LOG_STD("\n"); + LOG_STD("%s OldCols[%ld] ", padding, old_cols_count); + for (int64_t index = 0; NULL != old_cols && index < old_cols_count; index++) { + LOG_STD(" [%.*s](%ld)", COL_PRINT_VALUE(old_cols[index].buf, old_cols[index].buf_used_size)); + } + LOG_STD("\n"); + + if (EDDL == br->recordType()) { + LOG_STD("\n"); + } + } +} + +bool ObBinlogRecordPrinter::need_print_hex(int ctype) +{ + return (obmysql::MYSQL_TYPE_VARCHAR == ctype + || obmysql::MYSQL_TYPE_VAR_STRING == ctype + || obmysql::MYSQL_TYPE_STRING == ctype + || obmysql::MYSQL_TYPE_OB_NVARCHAR2 == ctype + || obmysql::MYSQL_TYPE_OB_NCHAR == ctype); +} + +int ObBinlogRecordPrinter::write_data_file(const int fd, + char *buf, + const int64_t size, + const int64_t pos, + const bool is_line_end, + bool &need_rotate_file) +{ + OB_ASSERT(0 <= fd && NULL != buf && 0 < size && size >= pos); + + int ret = OB_SUCCESS; + int64_t left_len = pos; + const char *ptr = buf; + + need_rotate_file = false; + + if (is_line_end) { + DATABUFF_PRINTF(buf, size, left_len, "\n"); + } + + while (OB_SUCCESS == ret && left_len > 0) { + int64_t write_len = write(fd, ptr, left_len); + if (write_len < 0) { + LOG_ERROR("write data file fail", K(errno), KERRMSG, K(fd)); + ret = OB_ERR_UNEXPECTED; + break; + } else { + 
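+      // write() may return a short count; advance the pointer and loop until the whole buffer has been flushed to the data file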
left_len -= write_len; + ptr += write_len; + } + } + + if (OB_SUCCESS == ret && is_line_end) { + off_t offset = ::lseek(fd, 0, SEEK_END); + if (static_cast(offset) >= MAX_DATA_FILE_SIZE) { + need_rotate_file = true; + } + } + + return ret; +} + +int ObBinlogRecordPrinter::rotate_data_file() +{ + OB_ASSERT(NULL != data_file_ && data_file_fd_ >= 0); + + int ret = OB_SUCCESS; + + char old_file_name[256]; + time_t t; + time(&t); + struct tm tm; + localtime_r((const time_t*)&t, &tm); + + sprintf(old_file_name, "%s.%04d%02d%02d%02d%02d%02d", + data_file_, tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + + ret = rename(data_file_, old_file_name); // return 0 if rename success + + if (OB_FAIL(ret)) { + LOG_ERROR("rename file failed", K(ret), KP_(data_file), KP(old_file_name)); // use K(ret) because ret value of rename is not OB error code + } else { + int fd = open(data_file_, O_WRONLY | O_APPEND | O_CREAT, S_IRUSR | S_IWUSR); + if (0 > fd) { + LOG_ERROR("open data file fail", KP_(data_file), K(errno), KERRMSG); + ret = OB_INVALID_ARGUMENT; + } else { + if (OB_FAIL(close(data_file_fd_))) { + LOG_ERROR("failed to close old data_file_fd", K(ret), K_(data_file_fd)); + } + data_file_fd_ = fd; + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/tests/ob_binlog_record_printer.h b/src/liboblog/tests/ob_binlog_record_printer.h new file mode 100644 index 0000000000000000000000000000000000000000..684dcd9fa97d8dfdf3ea19650eebcb088dd3e160 --- /dev/null +++ b/src/liboblog/tests/ob_binlog_record_printer.h @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_LIBOBLOG_TESTS_BINLOG_RECORD_PRINTER_H__ +#define OCEANBASE_LIBOBLOG_TESTS_BINLOG_RECORD_PRINTER_H__ + +#include // ILogRecord +#include // ITableMeta +#include // StrArray + +#include "share/ob_define.h" // DISALLOW_COPY_AND_ASSIGN +#include "ob_log_config.h" // TCONF +#include "stdlib.h" //atio + +using namespace oceanbase::logmessage; + +namespace oceanbase +{ +namespace liboblog +{ +class ObLogBR; + +class IObBinlogRecordPrinter +{ +public: + virtual ~IObBinlogRecordPrinter() {} + +public: + virtual int print_binlog_record(ILogRecord *br) = 0; +}; + +class ObBinlogRecordPrinter : public IObBinlogRecordPrinter +{ + static const int64_t MAX_DATA_FILE_SIZE = 256 * 1024 * 1024; + // The length of the major version of the freeze version, used to output the major version to the binlog record, + // major version is int32_t(2147483647), so max_length is configed to MAJOR_VERSION_LENGTH to 10; + static const int64_t MAJOR_VERSION_LENGTH = 10; + +public: + ObBinlogRecordPrinter(); + virtual ~ObBinlogRecordPrinter(); + +public: + virtual int print_binlog_record(ILogRecord *br); + +public: + int init(const char *data_file, + const char *heartbeat_file, + const bool enable_print_console, + const bool only_print_hex, + const bool enable_print_hex, + const bool enable_print_lob_md5, + const bool enable_verify_mode, + const bool enable_print_detail); + void destroy(); + +private: + int open_file_(const char *data_file, int &fd); + int rotate_data_file(); + +private: + static int64_t get_precise_timestamp_(ILogRecord &br); + static void console_print_statements(ILogRecord *br, ObLogBR *oblog_br); + static void console_print_heartbeat(ILogRecord *br, ObLogBR *oblog_br); + static void console_print_commit(ILogRecord *br, ObLogBR *oblog_br); + static void console_print_begin(ILogRecord *br, ObLogBR *oblog_br); + static void console_print(ILogRecord *br, ObLogBR *oblog_br); + static int output_data_file(ILogRecord *br, + const int record_type, + ObLogBR *oblog_br, + const int fd, + const bool only_print_hex, + const bool enable_print_hex, + const bool enable_print_lob_md5, + const bool enable_verify_mode, + const bool enable_print_detail, + bool &need_rotate_file); + static int output_data_file_column_data(const bool is_serilized, + ILogRecord *br, + ITableMeta *table_meta, + const int64_t index, + char *ptr, + const int64_t size, + const uint64_t ri, + const bool only_print_hex, + const bool enable_print_hex, + const bool enable_print_lob_md5, + const bool enable_print_detail, + int64_t &pos); + static int print_hex(const char *str, int64_t len, char *buf, int64_t size, int64_t &pos); + static int write_data_file(const int fd, + char *buf, + const int64_t size, + const int64_t pos, + const bool is_line_end, + bool &need_rotate_file); + static bool need_print_hex(int ctype); + static int output_heartbeat_file(const int fd, const int64_t heartbeat_timestamp); + static int verify_begin_trans_id_(ObLogBR &oblog_br, + const char *begin_trans_id); + static int parse_major_version_(const BinLogBuf *filter_rv, int32_t &major_version); + +private: + bool inited_; + const char *data_file_; + int data_file_fd_; + int heartbeat_file_fd_; + bool only_print_hex_; + bool enable_print_hex_; + bool enable_print_console_; + bool enable_print_lob_md5_; + bool enable_verify_mode_; + bool enable_print_detail_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObBinlogRecordPrinter); +}; + +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_TESTS_BINLOG_RECORD_PRINTER_H__ */ diff 
--git a/src/liboblog/tests/oblog_main.cpp b/src/liboblog/tests/oblog_main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1958bf3a4b0a95c29b5a177fdf63fbc7d688ef3c --- /dev/null +++ b/src/liboblog/tests/oblog_main.cpp @@ -0,0 +1,455 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_TAILF + +#include "oblog_main.h" +#include "ob_log_instance.h" // ObLogInstance + +#include // fprintf +#include // getopt_long +#include // strtoull + +#define LOG_STD(str, ...) \ + do { \ + fprintf(stderr, str, ##__VA_ARGS__); \ + } while (0) + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +ObLogMain &ObLogMain::get_instance() +{ + static ObLogMain oblog_main; + return oblog_main; +} + +ObLogMain::ObLogMain() : inited_(false), + oblog_(NULL), + oblog_factory_(), + br_printer_(), + only_print_hex_(false), + print_hex_(false), + print_lob_md5_(false), + use_daemon_(false), + data_file_(NULL), + heartbeat_file_(NULL), + run_time_us_(-1), + config_file_(NULL), + print_console_(false), + verify_mode_(false), + enable_reentrant_(false), + output_br_detail_(false), + start_timestamp_usec_(0), + tenant_id_(OB_INVALID_TENANT_ID), + tg_match_pattern_(NULL), + last_heartbeat_timestamp_micro_sec_(0), + stop_flag_(true) +{ +} + +ObLogMain::~ObLogMain() +{ + destroy(); +} + +int ObLogMain::init(int argc, char **argv) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(parse_args_(argc, argv))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("parse arguments fail", K(ret), K(argc)); + } + } else if (! 
check_args_()) { + LOG_ERROR("check arguments fail"); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(br_printer_.init(data_file_, heartbeat_file_, print_console_, only_print_hex_, print_hex_, + print_lob_md5_, verify_mode_, output_br_detail_))) { + LOG_ERROR("init binlog record printer fail", K(ret), K(data_file_), K(heartbeat_file_), + K(print_console_), K(only_print_hex_), K(print_hex_), K(print_lob_md5_), K(verify_mode_), K_(output_br_detail)); + } else { + stop_flag_ = true; + inited_ = true; + last_heartbeat_timestamp_micro_sec_ = start_timestamp_usec_; + } + + return ret; +} + +void ObLogMain::destroy() +{ + stop(); + + inited_ = false; + oblog_ = NULL; + only_print_hex_ = false; + print_hex_ = false; + print_lob_md5_ = false; + use_daemon_ = false; + data_file_ = NULL; + heartbeat_file_ = NULL; + run_time_us_ = 0; + config_file_ = NULL; + print_console_ = false; + verify_mode_ = false; + enable_reentrant_ = false; + start_timestamp_usec_ = 0; + tenant_id_ = OB_INVALID_TENANT_ID; + tg_match_pattern_ = NULL; + last_heartbeat_timestamp_micro_sec_ = 0; + stop_flag_ = true; + output_br_detail_ = false; + br_printer_.destroy(); +} + +int ObLogMain::parse_args_(int argc, char **argv) +{ + int ret = OB_SUCCESS; + + // option variables + int opt = -1; + const char *opt_string = "ivdD:f:hH:oVt:rR:OxmT:P"; + struct option long_opts[] = + { + {"use_daemon", 0, NULL, 'd'}, + {"data_file", 1, NULL, 'D'}, + {"config_file",1,NULL,'f'}, + {"help", 0, NULL, 'h'}, + {"heartbeat_file", 1, NULL, 'H'}, + {"print_console",0,NULL, 'o'}, + {"verify_mode", 0, NULL, 'V'}, + {"start_timestamp", 1, NULL, 't'}, // t: Represents a start-up timestamp in seconds + {"start_timestamp_usec", 1, NULL, 'T'}, // T: Represents a start-up timestamp in microsecond units + {"data_start_schema_version", 1, NULL, 's'}, + {"enable_reentrant", 0, NULL, 'r'}, + {"run_time_sec", 1, NULL, 'R'}, + {"only_print_hex", 0, NULL, 'O'}, + {"print_hex", 0, NULL, 'x'}, + {"print_lob_md5", 0, NULL, 'm'}, + {"version", 0, NULL, 'v'}, + {"verify_begin_trans_id", 0, NULL, 'P'}, + {"output_br_detail", 0, NULL, 'i'}, + {0, 0, 0, 0} + }; + + if (argc <= 1) { + print_usage(argv[0]); + ret = OB_IN_STOP_STATE; + } + + // Parse command line + while (OB_SUCCESS == ret && (opt = getopt_long(argc, argv, opt_string, long_opts, NULL)) != -1) { + switch (opt) { + case 'f': { + config_file_ = optarg; + break; + } + + case 'd': { + use_daemon_ = true; + break; + } + + case 'D': { + data_file_ = optarg; + break; + } + + case 'h': { + print_usage(argv[0]); + ret = OB_IN_STOP_STATE; + break; + } + + case 'H': { + heartbeat_file_ = optarg; + break; + } + + case 'o': { + print_console_ = true; + break; + } + + case 'V': { + verify_mode_ = true; + break; + } + + case 't': { + start_timestamp_usec_ = strtoll(optarg, NULL, 10) * 1000000L; + break; + } + + case 'T': { + start_timestamp_usec_ = strtoll(optarg, NULL, 10); + break; + } + + case 'r': { + enable_reentrant_ = true; + break; + } + + case 'R': { + run_time_us_ = strtoll(optarg, NULL, 10) * 1000000; + LOG_STD("RUN_TIME: %ld seconds\n", run_time_us_ / 1000000); + break; + } + + case 'O': { + only_print_hex_ = true; + break; + } + + case 'x': { + print_hex_ = true; + break; + } + + case 'm': { + print_lob_md5_ = true; + break; + } + + case 'i': { + // output detail info of binlog record, default off + output_br_detail_ = true; + break; + } + + case 'v': { + ObLogInstance::print_version(); + ret = OB_IN_STOP_STATE; + break; + } + case 'P': { + // Verify that the begin trans_id function does not fall back and 
is turned on by default + LOG_STD("verify_begin_trans_id\n"); + break; + } + + default: + ret = OB_ERROR; + LOG_ERROR("unknown parameters", K(opt), K(opt_string)); + break; + } // end switch + } // end while + + return ret; +} + +void ObLogMain::print_usage(const char *prog_name) +{ + LOG_STD("USAGE: %s -f config_file_path\n\n" + " -v, --version print version\n" + " -d, --use_daemon start as daemon, default no daemon\n" + " -D, --data_file data file used to store data\n" + " -f, --config_file configuration file\n" + " -h, --help display this help\n" + " -H, --heartbeat_file heartbeat file used to store heartbeat data\n" + " -o, --print_console output result to stderr or stdout, default not output\n" + " -V, --verify_mode start verify mode\n" + " -t, --start_timestamp start timestamp in second, default current timestamp\n" + " -T, --start_timestamp_usec start timestamp in micro second, default current timestamp\n" + " -r, --enable_reentrant enable reentrant after stop, default disable\n" + " -R, --run_time_sec run time in seconds, default -1, means to run forever\n" + " -x, --print_hex print hex for newcolumn, to check implicit char\n" + " -m, --print_lob_md5 print md5 info for LOB data\n" + " -i, --output_br_detail output immutable detail info of binlog record, default not output\n" + + "\neg: %s -f liboblog.conf\n", + prog_name, prog_name); +} + +bool ObLogMain::check_args_() +{ + int ret = OB_SUCCESS; + int nochdir = 1; + int noclose = 0; + + if (NULL == config_file_) { + LOG_ERROR("config file is missing"); + ret = OB_INVALID_ARGUMENT; + } else if (use_daemon_ && daemon(nochdir, noclose) < 0) { + LOG_ERROR("create daemon process error", K(errno), KERRMSG); + ret = OB_ERR_UNEXPECTED; + } + + return OB_SUCCESS == ret; +} + +int ObLogMain::start() +{ + int ret = OB_SUCCESS; + if (! inited_) { + ret = OB_NOT_INIT; + } else if (NULL != oblog_) { + LOG_ERROR("oblog has started"); + ret = OB_NOT_SUPPORTED; + } else if (stop_flag_) { + stop_flag_ = false; + + if (NULL == (oblog_ = oblog_factory_.construct_oblog())) { + LOG_ERROR("construct oblog fail"); + ret = OB_INIT_FAIL; + } else { + ObLogInstance *instance = (ObLogInstance *)oblog_; + // Disable redirected output + instance->set_disable_redirect_log(true); + + if (OB_FAIL(instance->init_with_start_tstamp_usec(config_file_, start_timestamp_usec_, handle_error))) { + LOG_ERROR("init oblog fail", K(ret), K_(config_file), K_(start_timestamp_usec), KP(handle_error)); + } else { + // do nothing + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(oblog_->launch())) { + LOG_ERROR("launch oblog fail", K(ret)); + } else { + OB_LOGGER.set_log_level(instance->get_log_level()); + OB_LOGGER.set_file_name(instance->get_log_file(), true, false); + LOG_INFO("start oblog success"); + } + } + } + } + + return ret; +} + +void ObLogMain::stop() +{ + stop_flag_ = true; + + if (NULL != oblog_) { + oblog_->stop(); + oblog_->destroy(); + oblog_factory_.deconstruct(oblog_); + oblog_ = NULL; + } +} + +void ObLogMain::run() +{ + if (inited_ && NULL != oblog_) { + int ret = OB_SUCCESS; + int64_t end_time = ::oceanbase::common::ObTimeUtility::current_time() + run_time_us_; + + while (OB_SUCCESS == ret && ! 
stop_flag_) { + ILogRecord *br = NULL; + ret = oblog_->next_record(&br, NEXT_RECORD_TIMEOUT); + + if (OB_SUCC(ret)) { + if (OB_FAIL(verify_record_info_(br))) { + LOG_ERROR("verify_record_info_ fail", K(ret), K(br)); + } + // output binlog record + else if (OB_FAIL(br_printer_.print_binlog_record(br))) { + LOG_ERROR("print_binlog_record fail", K(ret)); + } else { + oblog_->release_record(br); + br = NULL; + } + } else if (OB_TIMEOUT == ret) { + int64_t left_time = end_time - ::oceanbase::common::ObTimeUtility::current_time(); + if (run_time_us_ > 0 && left_time <= 0) { + ret = OB_TIMEOUT; + } else { + ret = OB_SUCCESS; + } + } else if (OB_ITER_END == ret) { + LOG_INFO("iterate BinlogRecord to the END"); + stop_flag_ = true; + ret = OB_SUCCESS; + } else if (OB_IN_STOP_STATE == ret) { + stop_flag_ = true; + ret = OB_SUCCESS; + } else { + LOG_ERROR("next_record fail", K(ret)); + } + } + } +} + +void ObLogMain::handle_error(const ObLogError &err) +{ + LOG_INFO("stop oblog on error", "level", err.level_, "errno", err.errno_, "errmsg", err.errmsg_); + ObLogMain::get_instance().mark_stop_flag(true); +} + +int ObLogMain::verify_record_info_(ILogRecord *br) +{ + int ret = OB_SUCCESS; + static bool is_first_br = true; + ObLogBR *oblog_br = NULL; + + if (OB_UNLIKELY(! inited_)) { + LOG_ERROR("ObLogMain has not inited"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(br)) { + LOG_ERROR("br is null"); + ret = OB_INVALID_ARGUMENT; + } else if (OB_ISNULL(oblog_br = reinterpret_cast(br->getUserData()))) { + LOG_ERROR("get user data fail", K(br), K(oblog_br)); + ret = OB_INVALID_ARGUMENT; + } else { + // heartbeat, updtae last_heartbeat_timestamp_micro_sec_ + if (HEARTBEAT == br->recordType()) { + int64_t timestamp_usec = OB_INVALID_TIMESTAMP; + if (is_first_br) { + // oblog_tailf -f $CONFIG -t 0 means start at current time + // The liboblog start timestamp is not available + // So the first BinlogRecord is obtained based on the checkpoint + timestamp_usec = br->getCheckpoint1() * 1000000 + br->getCheckpoint2(); + is_first_br = false; + } else { + timestamp_usec = br->getTimestamp() * 1000000 + br->getRecordUsec(); + } + last_heartbeat_timestamp_micro_sec_ = std::max(timestamp_usec, last_heartbeat_timestamp_micro_sec_); + } + + // Calibration timestamp and checkpoint + int64_t precise_timestamp = oblog_br->get_precise_timestamp(); + int64_t timestamp_sec = precise_timestamp / 1000000; + int64_t timestamp_usec = precise_timestamp % 1000000; + int64_t expect_checkpoint1 = last_heartbeat_timestamp_micro_sec_ / 1000000; + int64_t expect_checkpoint2 = last_heartbeat_timestamp_micro_sec_ % 1000000; + + if (OB_UNLIKELY(timestamp_sec != br->getTimestamp()) + || OB_UNLIKELY(timestamp_usec != br->getRecordUsec())) { + LOG_ERROR("timestamp is not right", K(precise_timestamp), "br_sec", br->getTimestamp(), + "br_usec", br->getRecordUsec()); + ret = OB_ERR_UNEXPECTED; + } else if (OB_UNLIKELY(expect_checkpoint1 != br->getCheckpoint1()) + || OB_UNLIKELY(expect_checkpoint2 != br->getCheckpoint2())) { + LOG_ERROR("checkpoint is not right", K(br), K(last_heartbeat_timestamp_micro_sec_), + K(expect_checkpoint1), "br_checkpoint1", br->getCheckpoint1(), + K(expect_checkpoint2), "br_checkpoint2", br->getCheckpoint2(), + "getTimestamp", br->getTimestamp(), "getRecordUsec", br->getRecordUsec(), + K(is_first_br)); + ret = OB_ERR_UNEXPECTED; + } else { + // succ + } + } + + return ret; +} + +} // namespace liboblog +} // namespace oceanbase diff --git a/src/liboblog/tests/oblog_main.h b/src/liboblog/tests/oblog_main.h new file mode 
100644 index 0000000000000000000000000000000000000000..75eb1d96fc602e44be5999e5951cb1245d0f516a --- /dev/null +++ b/src/liboblog/tests/oblog_main.h @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBLOG_TESTS_OBLOG_H__ +#define OCEANBASE_LIBOBLOG_TESTS_OBLOG_H__ + +#include "share/ob_define.h" + +#include "liboblog.h" // IObLog +#include "ob_binlog_record_printer.h" // ObBinlogRecordPrinter + +namespace oceanbase +{ +namespace liboblog +{ +class IObLog; +class ObLogMain +{ + static const int64_t NEXT_RECORD_TIMEOUT = 1000000; + +public: + virtual ~ObLogMain(); + +protected: + ObLogMain(); + +public: + static ObLogMain &get_instance(); + +public: + int init(int argc, char **argv); + void destroy(); + + int start(); + void run(); + void stop(); + void mark_stop_flag(const bool stop_flag) { stop_flag_ = stop_flag; } + + bool need_reentrant() const { return enable_reentrant_; } + static void print_usage(const char *prog_name); + +public: + static void handle_error(const ObLogError &err); + +private: + int parse_args_(int argc, char **argv); + bool check_args_(); + int verify_record_info_(ILogRecord *br); + +private: + bool inited_; + IObLog *oblog_; + ObLogFactory oblog_factory_; + ObBinlogRecordPrinter br_printer_; + + // configuration + bool only_print_hex_; + bool print_hex_; + bool print_lob_md5_; + bool use_daemon_; + const char *data_file_; + const char *heartbeat_file_; + int64_t run_time_us_; + const char *config_file_; + bool print_console_; + bool verify_mode_; + bool enable_reentrant_; + bool output_br_detail_; + int64_t start_timestamp_usec_; + uint64_t tenant_id_; + const char *tg_match_pattern_; + + // Record heartbeat microsecond time stamps + int64_t last_heartbeat_timestamp_micro_sec_; + + volatile bool stop_flag_ CACHE_ALIGNED; + +private: + DISALLOW_COPY_AND_ASSIGN(ObLogMain); +}; +} // namespace liboblog +} // namespace oceanbase +#endif /* OCEANBASE_LIBOBLOG_TESTS_OBLOG_H__ */ diff --git a/src/liboblog/tests/start.sh.tpl b/src/liboblog/tests/start.sh.tpl new file mode 100755 index 0000000000000000000000000000000000000000..7e1321f8de7aae3bb0f82ce3cd051a45f66e55eb --- /dev/null +++ b/src/liboblog/tests/start.sh.tpl @@ -0,0 +1,33 @@ +#!/bin/bash + +#TIMESTAMP=`date -d "2019-08-09 12:01:31" +%s` +TIMESTAMP=1611816878745567 +TIMESTAMP=0 +TIMESTAMP=${TIMESTAMP: 0 :10} + +CONFIG=conf/liboblog.conf +DATA_FILE=data/data.log + +RUN_TIME=60 + +LOG_DIR=./log + +./kill_oblog.sh + +rm -fr $LOG_DIR/ core.* ${DATA_FILE}* + +export LD_LIBRARY_PATH=./lib/:$LD_LIBRARY_PATH +./oblog_tailf -v + +## work in background +#./oblog_tailf -f $CONFIG -t $TIMESTAMP -d +./oblog_tailf -f $CONFIG -t $TIMESTAMP -d + +# Timed runs in the background +#./oblog_tailf -R $RUN_TIME -f $CONFIG -t $TIMESTAMP -d + +# output data +#./oblog_tailf -V -f $CONFIG -T $TIMESTAMP -D${DATA_FILE} 2>&1 | grep -v tid + +# Timed runs with output data +#./oblog_tailf -x -o -R$RUN_TIME -f $CONFIG -t $TIMESTAMP -D${DATA_FILE} 2>&1 | grep -v tid diff --git a/src/liboblog/tests/top.sh b/src/liboblog/tests/top.sh new 
file mode 100755 index 0000000000000000000000000000000000000000..5e839eb9104467dc2bc78849d294dec1b2820fe0 --- /dev/null +++ b/src/liboblog/tests/top.sh @@ -0,0 +1,3 @@ +PID=`cat run/liboblog.pid` + +top -d 1 -p $PID diff --git a/src/liboblog/tests/watch_delay.sh b/src/liboblog/tests/watch_delay.sh new file mode 100755 index 0000000000000000000000000000000000000000..791c81630c37c99509643b31a87d8ad5b1ebe4ec --- /dev/null +++ b/src/liboblog/tests/watch_delay.sh @@ -0,0 +1,4 @@ + +LOG_FILE=log/liboblog.log + +watch -n 1 "if [ -f $LOG_FILE ]; then grep MIN_DELAY $LOG_FILE | grep HEARTBEAT | awk '{print \$11, \$12}' | tail -n 3; fi" diff --git a/src/liboblog/tests/watch_fetcher_delay.sh b/src/liboblog/tests/watch_fetcher_delay.sh new file mode 100755 index 0000000000000000000000000000000000000000..304568ada62a4560ac931a84a3b8357a30d46252 --- /dev/null +++ b/src/liboblog/tests/watch_fetcher_delay.sh @@ -0,0 +1,4 @@ + +LOG_FILE=log/liboblog.log + +watch -n 1 "if [ -f $LOG_FILE ]; then grep 'update progress upper limit' $LOG_FILE | awk -F ' INFO |lower_limit=|upper_limit=|fetcher_delay=' '{printf \"%s lower:%s upper:%s delay:%s\n\", \$1, \$3, \$4, \$5}' | tail -n 5; fi" diff --git a/src/liboblog/tests/watch_flow_control.sh b/src/liboblog/tests/watch_flow_control.sh new file mode 100755 index 0000000000000000000000000000000000000000..facfe520df734919ef481c38c131709c10dcff5b --- /dev/null +++ b/src/liboblog/tests/watch_flow_control.sh @@ -0,0 +1,3 @@ +LOG_FILE=log/liboblog.log + +watch -n 1 "if [ -f $LOG_FILE ]; then grep NEED log/liboblog.log | awk '{print \$10, \$11, \$12, \$13, \$15}' | tail -n 5; fi" diff --git a/src/liboblog/tests/watch_tps.sh b/src/liboblog/tests/watch_tps.sh new file mode 100755 index 0000000000000000000000000000000000000000..882c419ffef7f731f15be81bf12cc58e6d889f83 --- /dev/null +++ b/src/liboblog/tests/watch_tps.sh @@ -0,0 +1,4 @@ + +LOG_FILE=log/liboblog.log + +watch -n 1 "if [ -f $LOG_FILE ]; then grep NEXT_RECORD_TPS $LOG_FILE | awk '{print \$1, \$2, \$10, \$11, \$12, \$13}' | tail -n 3; fi" diff --git a/src/liboblog/tests/watch_traffic.sh b/src/liboblog/tests/watch_traffic.sh new file mode 100755 index 0000000000000000000000000000000000000000..d5d28e7951d383a941edb46a8b9d3e64f3541685 --- /dev/null +++ b/src/liboblog/tests/watch_traffic.sh @@ -0,0 +1,3 @@ +LOG=log/liboblog.log + +watch -n 1 "if [ -f $LOG ]; then grep traffic $LOG | grep -v traffic=0.00B | awk '{printf(\"%-26s %-13s %-10s %-10s %-14s %-12s %-18s %-28s %-25s %-26s\n\", \$10, \$11, \$12, \$13, \$14, \$15, \$17, \$18, \$19, \$20);}' | tail -n 3; fi" diff --git a/src/liboblog/tests/watch_trans_ctx.sh b/src/liboblog/tests/watch_trans_ctx.sh new file mode 100755 index 0000000000000000000000000000000000000000..8d37b6b692dfc970cbfcf2ecf866b8584bd6e065 --- /dev/null +++ b/src/liboblog/tests/watch_trans_ctx.sh @@ -0,0 +1,4 @@ + +LOG_FILE=log/liboblog.log + +watch -n 1 "if [ -f $LOG_FILE ]; then grep PREPARED $LOG_FILE | awk '{print \$9, \$10, \$11, \$12, \$13, \$14, \$15}' | tail -n 5; fi" diff --git a/src/liboblog/tests/watch_trans_task.sh b/src/liboblog/tests/watch_trans_task.sh new file mode 100755 index 0000000000000000000000000000000000000000..6312be554c3426ba016289b35d05d786fa2ce40b --- /dev/null +++ b/src/liboblog/tests/watch_trans_task.sh @@ -0,0 +1,4 @@ + +LOG_FILE=log/liboblog.log + +watch -n 1 "if [ -f $LOG_FILE ]; then grep TRANS_TASK_POOL $LOG_FILE | grep STAT | awk '{print \$1, \$2, \$9, \$10, \$11, \$12, \$13, \$14, \$15}' | tail -n 5; fi" diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt 
index 6bf97f6aed4b2786b9a99d60bbea52b66ba03275..337ae8e487fa76f7a7d6f3ea8bd465edbd2a75fa 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -29,3 +29,4 @@ add_subdirectory(observer) add_subdirectory(share) add_subdirectory(rootserver) add_subdirectory(tools) +add_subdirectory(liboblog) diff --git a/unittest/liboblog/CMakeLists.txt b/unittest/liboblog/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0970e4d2209ab241e9e704038fd89bc2bb983c4 --- /dev/null +++ b/unittest/liboblog/CMakeLists.txt @@ -0,0 +1,34 @@ +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +link_directories(${DEP_DIR}/lib/mariadb) +function(liboblog_unittest case) + if(ARGC EQUAL 1) + add_executable(${case} ${case}.cpp) + else() + add_executable(${ARGV}) + endif() + target_link_libraries(${case} PRIVATE oceanbase oblog gtest gmock) + disable_pch(${case}) + target_include_directories(${case} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/deps/oblib/unittest ${DEP_DIR}/include) +endfunction() + +liboblog_unittest(test_log_part_mgr) +liboblog_unittest(test_log_task_pool) +liboblog_unittest(test_small_arena) +#liboblog_unittest(test_log_config) // TODO may core, need fix +liboblog_unittest(test_log_fake_common_config) +liboblog_unittest(test_log_table_matcher) +liboblog_unittest(test_ob_map_queue) +liboblog_unittest(test_ob_map_queue_thread) +liboblog_unittest(test_ob_log_timer) +liboblog_unittest(test_ob_log_dlist) +liboblog_unittest(test_ob_log_part_svr_list) +liboblog_unittest(test_ob_log_all_svr_cache) +liboblog_unittest(test_ob_log_start_log_id_locator) +liboblog_unittest(test_ob_log_heartbeater) +liboblog_unittest(test_log_utils) +liboblog_unittest(test_ob_log_adapt_string) +liboblog_unittest(test_ob_concurrent_seq_queue) +liboblog_unittest(test_ob_seq_thread) +liboblog_unittest(test_ob_log_part_trans_resolver_new) +liboblog_unittest(test_log_svr_blacklist) diff --git a/unittest/liboblog/fake_config_server b/unittest/liboblog/fake_config_server new file mode 100755 index 0000000000000000000000000000000000000000..dc5fe975147c47b0adfdc1b5a20eb48ef0fe7547 --- /dev/null +++ b/unittest/liboblog/fake_config_server @@ -0,0 +1,12 @@ +#!/bin/bash + +port=${1:-6789} + +wrap_json='{"Message": "successful", "Success": true, "Code": 200, "Data":' +json='' +while content="$(printf "HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" ${#json} "$json" | nc -l $port)" ; do + if echo "$content" | grep -q '^POST' && echo "$content" | tail -n 1 | grep -q '^{' ; then + json="$wrap_json$(echo "$content" | tail -n 1)}" + # echo "$json" + fi +done diff --git a/unittest/liboblog/nopretest_test_ext_break.cpp b/unittest/liboblog/nopretest_test_ext_break.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35d910f30a9e14ef8cad160ff4a3725ae52654b2 --- /dev/null +++ b/unittest/liboblog/nopretest_test_ext_break.cpp @@ -0,0 +1,544 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "ob_log_fetcher_rpc_interface.h" + +namespace oceanbase +{ +using namespace common; +using namespace obrpc; +using namespace liboblog; +using namespace liboblog::fetcher; +namespace unittest +{ + +class MockFectherInterface : public IFetcherRpcInterface +{ +public: + MockFectherInterface(ObNetClient &net_client, + const uint64_t tenant_id = OB_SYS_TENANT_ID) + : net_client_(net_client), + tenant_id_(tenant_id) + { + svr_finder_ = NULL; + } + void set_svr(const ObAddr &svr) + { + svr_ = svr; + } + virtual const ObAddr& get_svr() const + { + return svr_; + } + void set_timeout(const int64_t timeout) + { + timeout_ = timeout; + } + virtual int req_start_log_id_by_ts(const ObLogReqStartLogIdByTsRequest &req, + ObLogReqStartLogIdByTsResponse &res) + { + UNUSED(req); + UNUSED(res); + return OB_SUCCESS; + } + virtual int req_start_log_id_by_ts_2( + const ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + ObLogReqStartLogIdByTsResponseWithBreakpoint &res) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = proxy.to(svr_).by(tenant_id_).timeout(timeout_).req_start_log_id_by_ts_with_breakpoint(req, res); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc req start log id by ts", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + res.reset(); + res.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else {} + _D_("rpc: req start log id by ts", K(ret), "svr", get_svr(), + K(req), K(res)); + } + return ret; + } + + virtual int req_start_pos_by_log_id( + const ObLogReqStartPosByLogIdRequest &req, + ObLogReqStartPosByLogIdResponse &res) + { + UNUSED(req); + UNUSED(res); + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2( + const ObLogReqStartPosByLogIdRequestWithBreakpoint& req, + ObLogReqStartPosByLogIdResponseWithBreakpoint& res) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = proxy.to(svr_).by(tenant_id_).timeout(timeout_).req_start_pos_by_log_id_with_breakpoint(req, res); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc req start pos by log id", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + res.reset(); + res.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else { } + _D_("rpc: req start pos by log id", K(ret), "svr", get_svr(), + K(req), K(res)); + } + return ret; + } + + virtual int fetch_log(const ObLogExternalFetchLogRequest& req, + ObLogExternalFetchLogResponse& res) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = proxy.to(svr_).by(tenant_id_).timeout(timeout_).fetch_log(req, res); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc fetch log", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + res.reset(); + res.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else { } + _D_("rpc: fetch log", K(ret), "svr", get_svr(), K(req), K(res)); + } + return ret; + } + + virtual int req_heartbeat_info(const ObLogReqHeartbeatInfoRequest& req, + ObLogReqHeartbeatInfoResponse& res) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = 
proxy.to(svr_).by(tenant_id_).timeout(timeout_).req_heartbeat_info(req, res); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc req heartbeat info", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + res.reset(); + res.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else { } + _D_("rpc: req heartbeat info", K(ret), "svr", get_svr(), K(req), K(res)); + } + return ret; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = proxy.to(svr_).by(tenant_id_).timeout(timeout_).leader_heartbeat(req, res); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc req heartbeat info", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + res.reset(); + res.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else { } + _D_("rpc: req heartbeat info", K(ret), "svr", get_svr(), K(req), K(res)); + } + return ret; + } + + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + // This interface is deprecated. + UNUSED(feedback); + return common::OB_NOT_IMPLEMENT; + } + + virtual int open_stream(const ObLogOpenStreamReq &req, + ObLogOpenStreamResp &resp) + { + int ret = OB_SUCCESS; + UNUSED(req); + UNUSED(resp); + return ret; + } + + virtual int fetch_stream_log(const ObLogStreamFetchLogReq &req, + ObLogStreamFetchLogResp &resp) + { + int ret = OB_SUCCESS; + UNUSED(req); + UNUSED(resp); + return ret; + } +private: + ObNetClient &net_client_; + SvrFinder *svr_finder_; + ObAddr svr_; + uint64_t tenant_id_; + int64_t timeout_; +}; +} +} + +using namespace oceanbase::common; +using namespace oceanbase::obrpc; +using namespace oceanbase::liboblog; +using namespace oceanbase::unittest; + +ObAddr get_svr_addr() +{ + ObAddr svr; + int32_t port = 59700; + svr.set_ip_addr("100.81.152.31", port); + return svr; +} + +#define MILLI_SECOND 1000 +#define SECOND (1000 * 1000) + +int64_t get_timeout() +{ + return 1000 * SECOND; +} + +#define N 3 +const ObPartitionKey pk1(1099511677785, 0, 1); +const ObPartitionKey pk3(1099511677784, 0, 1); +const ObPartitionKey pk2(1099511677783, 0, 1); +ObPartitionKey pks[N] = { pk1, pk2, pk3 }; + +typedef ObLogReqStartLogIdByTsRequestWithBreakpoint TsReq; +typedef ObLogReqStartLogIdByTsRequestWithBreakpoint::Param TsReqParam; +typedef ObLogReqStartLogIdByTsRequestWithBreakpoint::ParamArray TsReqParamArray; +typedef ObLogReqStartLogIdByTsResponseWithBreakpoint TsResp; +typedef ObLogReqStartLogIdByTsResponseWithBreakpoint::Result TsRespResult; +typedef ObLogReqStartLogIdByTsResponseWithBreakpoint::ResultArray TsRespResultArray; + +typedef ObLogReqStartPosByLogIdRequestWithBreakpoint IdReq; +typedef ObLogReqStartPosByLogIdRequestWithBreakpoint::Param IdReqParam; +typedef ObLogReqStartPosByLogIdRequestWithBreakpoint::ParamArray IdReqParamArray; +typedef ObLogReqStartPosByLogIdResponseWithBreakpoint IdResp; +typedef ObLogReqStartPosByLogIdResponseWithBreakpoint::Result IdRespResult; +typedef ObLogReqStartPosByLogIdResponseWithBreakpoint::ResultArray IdRespResultArray; + +void test_ts_break(const int64_t start_ts, TsResp &resp) +{ + int ret = OB_SUCCESS; + ObNetClient net_client; + if (OB_FAIL(net_client.init())) { + _E_("net client init error", K(ret)); + } else { + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + 
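+    // Configure the mock RPC target and timeout, then issue one locate-by-timestamp request covering all test partitions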
rpc.set_timeout(get_timeout()); + TsReq req; + for (int i = 0; OB_SUCC(ret) && i < N; i++) { + TsReqParam param; + param.pkey_ = pks[i]; + param.start_tstamp_ = start_ts; + if (OB_FAIL(req.append_param(param))) { + _W_("push param error", K(ret)); + } + } + ret = rpc.req_start_log_id_by_ts_2(req, resp); + _I_("----------------------------------------"); + _I_("start_ts:", K(start_ts)); + _I_("req_start_log_id_by_ts finish", K(ret), K(req), K(resp)); + _I_("----------------------------------------"); + } +} + +void test_id_break(uint64_t start_log_ids[N], IdResp &resp) +{ + int ret = OB_SUCCESS; + ObNetClient net_client; + if (OB_FAIL(net_client.init())) { + _E_("net client init error", K(ret)); + } else { + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + rpc.set_timeout(get_timeout()); + IdReq req; + for (int i = 0; OB_SUCC(ret) && i < N; i++) { + IdReqParam param; + param.pkey_ = pks[i]; + param.start_log_id_ = start_log_ids[i]; + if (OB_FAIL(req.append_param(param))) { + _W_("push param error", K(ret)); + } + } + ret = rpc.req_start_pos_by_log_id_2(req, resp); + _I_("----------------------------------------"); + _I_("start_log_id", K(start_log_ids[0]), K(start_log_ids[1]), K(start_log_ids[2])); + _I_("req_start_pos_by_log_id finish", K(ret), K(req), K(resp)); + _I_("----------------------------------------"); + } +} + +void ts_case_1() +{ + // normal test + int64_t start_ts = 1460969850000000; + TsResp resp; + test_ts_break(start_ts, resp); +} + +void ts_case_2() +{ + // large enough, test handle_cold_pkeys, get predict value + int64_t start_ts = 1500000000000000; + TsResp resp; + test_ts_break(start_ts, resp); +} + +void ts_case_3() +{ + // large enough, test handle cold by last info + int64_t start_ts = 1460970107619884 + 1; + TsResp resp; + test_ts_break(start_ts, resp); +} + +void ts_case_4() +{ + // small enough, test after_scan + int64_t start_ts = 1400000000080000; + TsResp resp; + test_ts_break(start_ts, resp); +} + +void ts_case_5() +{ + // test break + int ret = OB_SUCCESS; + int64_t start_ts = 1400000000080000; + ObNetClient net_client; + if (OB_FAIL(net_client.init())) { + _E_("net client init error", K(ret)); + } else { + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + rpc.set_timeout(get_timeout()); + + _I_("++++++++++++++++++++++++++++++++++++++++"); + TsReq req; + TsResp resp; + bool stop = false; + for (int i = 0; OB_SUCC(ret) && i < N; i++) { + TsReqParam param; + param.pkey_ = pks[i]; + param.start_tstamp_ = start_ts; + if (OB_FAIL(req.append_param(param))) { + _W_("push param error", K(ret)); + } + } + + while (!stop) { + stop = true; + ret = rpc.req_start_log_id_by_ts_2(req, resp); + _I_("----------------------------------------"); + _I_("start_ts:", K(start_ts)); + _I_("req_start_log_id_by_ts_with_breakpoint finish", K(ret), K(req), K(resp)); + _I_("----------------------------------------"); + + const TsRespResultArray &res_arr = resp.get_results(); + TsReqParamArray param_arr = req.get_params(); + int64_t i = 0; + int64_t res_count = res_arr.count(); + req.reset(); + for (i = 0; OB_SUCC(ret) && i < res_count; i++) { + const TsRespResult &res = res_arr[i]; + if (OB_EXT_HANDLE_UNFINISH == res.err_) { + TsReqParam param; + param.pkey_ = param_arr[i].pkey_; + param.start_tstamp_ = start_ts; + param.break_info_.break_file_id_ = res.break_info_.break_file_id_; + param.break_info_.min_greater_log_id_ = res.break_info_.min_greater_log_id_; + ret = req.append_param(param); + stop = false; + } else { + // finished pkey + } + } + 
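+      // Partitions returned with OB_EXT_HANDLE_UNFINISH are re-appended together with their breakpoint info (break_file_id_ / min_greater_log_id_), so the next round resumes the scan instead of starting over; loop until every partition finishes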
resp.reset(); + if (OB_FAIL(ret)) { + _W_("re-send rpc error", K(ret)); + } + } + _I_("++++++++++++++++++++++++++++++++++++++++"); + } +} + +//---- +void id_case_1() +{ + // large enough, test handle_cold_pkeys_by_sw + uint64_t start_log_ids[N] = {1000, 1000, 1000}; + IdResp resp; + test_id_break(start_log_ids, resp); +} + +void id_case_2() +{ + // min_log_id in last_info_block, test handle_cold_pkeys_by_last_info_block + uint64_t start_log_ids[N] = {251, 251, 251}; + IdResp resp; + test_id_break(start_log_ids, resp); +} + +void id_case_3() +{ + // normal case + uint64_t start_log_ids[N] = {230, 230, 230}; + IdResp resp; + test_id_break(start_log_ids, resp); +} + +void id_case_4() +{ + // test break + int ret = OB_SUCCESS; + uint64_t start_log_ids[N] = {1, 1, 1}; + ObNetClient net_client; + if (OB_FAIL(net_client.init())) { + _E_("net client init error", K(ret)); + } else { + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + rpc.set_timeout(get_timeout()); + + _I_("++++++++++++++++++++++++++++++++++++++++"); + IdReq req; + IdResp resp; + bool stop = false; + for (int i = 0; OB_SUCC(ret) && i < N; i++) { + IdReqParam param; + param.pkey_ = pks[i]; + param.start_log_id_ = start_log_ids[i]; + if (OB_FAIL(req.append_param(param))) { + _W_("push param error", K(ret)); + } + } + + while (!stop) { + stop = true; + ret = rpc.req_start_pos_by_log_id_2(req, resp); + _I_("----------------------------------------"); + _I_("req_start_pos_by_log_id_with_breakpoint finish", K(ret), K(req), K(resp)); + _I_("----------------------------------------"); + + const IdRespResultArray &res_arr = resp.get_results(); + IdReqParamArray param_arr = req.get_params(); + int64_t i = 0; + int64_t res_count = res_arr.count(); + req.reset(); + for (i = 0; OB_SUCC(ret) && i < res_count; i++) { + const IdRespResult &res = res_arr[i]; + if (OB_EXT_HANDLE_UNFINISH == res.err_) { + IdReqParam param; + param.pkey_ = param_arr[i].pkey_; + param.start_log_id_ = start_log_ids[i]; + param.break_info_.break_file_id_ = res.break_info_.break_file_id_; + param.break_info_.min_greater_log_id_ = res.break_info_.min_greater_log_id_; + ret = req.append_param(param); + stop = false; + } else { + // finished pkey + } + } + resp.reset(); + if (OB_FAIL(ret)) { + _W_("re-send rpc error", K(ret)); + } + } + _I_("++++++++++++++++++++++++++++++++++++++++"); + } +} + +void ts_test() +{ + ts_case_1(); + ts_case_2(); + ts_case_3(); + ts_case_4(); + ts_case_5(); +} + +void id_test() +{ + id_case_1(); + id_case_2(); + id_case_3(); + id_case_4(); +} + +void test_id_cold() +{ + int ret = OB_SUCCESS; + ObNetClient net_client; + if (OB_FAIL(net_client.init())) { + _E_("net client init error", K(ret)); + } else { + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + rpc.set_timeout(get_timeout()); + + IdReq req; + IdResp resp; + + ObPartitionKey pkey(1099511677782, 0, 1); + IdReqParam param; + param.pkey_ = pkey; + param.start_log_id_ = 5; + if (OB_FAIL(req.append_param(param))) { + _W_("push param error", K(ret)); + } + ret = rpc.req_start_pos_by_log_id_2(req, resp); + _I_("----------------------------------------"); + _I_("req_start_pos_by_log_id finish", K(ret), K(req), K(resp)); + _I_("----------------------------------------"); + } +} + +int main(int argc, char** argv) +{ + UNUSED(argc); + UNUSED(argv); + ObLogger::get_logger().set_mod_log_levels("ALL.*:INFO, TLOG.*:INFO"); + + test_id_cold(); + + return 0; +} diff --git a/unittest/liboblog/nopretest_test_ext_fetcher.cpp 
b/unittest/liboblog/nopretest_test_ext_fetcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..17f5db20b106a6364bf45e93ae99f63cc0765c41 --- /dev/null +++ b/unittest/liboblog/nopretest_test_ext_fetcher.cpp @@ -0,0 +1,364 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include "common/ob_queue_thread.h" +#include "ob_log_fetcher_rpc_interface.h" +#include "clog/ob_log_entry.h" + +namespace oceanbase +{ +using namespace common; +using namespace obrpc; +using namespace liboblog; +using namespace liboblog::fetcher; +namespace unittest +{ + +class MockFectherInterface : public IFetcherRpcInterface +{ +public: + MockFectherInterface(ObNetClient &net_client, + const uint64_t tenant_id = OB_SYS_TENANT_ID) + : net_client_(net_client), + tenant_id_(tenant_id) + { + svr_finder_ = NULL; + } + void set_svr(const ObAddr &svr) + { + svr_ = svr; + } + virtual const ObAddr& get_svr() const + { + return svr_; + } + void set_timeout(const int64_t timeout) + { + timeout_ = timeout; + } + virtual int req_start_log_id_by_ts(const ObLogReqStartLogIdByTsRequest &req, + ObLogReqStartLogIdByTsResponse &resp) + { + UNUSED(req); + UNUSED(resp); + return OB_SUCCESS; + } + virtual int req_start_log_id_by_ts_2( + const ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + ObLogReqStartLogIdByTsResponseWithBreakpoint &resp) + { + UNUSED(req); + UNUSED(resp); + return OB_SUCCESS; + } + + virtual int req_start_pos_by_log_id( + const ObLogReqStartPosByLogIdRequest &req, + ObLogReqStartPosByLogIdResponse &resp) + { + UNUSED(req); + UNUSED(resp); + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2( + const ObLogReqStartPosByLogIdRequestWithBreakpoint& req, + ObLogReqStartPosByLogIdResponseWithBreakpoint& resp) + { + UNUSED(req); + UNUSED(resp); + return OB_SUCCESS; + } + virtual int fetch_log(const ObLogExternalFetchLogRequest& req, + ObLogExternalFetchLogResponse& resp) + { + UNUSED(req); + UNUSED(resp); + return OB_SUCCESS; + } + virtual int req_heartbeat_info(const ObLogReqHeartbeatInfoRequest& req, + ObLogReqHeartbeatInfoResponse& resp) + { + UNUSED(req); + UNUSED(resp); + return OB_SUCCESS; + } + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + UNUSED(req); + UNUSED(res); + return OB_SUCCESS; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } + + virtual int open_stream(const ObLogOpenStreamReq &req, + ObLogOpenStreamResp &resp) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = proxy.to(svr_).by(tenant_id_).timeout(timeout_).open_stream(req, resp); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc req heartbeat info", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + resp.reset(); + resp.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else { } + _D_("rpc: open_stream", K(ret), "svr", 
get_svr(), K(req), K(resp)); + } + return ret; + } + + virtual int fetch_stream_log(const ObLogStreamFetchLogReq &req, + ObLogStreamFetchLogResp &resp) + { + int ret = OB_SUCCESS; + ObLogExternalProxy proxy; + if (OB_SUCCESS != (ret = net_client_.get_proxy(proxy))) { + _E_("err get proxy", K(ret)); + } else { + ret = proxy.to(svr_).by(tenant_id_).timeout(timeout_).stream_fetch_log(req, resp); + int err = proxy.get_result_code().rcode_; + if (_FAIL_(ret) && _FAIL_(err)) { + _W_("err rpc req heartbeat info", K(ret), "result_code", err, + "svr", get_svr(), K(req)); + resp.reset(); + resp.set_err(OB_ERR_SYS); + ret = OB_SUCCESS; + } + else { } + _D_("rpc: stream_fetch_log", K(ret), "svr", get_svr(), K(req), K(resp)); + } + return ret; + } +private: + ObNetClient &net_client_; + SvrFinder *svr_finder_; + ObAddr svr_; + uint64_t tenant_id_; + int64_t timeout_; +}; +} +} + +using namespace oceanbase::common; +using namespace oceanbase::common::sqlclient; +using namespace oceanbase::obrpc; +using namespace oceanbase::liboblog; +using namespace oceanbase::unittest; +using namespace oceanbase::clog; + +ObAddr get_svr_addr() +{ + ObAddr svr; + int32_t port = 27800; + svr.set_ip_addr("100.81.140.76", port); + // int32_t port = 27800; + // svr.set_ip_addr("10.210.170.16", port); + return svr; +} + +int64_t get_timeout() +{ + return 60L * 1000 * 1000; +} + +//#define PKEY_COUNT 1 +#define PKEY_COUNT 2 +ObPartitionKey pks[PKEY_COUNT]; +ObCond table_ready; +int64_t trans_log_count_recved[PKEY_COUNT]; +uint64_t start_log_id[PKEY_COUNT]; + +#define INSERT_COUNT 9 +#define LIFE_TIME (1000 * 1000 * 60) + +void init_env() +{ + const int64_t table_id = 1101710651081591; + // const int64_t table_id = 1101710651081589; + for (int i = 0; i < PKEY_COUNT; i++) { + pks[i].init(table_id + i, 0, 1); + trans_log_count_recved[i] = 0; + start_log_id[i] = 1; + } +} + +void report_log_recved() +{ + for (int i = 0; i < PKEY_COUNT; i++) { + fprintf(stdout, "pkey.table_id = %ld, trans_log_num = %ld, next_log_id = %ld\n", static_cast(pks[i].table_id_), trans_log_count_recved[i], start_log_id[i]); + } +} + +void recv_log(ObLogStreamFetchLogResp &fetch_resp) +{ + int ret = OB_SUCCESS; + const int64_t log_num = fetch_resp.get_log_num(); + const char *buf = fetch_resp.get_log_entry_buf(); + ObLogEntry entry; + int64_t pos = 0; + int p = 0; + for (int64_t idx = 0; idx < log_num; ++idx) { + ret = entry.deserialize(buf, OB_MAX_LOG_BUFFER_SIZE, pos); + ASSERT_EQ(OB_SUCCESS, ret); + const ObLogEntryHeader &header = entry.get_header(); + _I_("recv clog_entry", K(ret), K(entry)); + for (p = 0; p < PKEY_COUNT && pks[p] != header.get_partition_key(); p++); + ASSERT_TRUE(p < PKEY_COUNT); + if (OB_LOG_SUBMIT == header.get_log_type()) { + trans_log_count_recved[p]++; + _I_("trans_log_count_recved", K(p), "pkey", pks[p], "trans_cnt", trans_log_count_recved[p]); + } + ASSERT_TRUE(header.get_log_id() == start_log_id[p]); + start_log_id[p]++; + } +} + +bool recv_all() +{ + int i = 0; + for (i = 0; (trans_log_count_recved[i] == INSERT_COUNT) && i < PKEY_COUNT; i++); + // return i == PKEY_COUNT; + return false; +} + +void start_fetch() +{ + int ret = OB_SUCCESS; + ObNetClient net_client; + ASSERT_EQ(OB_SUCCESS, net_client.init()); + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + rpc.set_timeout(get_timeout()); + + int64_t c1 = 0; + int64_t c2 = 0; + int err = OB_SUCCESS; + while (!recv_all()) { + c1++; + ObLogOpenStreamReq open_req; + ObLogOpenStreamResp open_resp; + for (int i = 0; OB_SUCC(ret) && i < PKEY_COUNT; i++) { + 
ObLogOpenStreamReq::Param param; + param.pkey_ = pks[i]; + param.start_log_id_ = start_log_id[i]; + ASSERT_EQ(OB_SUCCESS, open_req.append_param(param)); + } + open_req.set_stream_lifetime(LIFE_TIME); + ret = rpc.open_stream(open_req, open_resp); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(open_resp.get_stream_seq().is_valid()); + + _I_("open_stream success", K(open_resp)); + + const ObStreamSeq &seq = open_resp.get_stream_seq(); + const int64_t upper_lmt_ts = 100000000000000000L; // large enough + const int64_t step = 100; + c2 = 0; + while (!recv_all()) { + c2++; + ObLogStreamFetchLogReq fetch_req; + ObLogStreamFetchLogResp fetch_resp; + ASSERT_EQ(OB_SUCCESS, fetch_req.set_stream_seq(seq)); + ASSERT_EQ(OB_SUCCESS, fetch_req.set_upper_limit_ts(upper_lmt_ts)); + ASSERT_EQ(OB_SUCCESS, fetch_req.set_log_cnt_per_part_per_round(step)); + + ret = rpc.fetch_stream_log(fetch_req, fetch_resp); + ASSERT_EQ(OB_SUCCESS, ret); + err = fetch_resp.get_err(); + if (OB_SUCCESS == err) { + recv_log(fetch_resp); + } else if (OB_STREAM_NOT_EXIST == err) { + fprintf(stdout, "stream not exist\n"); + break; + } else { + fprintf(stdout, "error ret=%d\n", err); + ASSERT_TRUE(false); + } + _I_("fetch", K(c1), K(c2)); + if (true && REACH_TIME_INTERVAL(1000 * 1000)) { + fprintf(stdout, "--------------------------------------------------\n"); + fprintf(stdout, "fetch, c1 = %ld, c2 = %ld\n", c1, c2); + report_log_recved(); + } + usleep(1000 * 1000); + } + } +} + +void del_stale() +{ + int ret = OB_SUCCESS; + ObNetClient net_client; + ASSERT_EQ(OB_SUCCESS, net_client.init()); + MockFectherInterface rpc(net_client); + rpc.set_svr(get_svr_addr()); + rpc.set_timeout(get_timeout()); + + ObLogOpenStreamReq open_req; + ObLogOpenStreamResp open_resp; + for (int i = 0; OB_SUCC(ret) && i < PKEY_COUNT; i++) { + ObLogOpenStreamReq::Param param; + param.pkey_ = pks[i]; + param.start_log_id_ = start_log_id[i]; + ASSERT_EQ(OB_SUCCESS, open_req.append_param(param)); + } + open_req.set_stream_lifetime(LIFE_TIME); + ret = rpc.open_stream(open_req, open_resp); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(open_resp.get_stream_seq().is_valid()); + + _I_("open_stream success", K(open_resp)); + + const ObStreamSeq &first_seq = open_resp.get_stream_seq(); + + ObLogOpenStreamReq open_req2; + ObLogOpenStreamResp open_resp2; + for (int i = 0; OB_SUCC(ret) && i < PKEY_COUNT; i++) { + ObLogOpenStreamReq::Param param; + param.pkey_ = pks[i]; + param.start_log_id_ = start_log_id[i]; + ASSERT_EQ(OB_SUCCESS, open_req2.append_param(param)); + } + open_req2.set_stale_stream(first_seq); + open_req2.set_stream_lifetime(LIFE_TIME); + ret = rpc.open_stream(open_req2, open_resp2); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(open_resp2.get_stream_seq().is_valid()); + + _I_("open_stream success", K(open_resp2)); +} + +int main(int argc, char** argv) +{ + UNUSED(argc); + UNUSED(argv); + system("rm els.log"); + OB_LOGGER.set_file_name("els.log", true); + ObLogger::get_logger().set_mod_log_levels("ALL.*:INFO, TLOG.*:DEBUG"); + init_env(); + start_fetch(); + // del_stale(); + return 0; +} diff --git a/unittest/liboblog/test_log_config.cpp b/unittest/liboblog/test_log_config.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a8cf2e064e53202ef355e754c1e80d8c177d9cab --- /dev/null +++ b/unittest/liboblog/test_log_config.cpp @@ -0,0 +1,261 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include +#include "ob_log_config.h" + +#define ADD_CONFIG_INT(var, value) \ + do { \ + std::string name_str = #var; \ + std::string value_str = #value; \ + var = value; \ + databuff_printf(config_buf_, sizeof(config_buf_), config_buf_pos_, "%s=%ld\n", #var, var); \ + config_map_.erase(name_str); \ + config_map_.insert(std::pair(name_str, value_str)); \ + } while (0) + +#define ADD_CONFIG_STR(var, value) \ + do { \ + std::string name_str = #var; \ + std::string value_str = value; \ + var = value; \ + databuff_printf(config_buf_, sizeof(config_buf_), config_buf_pos_, "%s=%s\n", #var, var); \ + config_map_.erase(name_str); \ + config_map_.insert(std::pair(name_str, value_str)); \ + } while (0) + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ +class TestLogConfig : public ::testing::Test +{ + static const int64_t MAX_CONFIG_BUFFER_SIZE = 1 << 10; +public: + TestLogConfig() {} + ~TestLogConfig() {} + + virtual void SetUp(); + virtual void TearDown() {} + +public: + int64_t dml_parser_thread_num; + int64_t sequencer_thread_num; + int64_t formatter_thread_num; + int64_t instance_num; + int64_t instance_index; + const char *log_level; + const char *cluster_url; + const char *cluster_user; + const char *cluster_password; + const char *config_fpath; + const char *cluster_appname; + const char *cluster_db_name; + const char *timezone; + const char *tb_white_list; + const char *tb_black_list; + int64_t sql_conn_timeout_us; + int64_t sql_query_timeout_us; + + int64_t unknown_int_config; + const char *unknown_str_config; + + char config_buf_[MAX_CONFIG_BUFFER_SIZE]; + int64_t config_buf_pos_; + + std::map config_map_; +}; + +void TestLogConfig::SetUp() +{ + config_buf_pos_ = 0; + + ADD_CONFIG_INT(dml_parser_thread_num, 100); + ADD_CONFIG_INT(sequencer_thread_num, 200); + ADD_CONFIG_INT(formatter_thread_num, 300); + ADD_CONFIG_INT(instance_num, 1); + ADD_CONFIG_INT(instance_index, 0); + ADD_CONFIG_INT(sql_conn_timeout_us, 13000000000); + ADD_CONFIG_INT(sql_query_timeout_us, 12000000000); + + ADD_CONFIG_STR(log_level, "INFO"); + ADD_CONFIG_STR(cluster_url, "http:://www.test_url.com/abcdefg/"); + ADD_CONFIG_STR(cluster_user, "中华人民共和国"); + ADD_CONFIG_STR(cluster_password, "阿里巴巴"); + ADD_CONFIG_STR(config_fpath, "/home/abcdefg/hijklmn"); + ADD_CONFIG_STR(cluster_appname, "obtest"); + ADD_CONFIG_STR(cluster_db_name, "oceanbase"); + ADD_CONFIG_STR(timezone, "+8:00"); + ADD_CONFIG_STR(tb_white_list, "*.*.*"); + ADD_CONFIG_STR(tb_black_list, "|"); + + // test unknown config + ADD_CONFIG_INT(unknown_int_config, 1010); + ADD_CONFIG_STR(unknown_str_config, "unknown"); +} + +TEST_F(TestLogConfig, init) +{ + LOG_INFO("sizeof ObLogConfig"); + ObLogConfig config; + + EXPECT_EQ(OB_SUCCESS, config.init()); + // After initialization, the configuration items are not detected by default + EXPECT_NE(OB_SUCCESS, config.check_all()); + config.print(); +} + +TEST_F(TestLogConfig, load_from_buffer) +{ + ObLogConfig config; + EXPECT_EQ(OB_SUCCESS, config.init()); + + EXPECT_EQ(OB_SUCCESS, config.load_from_buffer(config_buf_, strlen(config_buf_))); + EXPECT_EQ(OB_SUCCESS, config.check_all()); + 
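+ // SetUp() also injected unknown_int_config / unknown_str_config; loading
+ // succeeds while name checking is off, and the reload below with
+ // check_name = true is expected to fail, presumably on those unknown names.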
config.print(); + + EXPECT_EQ(dml_parser_thread_num, config.dml_parser_thread_num); + EXPECT_EQ(sequencer_thread_num, config.sequencer_thread_num); + EXPECT_EQ(formatter_thread_num, config.formatter_thread_num); + EXPECT_EQ(0, strcmp(cluster_url, config.cluster_url.str())); + EXPECT_EQ(0, strcmp(log_level, config.log_level.str())); + EXPECT_EQ(0, strcmp(cluster_user, config.cluster_user.str())); + EXPECT_EQ(0, strcmp(cluster_password, config.cluster_password.str())); + EXPECT_EQ(0, strcmp(config_fpath, config.config_fpath.str())); + + bool check_name = true; + int64_t version = 0; + EXPECT_NE(OB_SUCCESS, + config.load_from_buffer(config_buf_, strlen(config_buf_), version, check_name)); +} + +TEST_F(TestLogConfig, load_from_map) +{ + ObLogConfig config; + EXPECT_EQ(OB_SUCCESS, config.init()); + + EXPECT_EQ(OB_SUCCESS, config.load_from_map(config_map_)); + EXPECT_EQ(OB_SUCCESS, config.check_all()); + config.print(); + + EXPECT_EQ(dml_parser_thread_num, config.dml_parser_thread_num); + EXPECT_EQ(sequencer_thread_num, config.sequencer_thread_num); + EXPECT_EQ(formatter_thread_num, config.formatter_thread_num); + EXPECT_EQ(0, strcmp(cluster_url, config.cluster_url.str())); + EXPECT_EQ(0, strcmp(log_level, config.log_level.str())); + EXPECT_EQ(0, strcmp(cluster_user, config.cluster_user.str())); + EXPECT_EQ(0, strcmp(cluster_password, config.cluster_password.str())); + EXPECT_EQ(0, strcmp(config_fpath, config.config_fpath.str())); + + bool check_name = true; + int64_t version = 0; + EXPECT_NE(OB_SUCCESS, config.load_from_map(config_map_, version, check_name)); +} + +TEST_F(TestLogConfig, load_from_file) +{ + // The ObLogConfig class is larger than the local variable stack and would overflow if located + // Therefore, the dynamic construction method is used here + ObLogConfig *config_from_buffer_ptr = new ObLogConfig(); + ObLogConfig *config_from_file_ptr = new ObLogConfig(); + + EXPECT_EQ(OB_SUCCESS, config_from_buffer_ptr->init()); + EXPECT_EQ(OB_SUCCESS, config_from_file_ptr->init()); + ObLogConfig &config_from_buffer = *config_from_buffer_ptr; + ObLogConfig &config_from_file = *config_from_file_ptr; + const char *config_file = "liboblog.conf"; + + // Load configuration items from the Buffer and verify the accuracy of the configuration items + EXPECT_EQ(OB_SUCCESS, config_from_buffer.load_from_buffer(config_buf_, strlen(config_buf_))); + EXPECT_EQ(OB_SUCCESS, config_from_buffer.check_all()); + config_from_buffer.print(); + EXPECT_EQ(dml_parser_thread_num, config_from_buffer.dml_parser_thread_num); + EXPECT_EQ(sequencer_thread_num, config_from_buffer.sequencer_thread_num); + EXPECT_EQ(formatter_thread_num, config_from_buffer.formatter_thread_num); + EXPECT_EQ(0, strcmp(cluster_url, config_from_buffer.cluster_url.str())); + EXPECT_EQ(0, strcmp(log_level, config_from_buffer.log_level.str())); + EXPECT_EQ(0, strcmp(cluster_user, config_from_buffer.cluster_user.str())); + EXPECT_EQ(0, strcmp(cluster_password, config_from_buffer.cluster_password.str())); + EXPECT_EQ(0, strcmp(config_fpath, config_from_buffer.config_fpath.str())); + + // Dump configuration items into a file + EXPECT_EQ(OB_SUCCESS, config_from_buffer.dump2file(config_file)); + + // Loading configuration items from a file + EXPECT_EQ(OB_SUCCESS, config_from_file.load_from_file(config_file)); + + // Verify the accuracy of configuration items + config_from_file.print(); + EXPECT_EQ(dml_parser_thread_num, config_from_file.dml_parser_thread_num); + EXPECT_EQ(sequencer_thread_num, config_from_file.sequencer_thread_num); + 
EXPECT_EQ(formatter_thread_num, config_from_file.formatter_thread_num); + EXPECT_EQ(0, strcmp(cluster_url, config_from_file.cluster_url.str())); + EXPECT_EQ(0, strcmp(log_level, config_from_file.log_level.str())); + EXPECT_EQ(0, strcmp(cluster_user, config_from_file.cluster_user.str())); + EXPECT_EQ(0, strcmp(cluster_password, config_from_file.cluster_password.str())); + EXPECT_EQ(0, strcmp(config_fpath, config_from_file.config_fpath.str())); + + if (NULL != config_from_buffer_ptr) { + delete config_from_buffer_ptr; + config_from_buffer_ptr = NULL; + } + + if (NULL != config_from_file_ptr) { + delete config_from_file_ptr; + config_from_file_ptr = NULL; + } +} + +// Check that the ObLogConfig::check_all() function actually formats the cluster_url +// default check_all() removes the double quotes from cluster_url +TEST_F(TestLogConfig, format_cluster_url) +{ + ObLogConfig config; + EXPECT_EQ(OB_SUCCESS, config.init()); + const char *URL = "http://abc.com/def/hijklmn"; + char cluster_url[1024]; + + ASSERT_EQ(OB_SUCCESS, config.load_from_buffer(config_buf_, strlen(config_buf_))); + ASSERT_EQ(OB_SUCCESS, config.check_all()); + + snprintf(cluster_url, sizeof(cluster_url), "\""); + ASSERT_TRUE(config.cluster_url.set_value(cluster_url)); + ASSERT_NE(OB_SUCCESS, config.format_cluster_url()); + + snprintf(cluster_url, sizeof(cluster_url), "\"\""); + ASSERT_TRUE(config.cluster_url.set_value(cluster_url)); + ASSERT_NE(OB_SUCCESS, config.format_cluster_url()); + + snprintf(cluster_url, sizeof(cluster_url), "\"%s\"", URL); + ASSERT_TRUE(config.cluster_url.set_value(cluster_url)); + ASSERT_EQ(OB_SUCCESS, config.format_cluster_url()); + EXPECT_EQ(0, strcmp(URL, config.cluster_url.str())); + + // No handling of single inverted commas + snprintf(cluster_url, sizeof(cluster_url), "\'\'"); + ASSERT_TRUE(config.cluster_url.set_value(cluster_url)); + ASSERT_EQ(OB_SUCCESS, config.format_cluster_url()); +} + +} // namespace liboblog +} // namespace oceanbase + +int main(int argc, char **argv) +{ + OB_LOGGER.set_file_name("test_log_config.log", true); + OB_LOGGER.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_fake_common_config.cpp b/unittest/liboblog/test_log_fake_common_config.cpp new file mode 100644 index 0000000000000000000000000000000000000000..208bb033a15ed5f390351e87c99489d816b50e55 --- /dev/null +++ b/unittest/liboblog/test_log_fake_common_config.cpp @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "ob_log_fake_common_config.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace liboblog +{ +class TestLogFakeCommonConfig : public ::testing::Test +{ +public: + TestLogFakeCommonConfig() {} + ~TestLogFakeCommonConfig() {} + + virtual void SetUp() {} + virtual void TearDown() {} +}; + +TEST_F(TestLogFakeCommonConfig, common_test) +{ + ObLogFakeCommonConfig fake_config; + EXPECT_EQ(OB_OBLOG, fake_config.get_server_type()); +} + +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_fetcher.cpp b/unittest/liboblog/test_log_fetcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5819b5920f5e8d2ca8b3502faaf8610efbbc80dc --- /dev/null +++ b/unittest/liboblog/test_log_fetcher.cpp @@ -0,0 +1,165 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include + +#include "lib/allocator/ob_malloc.h" +#include "lib/allocator/ob_concurrent_fifo_allocator.h" +#include "lib/container/ob_array.h" + +#include "liboblog/src/ob_i_log_fetcher.h" +#include "liboblog/src/ob_log_fetcher_utils.h" +#include "liboblog/src/ob_log_fetcher.h" + +#include "test_log_fetcher_common_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace storage; +using namespace transaction; +using namespace clog; +using namespace fetcher; + +namespace oceanbase +{ +namespace unittest +{ + + +/* + * Manual: + * - This test allows you to fetch log data from + * a single observer. + * - Partitions and data are set up by this test. + */ +/* + * Fetch Log Test. + * Use schema 1. + */ +TEST(DISABLED_ObLogFetcherEnhanced, FetchLogTest1) +//TEST(ObLogFetcherEnhanced, FetchLogTest1) +{ + ObClockGenerator::init(); + + // Prepare svr. + SvrCfg svr_cfg; + svr_cfg.svr_addr_ = "10.210.177.162"; + svr_cfg.internal_port_ = 43000; + svr_cfg.mysql_port_ = 43001; + svr_cfg.mysql_db_ = "oceanbase"; + svr_cfg.mysql_password_ = ""; + svr_cfg.mysql_user_ = "root"; + svr_cfg.mysql_timeout_ = 1 * _SEC_; + + // Prepare table. + ObArray pkeys; + const int64_t table_cnt = 3; + prepare_table_1(svr_cfg, + prepare_table_name_1(), + table_cnt, + prepare_table_schema_1(), + pkeys); + + // Print them. + for (int64_t idx = 0; idx < pkeys.count(); ++idx) { + ObPartitionKey &key = pkeys.at(idx); + _I_(">>> add partition key", K(key)); + } + + // Prepare svr provider. + MockSvrProvider1 svr_provider; + ObAddr addr(ObAddr::IPV4, svr_cfg.svr_addr_, svr_cfg.mysql_port_); + svr_provider.add_svr(addr); + + // Prepare err handler. + MockLiboblogErrHandler1 err_handler; + + // Prepare parser. + MockParser1 mock_parser; + + // Fetcher config. 
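+ // fcfg.reset() presumably restores compiled-in defaults; this test does not
+ // tune any fetcher knob beyond that.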
+ FetcherConfig fcfg; + fcfg.reset(); + + ObConcurrentFIFOAllocator fifo; + int64_t G = 1024 * 1024 * 1024; + EXPECT_EQ(OB_SUCCESS, fifo.init(1 * G, 1 * G, OB_MALLOC_BIG_BLOCK_SIZE)); + + // Task Pool + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, task_pool.init(&fifo, 10240, 1024, 4 * 1024L * 1024L, true)); + + // Prepare fetcher. + ObLogFetcherEnhanced fetcher; + int ret = fetcher.init(&mock_parser, + &err_handler, + &svr_provider, + &task_pool, + fcfg); + EXPECT_EQ(OB_SUCCESS, ret); + + // Add partitions. + for (int64_t idx = 0; idx < pkeys.count(); ++idx) { + ret = fetcher.start_fetch(pkeys.at(idx), 1); + EXPECT_EQ(OB_SUCCESS, ret); + } + + // Start worker. + ret = fetcher.start(); + EXPECT_EQ(OB_SUCCESS, ret); + _I_(">>> Start fetch"); + + // Generate data. + _I_(">>> Generate data"); + const int64_t trans_cnt_per_part = 100; + const int64_t part_cnt = table_cnt; // pcnt == table cnt. + const int64_t trans_cnt = part_cnt * trans_cnt_per_part; + ConnectorConfig cfg = prepare_cfg_1(svr_cfg); + for (int64_t idx = 0; idx < table_cnt; ++idx) { + DataGenerator1 gen(cfg); + gen.insert(prepare_table_name_1()[idx], 0, trans_cnt_per_part); + gen.join(); + } + + // Wait. + while (mock_parser.get_trans_cnt() < trans_cnt) { + usec_sleep(1 * _SEC_); + _I_(">>> Waiting..."); + } + + // Stop everything. + _I_(">>> Stop fetch"); + for (int64_t idx = 0; idx < pkeys.count(); ++idx) { + ret = fetcher.stop_fetch(pkeys.at(idx)); + EXPECT_EQ(OB_SUCCESS, ret); + } + + fetcher.stop(); + ret = fetcher.destroy(); + EXPECT_EQ(OB_SUCCESS, ret); +} + +} +} + + + +int main(int argc, char **argv) +{ + ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} + diff --git a/unittest/liboblog/test_log_fetcher_common_utils.h b/unittest/liboblog/test_log_fetcher_common_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..db9cf522d1efc97de9ad38ed8ac3f2db1228c0d6 --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_common_utils.h @@ -0,0 +1,1494 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include +#include + +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" + +#include "liboblog/src/ob_log_instance.h" +#include "liboblog/src/ob_log_fetcher_stream.h" +#include "liboblog/src/ob_log_fetcher_part_stream.h" + +#include "ob_log_utils.h" // get_timestamp + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * Utils. + */ +typedef std::vector Svrs; +typedef std::vector PKeys; +typedef std::vector LogIds; +typedef std::vector Tstamps; + +/* + * Mock Rpc Interface 1. + * It owns N partitions, each has M log entries. + * It returns L log entries in each fetch_log() call. + * Log entry contains nothing. 
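+ * Entries are generated round-robin across partitions, so offset_ doubles as
+ * the entry's index in the global entry vector.
+ * A minimal construction sketch (the partition key values are illustrative):
+ *   PKeys pkeys;
+ *   pkeys.push_back(ObPartitionKey(1001, 0, 1));
+ *   MockRpcInterface1Factory factory(pkeys, 100, 10); // 100 logs/partition, 10 logs/call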
+ * Used to test: + * - add partition into Stream. + * - update file id & offset. + * - fetch log. + * - kick out offline partitions. + * - discard partitions. + */ +class MockRpcInterface1 : public IFetcherRpcInterface +{ + struct Entry + { + file_id_t file_id_; + offset_t offset_; + uint64_t log_id_; + ObPartitionKey pkey_; + }; + // Use offset_ as index of Entry in EntryVec. + typedef std::vector EntryVec; +public: + MockRpcInterface1(const PKeys &pkeys, + const int64_t log_entry_per_p, + const int64_t log_entry_per_call) + { + log_entry_per_call_ = log_entry_per_call; + log_entry_per_p_ = log_entry_per_p; + addr_ = ObAddr(ObAddr::IPV4, "127.0.0.1", 5999); + // Gen entries. + int64_t log_entry_cnt = 0; + for (int64_t log_id = 1; log_id < log_entry_per_p + 1; ++log_id) { + for (int64_t pidx = 0, cnt = pkeys.size(); pidx < cnt; ++pidx) { + // Gen entry. + Entry entry; + entry.pkey_ = pkeys.at(pidx); + entry.file_id_ = 1; + entry.offset_ = static_cast(log_entry_cnt++); + entry.log_id_ = log_id; + // Save it. + entries_.push_back(entry); + } + } + } + + virtual ~MockRpcInterface1() { } + + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + /* + * Err Supportted: + * - OB_SUCCESS + * - OB_ENTRY_NOT_EXIST: partition exists without any log + * - OB_ERR_OUT_OF_LOWER_BOUND: log id beyond lower bound + * - OB_ERR_OUT_OF_UPPER_BOUND: log id beyond upper bound + */ + typedef obrpc::ObLogReqStartPosByLogIdRequest::Param Param; + typedef obrpc::ObLogReqStartPosByLogIdResponse::Result Result; + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + // Locate for a partition. + const Param ¶m = req.get_params().at(idx); + const ObPartitionKey &pkey = param.pkey_; + const uint64_t start_log_id = param.start_log_id_; + Result result; + result.reset(); + // Search. + bool done = false; + bool partition_exist = false; + for (int64_t entry_idx = 0, entry_cnt = entries_.size(); + _SUCC_(result.err_) && !done && entry_idx < entry_cnt; + ++entry_idx) { + const Entry &entry = entries_.at(entry_idx); + if (entry.pkey_ == pkey) { + partition_exist = true; + // Got it. + if (start_log_id == entry.log_id_) { + result.err_ = OB_SUCCESS; + result.file_id_ = 1; + result.offset_ = entry.offset_; + done = true; + } + // Too small log id. + else if (start_log_id < entry.log_id_) { + result.err_ = OB_ERR_OUT_OF_LOWER_BOUND; + } + } + } + if (!done && _SUCC_(result.err_)) { + // No log entry. + if (!partition_exist) { + result.err_ = OB_ENTRY_NOT_EXIST; + } + // Too large log id. 
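+ // The partition exists but every stored entry has a smaller log id, which
+ // mirrors a server whose clog has not yet reached the requested id.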
+ else { + result.err_ = OB_ERR_OUT_OF_UPPER_BOUND; + } + } + res.append_result(result); + } + _D_("mock rpc 1 req pos", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + typedef obrpc::ObLogExternalFetchLogRequest::Param Param; + typedef obrpc::ObLogExternalFetchLogResponse::OfflinePartition OP; + + // Fetch log. + const offset_t offset = req.get_offset(); + if (offset < 0) { + return OB_INVALID_ARGUMENT; + } + offset_t ret_offset = offset; + // Scan. + for (int64_t idx = static_cast(offset), cnt = entries_.size(); + idx < cnt && res.get_log_num() < log_entry_per_call_; + ++idx) { + const Entry &entry = entries_.at(idx); + bool fetch = false; + for (int64_t pidx = 0, pcnt = req.get_params().count(); + !fetch && pidx < pcnt; + ++pidx) { + const Param ¶m = req.get_params().at(pidx); + if (entry.pkey_ == param.pkey_ + && param.start_log_id_ <= entry.log_id_ + && entry.log_id_ <= param.last_log_id_) { + fetch = true; + } + } + if (fetch) { + ret_offset = static_cast(entry.offset_); + // Gen header. + int64_t ts = get_timestamp(); + ObProposalID proposal_id; + proposal_id.addr_ = addr_; + proposal_id.ts_ = ts; + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, entry.pkey_, + entry.log_id_, mock_load_, mock_load_len_, + ts, ts, proposal_id, ts, ObVersion(1)); + ObLogEntry log_entry; + log_entry.generate_entry(header, mock_load_); + res.append_log(log_entry); + } + } + res.set_file_id_offset(1, ret_offset + 1); + + // Handle offline partition. + // Here, if a partition reaches its last log, it is offline. + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + const Param ¶m = req.get_params().at(idx); + const uint64_t last_log_id = log_entry_per_p_; + if (last_log_id < param.start_log_id_) { + OP op; + op.pkey_ = param.pkey_; + // op.last_log_id_ = last_log_id; + op.sync_ts_ = last_log_id; + res.append_offline_partition(op); + } + } + + _D_("mock rpc 1 fetch log", K(req), K(res)); + + return OB_SUCCESS; + } + + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } +private: + ObAddr addr_; + int64_t log_entry_per_call_; + int64_t log_entry_per_p_; + EntryVec entries_; + static const int64_t mock_load_len_ = 8; + char mock_load_[mock_load_len_]; +}; + +/* + * Factory. 
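+ * Hands out MockRpcInterface1 instances that all share the same preset
+ * partition list and per-partition / per-call entry counts.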
+ */ +class MockRpcInterface1Factory : public IFetcherRpcInterfaceFactory +{ +public: + MockRpcInterface1Factory(const PKeys &pkeys, + const int64_t log_entry_per_p, + const int64_t log_entry_per_call) + : pkeys_(pkeys), + log_entry_per_p_(log_entry_per_p), + log_entry_per_call_(log_entry_per_call) + { } + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface1(pkeys_, log_entry_per_p_, log_entry_per_call_); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } +private: + PKeys pkeys_; + int64_t log_entry_per_p_; + int64_t log_entry_per_call_; +}; + +/* + * Mock Rpc Interface 2. + * It owns N servers, each hold some partitions, one of them is + * the leader. When request start log id, a preseted value is returned. + * Used to test: + * - fetch partition + * - locate start log id + * - activate partition stream + * - discard partition stream + */ +class MockRpcInterface2 : public IFetcherRpcInterface +{ + struct Partition + { + ObPartitionKey pkey_; + uint64_t start_log_id_; + bool is_leader_; + }; + struct Svr + { + ObAddr svr_; + std::vector partitions_; + bool operator==(const Svr &other) const + { + return svr_ == other.svr_; + } + bool operator<(const Svr &other) const + { + return svr_ < other.svr_; + } + }; +public: + /* + * Set static result set. The first partition in svrs is the leader. + */ + static void add_partition(const ObPartitionKey &pkey, + uint64_t start_log_id, + std::vector svrs) + { + EXPECT_NE(0, svrs.size()); + Partition pt = { pkey, start_log_id, false }; + for (int64_t idx = 0, cnt = svrs.size(); + idx < cnt; + ++idx) { + pt.is_leader_ = (0 == idx); + Svr target; + target.svr_ = svrs.at(idx); + std::vector::iterator itor = + std::find(svrs_.begin(), svrs_.end(), target); + if (svrs_.end() == itor) { + target.partitions_.push_back(pt); + svrs_.push_back(target); + std::sort(svrs_.begin(), svrs_.end()); + } + else { + (*itor).partitions_.push_back(pt); + } + } + } + /* + * Clear static result set. + */ + static void clear_result_set() + { + svrs_.clear(); + } +public: + virtual void set_svr(const common::ObAddr& svr) { svr_ = svr; } + + virtual const ObAddr& get_svr() const { return svr_; } + + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + bool svr_exist = false; + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + const ObPartitionKey &pkey = req.get_params().at(idx).pkey_; + bool done = false; + for (int64_t svr_idx = 0, svr_cnt = svrs_.size(); + svr_idx < svr_cnt; + ++svr_idx) { + // Simulating sending rpc to svr. + if (svr_ == svrs_.at(svr_idx).svr_) { + svr_exist = true; + const Svr &svr = svrs_.at(svr_idx); + for (int64_t pidx = 0, pcnt = svr.partitions_.size(); + pidx < pcnt; + ++pidx) { + const Partition &p = svr.partitions_.at(pidx); + if (pkey == p.pkey_) { + done = true; + typedef obrpc::ObLogReqStartLogIdByTsResponse::Result Result; + Result result = { OB_SUCCESS, p.start_log_id_, false}; + res.append_result(result); + } + } + if (!done) { + res.set_err(OB_PARTITION_NOT_EXIST); + } + } + } // End for. + } + + _D_("mock rpc req start log id", K(req), K(res)); + + return (svr_exist) ? 
OB_SUCCESS : OB_TIMEOUT; + } + + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + // Timeout. + _D_("mock rpc req pos by log id", K(req), K(res)); + return OB_TIMEOUT; + } + + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + // Timeout. + _D_("mock rpc req pos by log id", K(req), K(res)); + return OB_TIMEOUT; + } + + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int get_log_svr(const ObPartitionKey& pkey, const uint64_t log_id, + ObSvrs& svrs, int& leader_cnt) + { + UNUSED(log_id); + leader_cnt = 0; + for (int64_t svr_idx = 0, svr_cnt = svrs_.size(); + svr_idx < svr_cnt; + ++svr_idx) { + Svr &svr = svrs_.at(svr_idx); + for (int64_t pidx = 0, pcnt = svr.partitions_.size(); + pidx < pcnt; + ++pidx) { + const Partition &p = svr.partitions_.at(pidx); + if (pkey == p.pkey_) { + svrs.push_back(svr.svr_); + if (p.is_leader_) { + std::swap(svrs.at(leader_cnt), svrs.at(svrs.count() - 1)); + leader_cnt += 1; + } + } + } + } + _D_("mock rpc req log servers", K(pkey), K(svrs), K(leader_cnt)); + return OB_SUCCESS; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } +private: + // Target svr. + ObAddr svr_; + // Data set. + static std::vector svrs_; +}; +// Static data set. So all instances could access it. +std::vector MockRpcInterface2::svrs_; + +/* + * Factory. + */ +class MockRpcInterface2Factory : public IFetcherRpcInterfaceFactory +{ +public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface2(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } +}; + +/* + * Mock Rpc Interface 3. + * It owns some servers and partitions, can return + * svr addr and heartbeat timestamps. + * Notice: user set tuples + * as results. + * Used to test: + * - Heartbeat facilities. 
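+ * Results live in a static vector, so every instance created by the factory
+ * sees the same data set. A minimal setup sketch (address, key and ids are
+ * illustrative):
+ *   MockRpcInterface3::clear_result();
+ *   MockRpcInterface3::add_result(ObAddr(ObAddr::IPV4, "127.0.0.1", 5999),
+ *                                 ObPartitionKey(1001, 0, 1), 10, get_timestamp());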
+ */ +class MockRpcInterface3 : public IFetcherRpcInterface +{ + struct Entry + { + ObAddr svr_; + ObPartitionKey pkey_; + uint64_t log_id_; + int64_t tstamp_; + bool operator<(const Entry &other) + { + return log_id_ < other.log_id_; + } + }; + typedef std::vector EntryVec; +public: + static void clear_result() + { + entry_vec_.clear(); + } + static void add_result(const ObAddr &svr, const ObPartitionKey &pkey, + const uint64_t log_id, const int64_t tstamp) + { + Entry entry = { svr, pkey, log_id, tstamp }; + entry_vec_.push_back(entry); + } +public: + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + typedef obrpc::ObLogReqHeartbeatInfoRequest::Param Param; + typedef obrpc::ObLogReqHeartbeatInfoResponse::Result Result; + // Itor params. + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + const Param ¶m = req.get_params().at(idx); + // Find result. + bool done = false; + for (int64_t idx2 = 0, cnt2 = entry_vec_.size(); + !done && idx2 < cnt2; + ++idx2) { + const Entry &entry = entry_vec_[idx2]; + if (param.pkey_ == entry.pkey_ + && param.log_id_ == entry.log_id_) { + done = true; + Result result; + result.err_ = OB_SUCCESS; + result.tstamp_ = entry.tstamp_; + res.append_result(result); + } + } + if (!done) { + Result result; + result.err_ = OB_NEED_RETRY; + res.append_result(result); + } + } + + _D_("mock rpc: req heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int get_log_svr(const ObPartitionKey& pkey, const uint64_t log_id, + ObSvrs& svrs, int& leader_cnt) + { + // Todo. In this version, only one result is enough, log_id is not used. 
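+ // Only the first server that hosts the partition is returned and leader
+ // ordering is not simulated, so leader_cnt is deliberately left untouched.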
+ UNUSED(log_id); + UNUSED(leader_cnt); + + for (int64_t idx = 0, cnt = entry_vec_.size(); idx < cnt; ++idx) { + const Entry &entry = entry_vec_[idx]; + if (pkey == entry.pkey_) { + svrs.push_back(entry.svr_); + break; + } + } + return OB_SUCCESS; + } + + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } +private: + static EntryVec entry_vec_; +}; + +MockRpcInterface3::EntryVec MockRpcInterface3::entry_vec_; + +/* + * Factory. + */ +class MockRpcInterface3Factory : public IFetcherRpcInterfaceFactory +{ +public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface3(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } +}; + +/* + * TransLog Generator 1. + * Generate single partition transaction logs. + * Support get trans logs in CORRECT order. + * Use: + * - Call next_trans(), specify trans params. + * - Get logs in correct order: redo, redo, ..., prepare, commit/abort. + */ +struct TransParam1 +{ + // Params used in trans log. + ObPartitionKey pkey_; + ObTransID trans_id_; + ObAddr scheduler_; + ObPartitionKey coordinator_; + ObPartitionArray participants_; + ObStartTransParam trans_param_; +}; + +class TransLogGenerator1 +{ +public: + TransLogGenerator1() + : param_(), + redo_(), + prepare_(), + commit_(), + abort_() + { } + virtual ~TransLogGenerator1() { } +public: + void next_trans(const TransParam1 ¶m) + { + param_ = param; + } + const ObTransRedoLog& next_redo(const uint64_t log_id) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + redo_.reset(); + ObVersion active_memstore_version(1); + err = redo_.init(OB_LOG_TRANS_REDO, param_.pkey_, param_.trans_id_, + tenant_id, log_id, param_.scheduler_, param_.coordinator_, + param_.participants_, param_.trans_param_, cluster_id, active_memstore_version); + EXPECT_EQ(OB_SUCCESS, err); + ObTransMutator &mutator = redo_.get_mutator(); + if (NULL == mutator.get_mutator_buf()) { + mutator.init(true); + } + const char *data = "fly"; + char *buf = static_cast(mutator.alloc(strlen(data))); + strcpy(buf, data); + return redo_; + } + const ObTransPrepareLog& next_prepare(const ObRedoLogIdArray &all_redos) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + prepare_.reset(); + ObVersion active_memstore_version(1); + err = prepare_.init(OB_LOG_TRANS_PREPARE, param_.pkey_, param_.trans_id_, + tenant_id, param_.scheduler_, param_.coordinator_, + param_.participants_, param_.trans_param_, + OB_SUCCESS, all_redos, 0, cluster_id, active_memstore_version); + EXPECT_EQ(OB_SUCCESS, err); + return prepare_; + } + const ObTransCommitLog& next_commit(const uint64_t prepare_log_id) + { + int err = OB_SUCCESS; + const uint64_t cluster_id = 1000; + ObPartitionLogInfo ptl_id(param_.pkey_, prepare_log_id, get_timestamp()); + PartitionLogInfoArray ptl_ids; + if (OB_INVALID_ID == prepare_log_id) { + // Pass. For prepare-commit trans log. 
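+ // The aggregated prepare-with-commit entry keeps the prepare info in the
+ // same log record, so the commit log is built with an empty participant
+ // prepare-log array here.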
+ } + else { + err = ptl_ids.push_back(ptl_id); + EXPECT_EQ(OB_SUCCESS, err); + } + commit_.reset(); + err = commit_.init(OB_LOG_TRANS_COMMIT, param_.pkey_, param_.trans_id_, + ptl_ids, 1, 0, cluster_id); + EXPECT_EQ(OB_SUCCESS, err); + return commit_; + } + const ObTransAbortLog& next_abort() + { + int err = OB_SUCCESS; + const uint64_t cluster_id = 1000; + PartitionLogInfoArray array; + abort_.reset(); + err = abort_.init(OB_LOG_TRANS_ABORT, param_.pkey_, param_.trans_id_, array, cluster_id); + EXPECT_EQ(OB_SUCCESS, err); + return abort_; + } +private: + TransParam1 param_; + ObTransRedoLog redo_; + ObTransPrepareLog prepare_; + ObTransCommitLog commit_; + ObTransAbortLog abort_; +}; + +/* + * Transaction Log Entry Generator 1. + * Generate log entries of transactions. + */ +class TransLogEntryGenerator1 +{ +public: + TransLogEntryGenerator1(const ObPartitionKey &pkey) + : pkey_(pkey), + log_id_(0), + remain_log_cnt_(0), + commit_(false), + param_(), + trans_log_gen_(), + prepare_id_(0), + redos_(), + data_len_(0) + { + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 5566); + param_.pkey_ = pkey_; + param_.trans_id_ = ObTransID(addr); + param_.scheduler_ = addr; + param_.coordinator_ = pkey_; + int err = param_.participants_.push_back(pkey_); + EXPECT_EQ(OB_SUCCESS, err); + param_.trans_param_.set_access_mode(ObTransAccessMode::READ_WRITE); + param_.trans_param_.set_isolation(ObTransIsolation::READ_COMMITED); + param_.trans_param_.set_type(ObTransType::TRANS_NORMAL); + + buf_ = new char[buf_len_]; + EXPECT_TRUE(NULL != buf_); + } + virtual ~TransLogEntryGenerator1() + { + delete[] buf_; + } + // Generate normal trans. + // Start a new trans. + void next_trans(const int64_t redo_cnt, bool commit) + { + remain_log_cnt_ = 2 + redo_cnt; + commit_ = commit; + redos_.reset(); + trans_log_gen_.next_trans(param_); + } + // Get next log entry. + int next_log_entry(ObLogEntry &log_entry) + { + int ret = OB_SUCCESS; + if (2 < remain_log_cnt_) { + next_redo_(log_entry); + // Store redo id. + int err = redos_.push_back(log_id_); + EXPECT_EQ(OB_SUCCESS, err); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else if (2 == remain_log_cnt_) { + next_prepare_(log_entry); + prepare_id_ = log_id_; + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else if (1 == remain_log_cnt_ && commit_) { + next_commit_(log_entry); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else if (1 == remain_log_cnt_ && !commit_) { + next_abort_(log_entry); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else { + ret = OB_ITER_END; + } + return ret; + } + // Generate: redo, redo, redo, prepare-commit. + int next_log_entry_2(ObLogEntry &log_entry) + { + int ret = OB_SUCCESS; + if (2 < remain_log_cnt_) { + next_redo_(log_entry); + // Store redo id. + int err = redos_.push_back(log_id_); + EXPECT_EQ(OB_SUCCESS, err); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else if (2 == remain_log_cnt_ && commit_) { + next_prepare_with_commit(log_entry); + log_id_ += 1; + remain_log_cnt_ -= 2; + } + else if (2 == remain_log_cnt_ && !commit_) { + next_prepare_(log_entry); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else if (1 == remain_log_cnt_ && !commit_) { + next_abort_(log_entry); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + else { + ret = OB_ITER_END; + } + return ret; + } +private: + void next_redo_(ObLogEntry &log_entry) + { + int err = OB_SUCCESS; + // Gen trans log. 
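+ // Payload layout shared by all generators below: storage log type (int64),
+ // a second int64 (always 0 here), then the serialized trans log body.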
+ const ObTransRedoLog &redo = trans_log_gen_.next_redo(log_id_); + int64_t pos = 0; + err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_REDO); + EXPECT_EQ(OB_SUCCESS, err); + err = serialization::encode_i64(buf_, buf_len_, pos, 0); + EXPECT_EQ(OB_SUCCESS, err); + err = redo.serialize(buf_, buf_len_, pos); + EXPECT_EQ(OB_SUCCESS, err); + data_len_ = pos; + // Gen entry header. + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_, + data_len_, get_timestamp(), get_timestamp(), + ObProposalID(), get_timestamp(), ObVersion(0)); + // Gen log entry. + log_entry.generate_entry(header, buf_); + } + void next_prepare_(ObLogEntry &log_entry) + { + int err = OB_SUCCESS; + // Gen trans log. + const ObTransPrepareLog &prepare= trans_log_gen_.next_prepare(redos_); + int64_t pos = 0; + err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_PREPARE); + EXPECT_EQ(OB_SUCCESS, err); + err = serialization::encode_i64(buf_, buf_len_, pos, 0); + EXPECT_EQ(OB_SUCCESS, err); + err = prepare.serialize(buf_, buf_len_, pos); + EXPECT_EQ(OB_SUCCESS, err); + data_len_ = pos; + // Gen entry header. + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_, + data_len_, get_timestamp(), get_timestamp(), + ObProposalID(), get_timestamp(), ObVersion(0)); + // Gen log entry. + log_entry.generate_entry(header, buf_); + } + void next_commit_(ObLogEntry &log_entry) + { + int err = OB_SUCCESS; + // Gen trans log. + const ObTransCommitLog &commit = trans_log_gen_.next_commit(prepare_id_); + int64_t pos = 0; + err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_COMMIT); + EXPECT_EQ(OB_SUCCESS, err); + err = serialization::encode_i64(buf_, buf_len_, pos, 0); + EXPECT_EQ(OB_SUCCESS, err); + err = commit.serialize(buf_, buf_len_, pos); + EXPECT_EQ(OB_SUCCESS, err); + data_len_ = pos; + // Gen entry header. + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_, + data_len_, get_timestamp(), get_timestamp(), + ObProposalID(), get_timestamp(), ObVersion(0)); + // Gen log entry. + log_entry.generate_entry(header, buf_); + } + void next_abort_(ObLogEntry &log_entry) + { + int err = OB_SUCCESS; + // Gen trans log. + const ObTransAbortLog &abort = trans_log_gen_.next_abort(); + int64_t pos = 0; + err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_ABORT); + EXPECT_EQ(OB_SUCCESS, err); + err = serialization::encode_i64(buf_, buf_len_, pos, 0); + EXPECT_EQ(OB_SUCCESS, err); + err = abort.serialize(buf_, buf_len_, pos); + EXPECT_EQ(OB_SUCCESS, err); + data_len_ = pos; + // Gen entry header. + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_, + data_len_, get_timestamp(), get_timestamp(), + ObProposalID(), get_timestamp(), ObVersion(0)); + // Gen log entry. + log_entry.generate_entry(header, buf_); + } + void next_prepare_with_commit(ObLogEntry &log_entry) + { + int err = OB_SUCCESS; + // Gen trans log. 
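+ // One entry, two trans logs back to back: type OB_LOG_TRANS_PREPARE_WITH_COMMIT,
+ // then the prepare log, then a commit log built with OB_INVALID_ID since no
+ // separate prepare entry exists.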
+ const ObTransPrepareLog &prepare= trans_log_gen_.next_prepare(redos_); + const ObTransCommitLog &commit = trans_log_gen_.next_commit(OB_INVALID_ID); + int64_t pos = 0; + err = serialization::encode_i64(buf_, buf_len_, + pos, OB_LOG_TRANS_PREPARE_WITH_COMMIT); + EXPECT_EQ(OB_SUCCESS, err); + err = serialization::encode_i64(buf_, buf_len_, pos, 0); + EXPECT_EQ(OB_SUCCESS, err); + err = prepare.serialize(buf_, buf_len_, pos); + EXPECT_EQ(OB_SUCCESS, err); + err = commit.serialize(buf_, buf_len_, pos); + EXPECT_EQ(OB_SUCCESS, err); + data_len_ = pos; + // Gen entry header. + ObLogEntryHeader header; + header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_, + data_len_, get_timestamp(), get_timestamp(), + ObProposalID(), get_timestamp(), ObVersion(0)); + // Gen log entry. + log_entry.generate_entry(header, buf_); + } +private: + // Params. + ObPartitionKey pkey_; + uint64_t log_id_; + int64_t remain_log_cnt_; + bool commit_; + // Gen. + TransParam1 param_; + TransLogGenerator1 trans_log_gen_; + uint64_t prepare_id_; + ObRedoLogIdArray redos_; + // Buf. + int64_t data_len_; + static const int64_t buf_len_ = 2 * _M_; + char *buf_; +}; + +/* + * Mock Parser 1. + * Read Task, revert it immediately, and count Task number. + */ +class MockParser1 : public IObLogParser +{ +public: + MockParser1() : trans_cnt_(0) { } + virtual ~MockParser1() { } + virtual int start() { return OB_SUCCESS; } + virtual void stop() { } + virtual void mark_stop_flag() { } + virtual int push(PartTransTask* task, const int64_t timeout) + { + UNUSED(timeout); + if (NULL != task && task->is_normal_trans()) { + task->revert(); + trans_cnt_ += 1; + // Debug. + // _I_(">>> push parser", "req", task->get_seq()); + } + return OB_SUCCESS; + } + int64_t get_trans_cnt() const { return trans_cnt_; } +private: + int64_t trans_cnt_; +}; + +/* + * Mock Fetcher Error Handler. + */ +class MockFetcherErrHandler1 : public IErrHandler +{ +public: + virtual ~MockFetcherErrHandler1() { } +public: + virtual void handle_err(int err_no, const char* fmt, ...) + { + UNUSED(err_no); + va_list ap; + va_start(ap, fmt); + __E__(fmt, ap); + va_end(ap); + abort(); + } +}; + +/* + * Mock Liboblog Error Handler. + */ +class MockLiboblogErrHandler1 : public IObLogErrHandler +{ +public: + virtual void handle_error(int err_no, const char* fmt, ...) + { + UNUSED(err_no); + va_list ap; + va_start(ap, fmt); + __E__(fmt, ap); + va_end(ap); + } +}; + +/* + * Mock SvrProvider. + * User set svrs into it. + */ +class MockSvrProvider1 : public sqlclient::ObMySQLServerProvider +{ +public: + virtual ~MockSvrProvider1() { } + void add_svr(const ObAddr &svr) { svrs_.push_back(svr); } +public: + virtual int get_cluster_list(common::ObIArray &cluster_list) + { + int ret = OB_SUCCESS; + if (svrs_.size() > 0) { + if (OB_FAIL(cluster_list.push_back(common::OB_INVALID_ID))) { + LOG_WARN("fail to push back cluster_id", K(ret)); + } + } + return ret; + } + virtual int get_server(const int64_t cluster_id, const int64_t svr_idx, ObAddr& server) + { + UNUSED(cluster_id); + int ret = OB_SUCCESS; + if (0 <= svr_idx && svr_idx < static_cast(svrs_.size())) { + server = svrs_[svr_idx]; + } + else { + ret = OB_ERR_UNEXPECTED; + } + return ret; + } + virtual int64_t get_cluster_count() const + { + return svrs_.size() > 0 ? 
1 : 0; + } + virtual int64_t get_server_count(const int64_t cluster_id) const + { + UNUSED(cluster_id) + return static_cast(svrs_.size()); + } + virtual int refresh_server_list() { return OB_SUCCESS; } +private: + std::vector svrs_; +}; + +/* + * Test Dataset Generator. + */ + +/* + * Svr Config. + * Set svr address and mysql port. + */ +struct SvrCfg +{ + // Svr. + const char *svr_addr_; + int internal_port_; + // Mysql. + int mysql_port_; + const char *mysql_user_; + const char *mysql_password_; + const char *mysql_db_; + int64_t mysql_timeout_; +}; + +/* + * Configuration for mysql connector. + */ +inline ConnectorConfig prepare_cfg_1(const SvrCfg &svr_cfg) +{ + ConnectorConfig cfg; + cfg.mysql_addr_ = svr_cfg.svr_addr_; + cfg.mysql_port_ = svr_cfg.mysql_port_; + cfg.mysql_user_ = svr_cfg.mysql_user_; + cfg.mysql_password_ = svr_cfg.mysql_password_; + cfg.mysql_db_ = svr_cfg.mysql_db_; + cfg.mysql_timeout_ = svr_cfg.mysql_timeout_; + return cfg; +} + +/* + * Build table names. + */ +inline const char** prepare_table_name_1() +{ + static const char* tnames[] = { + "table1", + "table2", + "table3", + "table4", + "table5", + "table6", + "table7", + "table8", + "table9", + "table10", + "table11", + "table12", + "table13", + "table14", + "table15", + "table16" + }; + return tnames; +} + +/* + * Build table schema. + */ +inline const char* prepare_table_schema_1() +{ + return "c1 int primary key"; +} + +/* + * Create table. + */ +class CreateTable : public MySQLQueryBase +{ +public: + CreateTable(const char *tname, const char *schema) + { + snprintf(buf_, 512, "create table %s(%s)", tname, schema); + sql_ = buf_; + sql_len_ = strlen(sql_); + } +private: + char buf_[512]; +}; + +/* + * Drop table. + */ +class DropTable : public MySQLQueryBase +{ +public: + DropTable(const char *tname) + { + snprintf(buf_, 512, "drop table if exists %s", tname); + sql_ = buf_; + sql_len_ = strlen(sql_); + } +private: + char buf_[512]; +}; + +/* + * Get table id. + */ +class GetTableId : public MySQLQueryBase +{ +public: + GetTableId(const char *tname) + { + snprintf(buf_, 512, "select table_id " + "from __all_table where table_name='%s'", tname); + sql_ = buf_; + sql_len_ = strlen(sql_); + } + int get_tid(uint64_t &tid) + { + int ret = common::OB_SUCCESS; + while (common::OB_SUCCESS == (ret = next_row())) { + uint64_t table_id = 0; + if (OB_SUCC(ret)) { + if (common::OB_SUCCESS != (ret = get_uint(0, table_id))) { + OBLOG_LOG(WARN, "err get uint", K(ret)); + } + } + tid = table_id; + } + ret = (common::OB_ITER_END == ret) ? common::OB_SUCCESS : ret; + return ret; + } +private: + char buf_[512]; +}; + +/* + * Get partition key by table id from system table. 
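+ * Reads __all_meta_table over a live MySQL connection, so it only works
+ * against a deployed cluster.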
+ */ +class GetPartitionKey : public MySQLQueryBase +{ +public: + GetPartitionKey(const uint64_t tid) + { + snprintf(buf_, 512, "select table_id, partition_id, partition_cnt " + "from __all_meta_table where table_id=%lu", tid); + sql_ = buf_; + sql_len_ = strlen(sql_); + } + int get_pkeys(ObArray &pkeys) + { + int ret = common::OB_SUCCESS; + while (common::OB_SUCCESS == (ret = next_row())) { + uint64_t table_id = 0; + int32_t partition_id = 0; + int32_t partition_cnt = 0; + if (OB_SUCC(ret)) { + if (common::OB_SUCCESS != (ret = get_uint(0, table_id))) { + OBLOG_LOG(WARN, "err get uint", K(ret)); + } + } + if (OB_SUCC(ret)) { + int64_t val = 0; + if (common::OB_SUCCESS != (ret = get_int(1, val))) { + OBLOG_LOG(WARN, "err get int", K(ret)); + } else { + partition_id = static_cast(val); + } + } + if (OB_SUCC(ret)) { + int64_t val = 0; + if (common::OB_SUCCESS != (ret = get_int(2, val))) { + OBLOG_LOG(WARN, "err get int", K(ret)); + } else { + partition_cnt = static_cast(val); + } + } + ObPartitionKey pkey; + pkey.init(table_id, partition_id, partition_cnt); + pkeys.push_back(pkey); + } + ret = (common::OB_ITER_END == ret) ? common::OB_SUCCESS : ret; + return ret; + } +private: + char buf_[512]; +}; + +/* + * Create table and return their partition keys. + */ +inline void prepare_table_1(const SvrCfg& svr_cfg, + const char** tnames, + const int64_t tcnt, + const char* schema, + ObArray& pkeys) +{ + ObLogMySQLConnector conn; + ConnectorConfig cfg = prepare_cfg_1(svr_cfg); + + int ret = conn.init(cfg); + EXPECT_EQ(OB_SUCCESS, ret); + + // Prepare tables. + + for (int64_t idx = 0; idx < tcnt; ++idx) { + // Drop. + DropTable drop_table(tnames[idx]); + ret = conn.exec(drop_table); + EXPECT_EQ(OB_SUCCESS, ret); + // Create. + CreateTable create_table(tnames[idx], schema); + ret = conn.exec(create_table); + EXPECT_EQ(OB_SUCCESS, ret); + // Get tid. + GetTableId get_tid(tnames[idx]); + ret = conn.query(get_tid); + EXPECT_EQ(OB_SUCCESS, ret); + uint64_t tid = OB_INVALID_ID; + ret = get_tid.get_tid(tid); + EXPECT_EQ(OB_SUCCESS, ret); + // Get pkeys. + GetPartitionKey get_pkey(tid); + ret = conn.query(get_pkey); + EXPECT_EQ(OB_SUCCESS, ret); + ret = get_pkey.get_pkeys(pkeys); + EXPECT_EQ(OB_SUCCESS, ret); + } + + ret = conn.destroy(); + EXPECT_EQ(OB_SUCCESS, ret); +} + +/* + * Data generator. + * Insert a bunch of data in server. + * For schema 1. + */ +class DataGenerator1 : public Runnable +{ + class Inserter : public MySQLQueryBase + { + public: + void set_data(const char *tname, const int64_t data) + { + reuse(); + snprintf(buf_, 512, "insert into %s (c1) values (%ld)", tname, data); + sql_ = buf_; + sql_len_ = strlen(sql_); + } + private: + char buf_[512]; + }; +public: + DataGenerator1(const ConnectorConfig &cfg) : + conn_(), cfg_(cfg), tname_(NULL), start_(0), end_(0) + { + int err = conn_.init(cfg_); + EXPECT_EQ(OB_SUCCESS, err); + } + ~DataGenerator1() + { + conn_.destroy(); + } + void insert(const char *tname, const int64_t start, const int64_t end) + { + tname_ = tname; + start_ = start; + end_ = end; + create(); + } +private: + int routine() + { + Inserter inserter; + int err = OB_SUCCESS; + for (int64_t cur = start_; + OB_SUCCESS == err && cur < end_; + cur++) { + inserter.set_data(tname_, cur); + err = conn_.exec(inserter); + EXPECT_EQ(OB_SUCCESS, err); + } + return OB_SUCCESS; + } +private: + ObLogMySQLConnector conn_; + ConnectorConfig cfg_; + const char *tname_; + int64_t start_; + int64_t end_; +}; + +/* + * End of Test Dataset Generator. 
+ */ + +} +} diff --git a/unittest/liboblog/test_log_fetcher_heartbeat_mgr.cpp b/unittest/liboblog/test_log_fetcher_heartbeat_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7f6be42f22ec7236ed023755ac6e484d93966130 --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_heartbeat_mgr.cpp @@ -0,0 +1,463 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include + +#include "share/ob_define.h" + +#include "liboblog/src/ob_log_fetcher_heartbeat_mgr.h" + +#include "test_log_fetcher_common_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + + +/* + * Heartbeater Tests. + */ +/* + * Basic function test 1. + * - N thread & M requests for each thread + * - result timestamp == next log id + * - rpc always succeed, no server internal error + * - rpc interface returns correct result or an error code randomly + * (30% correct so most requests are sent to at least 2 servers) + */ +namespace basic_func_test_1 +{ +class MockRpcInterface : public IFetcherRpcInterface +{ +public: + ~MockRpcInterface() {} + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + res.reset(); + // Seed. + int64_t seed = (get_timestamp()); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + // 30%. + bool succeed = ((idx + seed) % 100) < 30; + obrpc::ObLogReqHeartbeatInfoResponse::Result result; + result.reset(); + result.err_ = (succeed) ? OB_SUCCESS : OB_NEED_RETRY; + result.tstamp_ = (succeed) ? 
(int64_t)(req.get_params().at(idx).log_id_) : OB_INVALID_TIMESTAMP; + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + return OB_SUCCESS; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + // Seed. + int64_t seed = (get_timestamp()); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const obrpc::ObLogLeaderHeartbeatReq::Param ¶m = req.get_params().at(idx); + // 30%. + bool succeed = ((idx + seed) % 100) < 30; + + result.reset(); + result.err_ = succeed ? OB_SUCCESS : OB_NOT_MASTER; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = succeed ? get_timestamp() : 1; + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } +}; + +/* + * Factory. + */ +class MockRpcInterfaceFactory : public IFetcherRpcInterfaceFactory +{ +public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } +}; + +//////////////////////基本功能测试////////////////////////////////////////// +/* + * Test HeartbeatRequest + */ +TEST(Heartbeater, BasicFuncTest1) +{ + // Build Heartbeater requests. + const int64_t AllSvrCnt = 3; + ObAddr svrs[AllSvrCnt]; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svrs[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + const int64_t HeartbeatRequestCnt = 10000; + HeartbeatRequest *request_array = static_cast(ob_malloc( + HeartbeatRequestCnt * sizeof(HeartbeatRequest))); + // test assignment + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + r.reset(); + // reset IDLE + EXPECT_EQ(HeartbeatRequest::IDLE, r.get_state()); + r.pkey_ = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + r.next_log_id_ = (uint64_t)(1 + idx); + r.svr_ = svrs[idx % AllSvrCnt]; + // test getter and setter + EXPECT_EQ(HeartbeatRequest::IDLE, r.get_state()); + r.set_state(HeartbeatRequest::REQ); + EXPECT_EQ(HeartbeatRequest::REQ, r.get_state()); + r.set_state(HeartbeatRequest::DONE); + EXPECT_EQ(HeartbeatRequest::DONE, r.get_state()); + } + + ob_free(request_array); + request_array = NULL; +} + +/* + * Test Heartbeater + */ +TEST(Heartbeater, BasicFuncTest2) +{ + // Build Heartbeater requests. 
+ const int64_t AllSvrCnt = 3; + ObAddr svrs[AllSvrCnt]; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svrs[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + const int64_t HeartbeatRequestCnt = 10000; + HeartbeatRequest *request_array = static_cast(ob_malloc( + HeartbeatRequestCnt * sizeof(HeartbeatRequest))); + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + r.reset(); + r.pkey_ = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + r.next_log_id_ = (uint64_t)(1 + idx); + r.svr_ = svrs[idx % AllSvrCnt]; + } + // Heartbeater + Heartbeater heartbeater; + MockRpcInterfaceFactory rpc_factory; + MockFetcherErrHandler1 err_handler1; + FixedJobPerWorkerPool worker_pool; + const int64_t heartbeat_worker_cnt = 3; + + int err = OB_SUCCESS; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + err = heartbeater.init(&rpc_factory, &err_handler1, &worker_pool, heartbeat_worker_cnt); + EXPECT_EQ(OB_SUCCESS, err); + // test async_heartbeat_req + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + EXPECT_EQ(OB_SUCCESS, heartbeater.async_heartbeat_req(&r)); + } + // test destroy + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + err = worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + ob_free(request_array); + request_array = NULL; +} + +/* + * Test Worker. + */ +class TestWorker : public Runnable +{ +public: + Heartbeater *heartbeater_; + virtual int routine() + { + // Build requests. + const int64_t AllSvrCnt = 3; + ObAddr svrs[AllSvrCnt]; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svrs[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + const int64_t HeartbeatRequestCnt = 10 * 10000; + HeartbeatRequest *request_array = new HeartbeatRequest[HeartbeatRequestCnt]; + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + r.reset(); + r.pkey_ = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + r.next_log_id_ = (uint64_t)(1 + idx); + r.svr_ = svrs[idx % AllSvrCnt]; + } + // Push requests into heartbeater. + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + EXPECT_EQ(OB_SUCCESS, heartbeater_->async_heartbeat_req(&r)); + if (0 == (idx % 1000)) { + usec_sleep(10 * _MSEC_); + } + } + // Wait for requests end. Max test time should set. + int64_t end_request_cnt = 0; + const int64_t TestTimeLimit = 10 * _MIN_; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TestTimeLimit) + && (end_request_cnt < HeartbeatRequestCnt)) { + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + if (HeartbeatRequest::DONE == r.get_state()) { + end_request_cnt += 1; + r.set_state(HeartbeatRequest::IDLE); + } + } + usec_sleep(100 * _MSEC_); + } + // Assert if test cannot finish. + EXPECT_EQ(HeartbeatRequestCnt, end_request_cnt); + // Do some statistics. + int64_t svr_consume_distribution[AllSvrCnt]; // 1, 2, 3, ... 
+ for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svr_consume_distribution[idx] = 0; + } + int64_t succ_cnt = 0; + for (int64_t idx = 0, cnt = HeartbeatRequestCnt; idx < cnt; ++idx) { + svr_consume_distribution[idx % AllSvrCnt] += 1; + } + delete[] request_array; + const int64_t BuffSize = 1024; + char buf[BuffSize]; + int64_t pos = 0; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + pos += snprintf(buf + pos, BuffSize - pos, "svr_cnt:%ld perc:%f ", (1 + idx), + ((double)svr_consume_distribution[idx] / (double)HeartbeatRequestCnt)); + } + fprintf(stderr, "request count: %ld distribution: %s succeed perc: %f \n", + HeartbeatRequestCnt, buf, (double)succ_cnt / (double)HeartbeatRequestCnt); + return OB_SUCCESS; + } +}; + +////////////////////// Boundary tests ////////////////////////////////////////// +// Heartbeater init fail +TEST(Heartbeater, BasicFuncTest3) +{ + //_I_("called", "prepare:", 100); + + MockRpcInterfaceFactory rpc_factory; + MockFetcherErrHandler1 err_handler1; + FixedJobPerWorkerPool worker_pool; + const int64_t heartbeat_worker_cnt = 3; + Heartbeater heartbeater; + + int err = OB_SUCCESS; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + err = heartbeater.init(NULL, &err_handler1, &worker_pool, heartbeat_worker_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + err = heartbeater.init(&rpc_factory, NULL, &worker_pool, heartbeat_worker_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + err = heartbeater.init(&rpc_factory, &err_handler1, NULL, heartbeat_worker_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + // heartbeat_worker_cnt error, [0, 32] + int64_t heartbeat_worker_cnt_err1 = -1; + err = heartbeater.init(&rpc_factory, &err_handler1, &worker_pool, heartbeat_worker_cnt_err1); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + int64_t heartbeat_worker_cnt_err2 = 33; + err = heartbeater.init(&rpc_factory, &err_handler1, &worker_pool, heartbeat_worker_cnt_err2); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); +} + +// Heartbeater aync_heartbeat_req fail +TEST(Heartbeater, BasicFuncTest4) +{ + MockRpcInterfaceFactory rpc_factory; + MockFetcherErrHandler1 err_handler1; + FixedJobPerWorkerPool worker_pool; + const int64_t heartbeat_worker_cnt = 3; + Heartbeater heartbeater; + + int err = OB_SUCCESS; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + err = heartbeater.init(&rpc_factory, &err_handler1, &worker_pool, heartbeat_worker_cnt); + EXPECT_EQ(OB_SUCCESS, err); + + // Build Heartbeater requests. 
+ ObAddr svr = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(1000)); + HeartbeatRequest req; + req.reset(); + req.pkey_ = ObPartitionKey((uint64_t)(1000), 0, 1); + req.next_log_id_ = (uint64_t)(100); + req.svr_ = svr; + req.set_state(HeartbeatRequest::REQ); + + err = heartbeater.async_heartbeat_req(NULL); + EXPECT_NE(OB_SUCCESS, err); + err = heartbeater.async_heartbeat_req(&req); + EXPECT_NE(OB_SUCCESS, err); + + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + err = worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); +} + +/* + * Test workflow + */ +//TEST(DISABLED_Heartbeater, BasicFuncTest5) +TEST(Heartbeater, BasicFuncTest5) +{ + _I_("called", "func:", "workflow"); + MockFetcherErrHandler1 err_handler1; + MockRpcInterfaceFactory rpc_factory; + FixedJobPerWorkerPool worker_pool; + Heartbeater heartbeater; + + int err = OB_SUCCESS; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + _I_("workflow", "worker_pool:", "init OB_SUCCESS"); + + err = heartbeater.init(&rpc_factory, &err_handler1, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + _I_("workflow", "heartbeat:", "init OB_SUCCESS"); + + const int64_t TestWorkerCnt = 3; + TestWorker workers[TestWorkerCnt]; + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.heartbeater_ = &heartbeater; + w.create(); + _I_("workflow", "thread:", "create OB_SUCCESS"); + } + + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.join(); + _I_("workflow", "thread:", "join OB_SUCCESS"); + } + + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + _I_("workflow", "heartbeat:", "destroy OB_SUCCESS"); + + err = worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + _I_("workflow", "work pool:", "destroy OB_SUCCESS"); +} + +}//end of basic_func_test_1 +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_log_fetcher_heartbeat_mgr.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_log_fetcher_impl.cpp b/unittest/liboblog/test_log_fetcher_impl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9445de0713dd8b21ebc791d2a621bbd60b885a47 --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_impl.cpp @@ -0,0 +1,384 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include + +#include "share/ob_define.h" +#include "lib/allocator/ob_concurrent_fifo_allocator.h" + + +#include "test_log_fetcher_common_utils.h" +#include "liboblog/src/ob_log_fetcher_impl.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + +namespace BasicFunction1 +{ +/* + * Mock systable helper. // Todo... + */ +class MockSystableHelper : public ObILogSysTableHelper +{ +public: + virtual int query_all_clog_history_info_by_log_id_1( + const common::ObPartitionKey &pkey, const uint64_t log_id, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + int64_t seed = get_timestamp() / 3333333; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = 1 + (seed % 6); + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = (int32_t)(pkey.get_partition_cnt()); + rec.start_log_id_ = log_id; + rec.end_log_id_ = log_id + 10000; + rec.start_log_timestamp_ = seed - (1 * _HOUR_); + rec.end_log_timestamp_ = seed + (1 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (seed % 128)); + rec.svr_port_ = 8888; + records.push_back(rec); + seed += 17; + } + return ret; + } + + virtual int query_all_clog_history_info_by_timestamp_1( + const common::ObPartitionKey &pkey, const int64_t timestamp, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + int64_t seed = get_timestamp() / 7777777; + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = 1 + (seed % 6); + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = (int32_t)(pkey.get_partition_cnt()); + rec.start_log_id_ = 0; + rec.end_log_id_ = 65536; + rec.start_log_timestamp_ = timestamp - (1 * _HOUR_); + rec.end_log_timestamp_ = timestamp + (1 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (seed % 128)); + rec.svr_port_ = 8888; + records.push_back(rec); + seed += 17; + } + return ret; + } + virtual int query_all_meta_table_1( + const common::ObPartitionKey &pkey, AllMetaTableRecords &records) { + // Generate random results. + int ret = OB_SUCCESS; + UNUSED(pkey); + int64_t seed = get_timestamp() / 3333333; + records.reset(); + AllMetaTableRecord rec; + const int64_t cnt = 1 + (seed % 6); + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (seed % 128)); + rec.svr_port_ = 8888; + rec.role_ = (0 == idx) ? LEADER : FOLLOWER; + records.push_back(rec); + seed += 17; + } + return ret; + } + + virtual int query_all_meta_table_for_leader( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + has_leader = true; + leader.set_ip_addr("127.0.0.1", 8888); + return OB_SUCCESS; + } + + virtual int query_all_server_table_1(AllServerTableRecords &records) { + UNUSED(records); + return OB_SUCCESS; + } +}; + +/* + * Mock rpc. 
+ */ +class MockRpcInterface : public IFetcherRpcInterface +{ +public: + ~MockRpcInterface() {} + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + res.reset(); + // Seed. + int64_t seed = (get_timestamp()); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + // 30% success, 30% break. + int64_t rand = (idx + seed) % 100; + bool succeed = (rand < 30); + bool breakrpc = (30 <= rand) && (rand < 60); + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result; + result.reset(); + result.err_ = (succeed) ? OB_SUCCESS : ((breakrpc) ? OB_EXT_HANDLE_UNFINISH : OB_NEED_RETRY); + result.start_log_id_ = 1; + // Break info is actually not returned. + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + res.reset(); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogReqHeartbeatInfoResponse::Result result; + result.reset(); + result.err_ = OB_SUCCESS; + result.tstamp_ = get_timestamp(); + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + _D_(">>> req heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const obrpc::ObLogLeaderHeartbeatReq::Param ¶m = req.get_params().at(idx); + + result.reset(); + result.err_ = OB_SUCCESS; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = get_timestamp(); + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + int ret = OB_SUCCESS; + UNUSED(req); + obrpc::ObStreamSeq seq; + seq.reset(); + seq.self_.set_ip_addr("127.0.0.1", 8888); + seq.seq_ts_ = get_timestamp(); + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + res.set_stream_seq(seq); + _D_(">>> open stream", K(req), K(res)); + return ret; + } + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq 
&req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + return OB_SUCCESS; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } +}; + +/* + * Factory. + */ +class MockRpcInterfaceFactory : public IFetcherRpcInterfaceFactory +{ +public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } +}; + +TEST(Fetcher, BasicFunction1) +{ + int err = OB_SUCCESS; + + // Task Pool. + ObLogTransTaskPool task_pool; + ObConcurrentFIFOAllocator task_pool_alloc; + err = task_pool_alloc.init(128 * _G_, 8 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, err); + err = task_pool.init(&task_pool_alloc, 10240, 1024, 4 * 1024 * 1024, true); + EXPECT_EQ(OB_SUCCESS, err); + + // Parser. + MockParser1 parser; + + // Err Handler. + MockLiboblogErrHandler1 err_handler; + MockFetcherErrHandler1 err_handler2; + + // Rpc. + MockRpcInterfaceFactory rpc_factory; + + // Worker Pool. + FixedJobPerWorkerPool worker_pool; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + // StartLogIdLocator. + ::oceanbase::liboblog::fetcher::StartLogIdLocator locator; + err = locator.init(&rpc_factory, &err_handler2, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Heartbeater. + Heartbeater heartbeater; + err = heartbeater.init(&rpc_factory, &err_handler2, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // SvrFinder. + MockSystableHelper systable_helper; + ::oceanbase::liboblog::fetcher::SvrFinder svrfinder; + err = svrfinder.init(&systable_helper, &err_handler2, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + // Fetcher Config. + FetcherConfig cfg; + cfg.reset(); + + // Init. + ::oceanbase::liboblog::fetcher::Fetcher fetcher; + err = fetcher.init(&task_pool, &parser, &err_handler2, &rpc_factory, + &worker_pool, &svrfinder, &locator, &heartbeater, &cfg); + EXPECT_EQ(OB_SUCCESS, err); + + // Add partition. + ObPartitionKey p1(1001, 1, 1); + ObPartitionKey p2(1002, 1, 1); + ObPartitionKey p3(1003, 1, 1); + err = fetcher.fetch_partition(p1, 1, OB_INVALID_ID); + EXPECT_EQ(OB_SUCCESS, err); +// err = fetcher.fetch_partition(p2, 1, OB_INVALID_ID); +// EXPECT_EQ(OB_SUCCESS, err); +// err = fetcher.fetch_partition(p3, 1, OB_INVALID_ID); +// EXPECT_EQ(OB_SUCCESS, err); + + // Run. + err = fetcher.start(); + EXPECT_EQ(OB_SUCCESS, err); + + usleep(10 * _SEC_); + + // Discard partition. + err = fetcher.discard_partition(p1); + EXPECT_EQ(OB_SUCCESS, err); +// err = fetcher.discard_partition(p2); +// EXPECT_EQ(OB_SUCCESS, err); +// err = fetcher.discard_partition(p3); +// EXPECT_EQ(OB_SUCCESS, err); + + usleep(10 * _SEC_); + + // Stop. + err = fetcher.stop(true); + EXPECT_EQ(OB_SUCCESS, err); + + // Destroy. 
+ err = fetcher.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = locator.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = svrfinder.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + err = heartbeater.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + task_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); +} + +} +} +} + +int main(int argc, char **argv) +{ + ObLogger::get_logger().set_mod_log_levels("ALL.*:ERROR, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_fetcher_part_stream.cpp b/unittest/liboblog/test_log_fetcher_part_stream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f0e69650a96d4ab7f107e2f2db1b092fdefda742 --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_part_stream.cpp @@ -0,0 +1,295 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "gtest/gtest.h" + +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" + +#include "liboblog/src/ob_log_fetcher_part_stream.h" +#include "test_log_fetcher_common_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * Basic Function Tests. + */ +/* + * Half commit, half abort. + * Fixed redo log cnt. + */ +TEST(PartitionStream, BasicTest1) +{ + int err = OB_SUCCESS; + + ObTransPrepareLog prepare_; + + // Commit half trans, whose has even idx. + const int64_t trans_cnt = 1000; + const int64_t commit_trans_cnt = trans_cnt / 2; + const int64_t redo_cnt = 5; + + // Pkey. + ObPartitionKey pkey(1000U, 1, 1); + // Log gen. + TransLogEntryGenerator1 log_gen(pkey); + // Task Pool. + ObConcurrentFIFOAllocator fifo_allocator; + ObLogTransTaskPool task_pool; + err = fifo_allocator.init(16 * _G_, 16 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, err); + err = task_pool.init(&fifo_allocator, 10240, 1024, 4 * 1024 * 1024, true); + EXPECT_EQ(OB_SUCCESS, err); + // Parser. + MockParser1 parser; + FetcherConfig cfg; + + // Init. + PartitionStream ps; + err = ps.init(pkey, &parser, &task_pool, &cfg); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. + ObLogIdArray missing; + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + // Commit trans with even idx. + log_gen.next_trans(redo_cnt, (0 == idx % 2)); + ObLogEntry log_entry; + while (OB_SUCCESS == log_gen.next_log_entry(log_entry)) { + err = ps.read(log_entry, missing); + EXPECT_EQ(OB_SUCCESS, err); + } + err = ps.flush(); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_trans_cnt()); + + // Destroy. + err = ps.destroy(); + EXPECT_EQ(OB_SUCCESS, err); +} + +/* + * Half commit, half abort. + * Commit with Prepare-Commit trans log. 
+ */ +TEST(PartitionStream, BasicTest2) +{ + int err = OB_SUCCESS; + + ObTransPrepareLog prepare_; + + // Commit half trans, whose has even idx. + const int64_t trans_cnt = 1000; + const int64_t commit_trans_cnt = trans_cnt / 2; + const int64_t redo_cnt = 5; + + // Pkey. + ObPartitionKey pkey(1000U, 1, 1); + // Log gen. + TransLogEntryGenerator1 log_gen(pkey); + // Task Pool. + ObConcurrentFIFOAllocator fifo_allocator; + ObLogTransTaskPool task_pool; + err = fifo_allocator.init(16 * _G_, 16 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, err); + err = task_pool.init(&fifo_allocator, 10240, 1024, 4 * 1024 * 1024, true); + EXPECT_EQ(OB_SUCCESS, err); + // Parser. + MockParser1 parser; + FetcherConfig cfg; + + // Init. + PartitionStream ps; + err = ps.init(pkey, &parser, &task_pool, &cfg); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. + ObLogIdArray missing; + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + // Commit trans with even idx. + log_gen.next_trans(redo_cnt, (0 == idx % 2)); + ObLogEntry log_entry; + while (OB_SUCCESS == log_gen.next_log_entry_2(log_entry)) { + err = ps.read(log_entry, missing); + EXPECT_EQ(OB_SUCCESS, err); + } + err = ps.flush(); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_trans_cnt()); + + // Destroy. + err = ps.destroy(); + EXPECT_EQ(OB_SUCCESS, err); +} + + +/* + * Test partition progress tracker. + */ +TEST(PartProgressTracker, BasicTest1) +{ + const int64_t progress_cnt = 4 * 10000; + PartProgressTracker tracker; + + int err = tracker.init(progress_cnt); + EXPECT_EQ(OB_SUCCESS, err); + + ObArray indices; + const int64_t time = get_timestamp(); + + // Acquire progresses and update their values. + for (int64_t idx = 0, cnt = progress_cnt; idx < cnt; ++idx) { + int64_t progress_idx = 0; + err = tracker.acquire_progress(progress_idx); + EXPECT_EQ(OB_SUCCESS, err); + err = indices.push_back(progress_idx); + EXPECT_EQ(OB_SUCCESS, err); + err = tracker.update_progress(progress_idx, time); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Get min progress test. + const int64_t test_cnt = 10000; + int64_t start = get_timestamp(); + for (int64_t idx = 0, cnt = test_cnt; idx < cnt; ++idx) { + int64_t min = 0; + err = tracker.get_min_progress(min); + EXPECT_EQ(OB_SUCCESS, err); + } + const int64_t avg = ((get_timestamp() - start)/ test_cnt); + + // Release. + while (0 != indices.count()) { + int64_t progress_idx = indices.at(indices.count() - 1); + indices.pop_back(); + err = tracker.release_progress(progress_idx); + EXPECT_EQ(OB_SUCCESS, err); + } + + err = tracker.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + // Print result. + fprintf(stderr, "partition progress tracker get min for %ld progresses costs %s\n", + progress_cnt, TVAL_TO_STR(avg)); +} + +// Perf test. +// This test requires at least 3 cores: 1 core tests reading, 2 cores update data. 
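+// Each updater thread keeps rewriting randomly chosen progress slots while the main
+// thread times get_min_progress(), so the average latency reported below is measured
+// under concurrent writer load.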
+struct PerfTest1Updater : public Runnable +{ + virtual int routine() + { + while (ATOMIC_LOAD(&atomic_run_)) { + int64_t seed = get_timestamp(); + for (int i = 0; i < 10000; ++i) { + progress_tracker_->update_progress(indices_->at((seed % (indices_->count()))), seed); + seed += 777; + } + } + return common::OB_SUCCESS; + } + bool atomic_run_; + PartProgressTracker *progress_tracker_; + ObArray *indices_; +}; +TEST(PartProgressTracker, PerfTest1) +{ + const int64_t progress_cnt = 4 * 10000; + PartProgressTracker tracker; + + int err = tracker.init(progress_cnt); + EXPECT_EQ(OB_SUCCESS, err); + + ObArray indices; + const int64_t time = get_timestamp(); + + // Acquire progresses and update their values. + for (int64_t idx = 0, cnt = progress_cnt; idx < cnt; ++idx) { + int64_t progress_idx = 0; + err = tracker.acquire_progress(progress_idx); + EXPECT_EQ(OB_SUCCESS, err); + err = indices.push_back(progress_idx); + EXPECT_EQ(OB_SUCCESS, err); + err = tracker.update_progress(progress_idx, time); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Trigger updaters. + const int64_t updater_cnt = 2; + PerfTest1Updater updaters[updater_cnt]; + for (int i = 0; i < updater_cnt; ++i) { + updaters[i].atomic_run_ = true; + updaters[i].progress_tracker_ = &tracker; + updaters[i].indices_ = &indices; + updaters[i].create(); + } + + // Get min progress test. + const int64_t test_cnt = 10000; + int64_t start = get_timestamp(); + for (int64_t idx = 0, cnt = test_cnt; idx < cnt; ++idx) { + int64_t min = 0; + err = tracker.get_min_progress(min); + EXPECT_EQ(OB_SUCCESS, err); + } + const int64_t avg = ((get_timestamp() - start)/ test_cnt); + + // Stop updaters. + for (int i = 0; i < updater_cnt; ++i) { + ATOMIC_STORE(&(updaters[i].atomic_run_), false); + updaters[i].join(); + } + + // Release. + while (0 != indices.count()) { + int64_t progress_idx = indices.at(indices.count() - 1); + indices.pop_back(); + err = tracker.release_progress(progress_idx); + EXPECT_EQ(OB_SUCCESS, err); + } + + err = tracker.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + // Print result. + fprintf(stderr, "partition progress tracker 2 updaters get min for %ld progresses costs %s\n", + progress_cnt, TVAL_TO_STR(avg)); +} +} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_fetcher_start_log_id_locator.cpp b/unittest/liboblog/test_log_fetcher_start_log_id_locator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..78d14d461714f03205cb6f868ec992bd1a7204ab --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_start_log_id_locator.cpp @@ -0,0 +1,317 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include + +#include "share/ob_define.h" +#include "liboblog/src/ob_log_fetcher_start_log_id_locator.h" + +#include "test_log_fetcher_common_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * StartLogIdLocator Tests. + */ +/* + * Basic function test 1. + * - N thread & M requests for each thread + * - result start log id = 1 + * - rpc always succeed, no server internal error + * - rpc interface breaks the locating process randomly (30%) + * - rpc interface returns correct result or an error code randomly (30%) + */ +namespace basic_func_test_1 +{ +class MockRpcInterface : public IFetcherRpcInterface +{ +public: + ~MockRpcInterface() {} + virtual void set_svr(const common::ObAddr& svr) { UNUSED(svr); } + virtual const ObAddr& get_svr() const { static ObAddr svr; return svr; } + virtual void set_timeout(const int64_t timeout) { UNUSED(timeout); } + virtual int req_start_log_id_by_ts( + const obrpc::ObLogReqStartLogIdByTsRequest& req, + obrpc::ObLogReqStartLogIdByTsResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_log_id_by_ts_2(const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint &req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint &res) { + res.reset(); + // Seed. + int64_t seed = (get_timestamp()); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + // 30% success, 30% break. + int64_t rand = (idx + seed) % 100; + bool succeed = (rand < 30); + bool breakrpc = (30 <= rand) && (rand < 60); + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result; + result.reset(); + result.err_ = (succeed) ? OB_SUCCESS : ((breakrpc) ? OB_EXT_HANDLE_UNFINISH : OB_NEED_RETRY); + result.start_log_id_ = 1; + // Break info is actually not returned. + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + return OB_SUCCESS; + } + virtual int req_start_pos_by_log_id_2(const obrpc::ObLogReqStartPosByLogIdRequestWithBreakpoint &req, + obrpc::ObLogReqStartPosByLogIdResponseWithBreakpoint &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_start_pos_by_log_id( + const obrpc::ObLogReqStartPosByLogIdRequest& req, + obrpc::ObLogReqStartPosByLogIdResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int fetch_log( + const obrpc::ObLogExternalFetchLogRequest& req, + obrpc::ObLogExternalFetchLogResponse& res) + { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_heartbeat_info( + const obrpc::ObLogReqHeartbeatInfoRequest& req, + obrpc::ObLogReqHeartbeatInfoResponse& res) + { + res.reset(); + // Seed. + int64_t seed = (get_timestamp()); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + // 30%. + bool succeed = ((idx + seed) % 100) < 30; + obrpc::ObLogReqHeartbeatInfoResponse::Result result; + result.reset(); + result.err_ = (succeed) ? OB_SUCCESS : OB_NEED_RETRY; + result.tstamp_ = (succeed) ? (int64_t)(req.get_params().at(idx).log_id_) : OB_INVALID_TIMESTAMP; + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + return OB_SUCCESS; + } + + virtual int req_leader_heartbeat( + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res) + { + res.reset(); + res.set_err(OB_SUCCESS); + res.set_debug_err(OB_SUCCESS); + // Seed. 
+ int64_t seed = (get_timestamp()); + for (int64_t idx = 0, cnt = req.get_params().count(); idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const obrpc::ObLogLeaderHeartbeatReq::Param ¶m = req.get_params().at(idx); + // 30%. + bool succeed = ((idx + seed) % 100) < 30; + + result.reset(); + result.err_ = succeed ? OB_SUCCESS : OB_NOT_MASTER; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = succeed ? get_timestamp() : 1; + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + } + + _D_(">>> heartbeat", K(req), K(res)); + return OB_SUCCESS; + } + + virtual int open_stream(const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + + virtual int fetch_stream_log(const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogStreamFetchLogResp &res) { + UNUSED(req); + UNUSED(res); + return OB_NOT_IMPLEMENT; + } + virtual int req_svr_feedback(const ReqLogSvrFeedback &feedback) + { + UNUSED(feedback); + return OB_SUCCESS; + } +}; + +/* + * Factory. + */ +class MockRpcInterfaceFactory : public IFetcherRpcInterfaceFactory +{ +public: + virtual int new_fetcher_rpc_interface(IFetcherRpcInterface*& rpc) + { + rpc = new MockRpcInterface(); + return OB_SUCCESS; + } + virtual int delete_fetcher_rpc_interface(IFetcherRpcInterface* rpc) + { + delete rpc; + return OB_SUCCESS; + } +}; + +/* + * Worker. + */ +class TestWorker : public Runnable +{ +public: + StartLogIdLocator *locator_; + virtual int routine() + { + // Build requests. + const int64_t AllSvrCnt = 3; + ObAddr svrs[AllSvrCnt]; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svrs[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + const int64_t RequestCnt = 10 * 10000; + StartLogIdLocatorRequest *request_array = new StartLogIdLocatorRequest[RequestCnt]; + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + StartLogIdLocatorRequest &r = request_array[idx]; + r.reset(); + r.pkey_ = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + r.start_tstamp_ = 1 + idx; + // Set server list. + for (int64_t idx2 = 0, cnt2 = AllSvrCnt; idx2 < cnt2; ++idx2) { + StartLogIdLocatorRequest::SvrListItem item; + item.reset(); + item.svr_ = svrs[idx2]; + r.svr_list_.push_back(item); + } + } + // Push requests into locator. + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + StartLogIdLocatorRequest &r = request_array[idx]; + EXPECT_EQ(OB_SUCCESS, locator_->async_start_log_id_req(&r)); + if (0 == (idx % 1000)) { + usec_sleep(10 * _MSEC_); + } + } + // Wait for requests end. Max test time should set. + int64_t end_request_cnt = 0; + const int64_t TestTimeLimit = 10 * _MIN_; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TestTimeLimit) + && (end_request_cnt < RequestCnt)) { + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + StartLogIdLocatorRequest &r = request_array[idx]; + if (StartLogIdLocatorRequest::DONE == r.get_state()) { + end_request_cnt += 1; + r.set_state(StartLogIdLocatorRequest::IDLE); + } + } + usec_sleep(100 * _MSEC_); + } + // Assert if test cannot finish. + EXPECT_EQ(RequestCnt, end_request_cnt); + // Do some statistics. + int64_t svr_consume_distribution[AllSvrCnt]; // 1, 2, 3, ... 
+ for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svr_consume_distribution[idx] = 0; + } + int64_t succ_cnt = 0; + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + StartLogIdLocatorRequest &r = request_array[idx]; + EXPECT_GE(r.svr_list_consumed_, 0); + svr_consume_distribution[(r.svr_list_consumed_ - 1)] += 1; + uint64_t start_log_id = 0; + if (r.get_result(start_log_id)) { + succ_cnt += 1; + EXPECT_EQ(1, start_log_id); + } + } + delete[] request_array; + const int64_t BuffSize = 1024; + char buf[BuffSize]; + int64_t pos = 0; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + pos += snprintf(buf + pos, BuffSize - pos, "svr_cnt:%ld perc:%f ", (1 + idx), + ((double)svr_consume_distribution[idx] / (double)RequestCnt)); + } + fprintf(stderr, "request count: %ld distribution: %s succeed perc: %f \n", + RequestCnt, buf, (double)succ_cnt / (double)RequestCnt); + return OB_SUCCESS; + } +}; + +TEST(StartLogIdLocator, BasicFuncTest1) +{ + MockFetcherErrHandler1 err_handler1; + MockRpcInterfaceFactory rpc_factory; + FixedJobPerWorkerPool worker_pool; + StartLogIdLocator locator; + + int err = OB_SUCCESS; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + err = locator.init(&rpc_factory, &err_handler1, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + const int64_t TestWorkerCnt = 3; + TestWorker workers[TestWorkerCnt]; + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.locator_ = &locator; + w.create(); + } + + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.join(); + } + + err = locator.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + err = worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); +} + +} + +} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_fetcher_stream.cpp b/unittest/liboblog/test_log_fetcher_stream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35d5f05c856e20c42e595ec20a89c4c41fd3daca --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_stream.cpp @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include + +#include "share/ob_define.h" +#include "liboblog/src/ob_log_fetcher_stream.h" + +#include "test_log_fetcher_common_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ +// Deprecated. Del me later. 
+} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_fetcher_svr_finder.cpp b/unittest/liboblog/test_log_fetcher_svr_finder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e0a2fe4c3080415b76fe82510b955b4e33a02155 --- /dev/null +++ b/unittest/liboblog/test_log_fetcher_svr_finder.cpp @@ -0,0 +1,242 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include + +#include "share/ob_define.h" +#include "lib/container/ob_se_array.h" + +#include "liboblog/src/ob_log_fetcher_svr_finder.h" + +#include "test_log_fetcher_common_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace fetcher; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * SvrFinder Tests. + * + */ +/* + * Basic function test 1. + * - N thread & M requests for each thread + */ +namespace basic_func_test_1 +{ +class MockSystableHelper : public ObILogSysTableHelper +{ +public: + virtual int query_all_clog_history_info_by_log_id_1(const common::ObPartitionKey &pkey, const uint64_t log_id, + AllClogHistoryInfos &records) { + // Generate random results. + int ret = OB_SUCCESS; + int64_t seed = get_timestamp(); + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = 1 + (seed % 6); + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = pkey.get_partition_cnt(); + rec.start_log_id_ = log_id; + rec.end_log_id_ = log_id + 10000; + rec.start_log_timestamp_ = seed - (1 * _HOUR_); + rec.end_log_timestamp_ = seed + (1 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (seed % 128)); + rec.svr_port_ = 8888; + records.push_back(rec); + seed += 13; + } + return ret; + } + + virtual int query_all_clog_history_info_by_timestamp_1(const common::ObPartitionKey &pkey, const int64_t timestamp, + AllClogHistoryInfos &records) { + // Generate random results. 
+ int ret = OB_SUCCESS; + int64_t seed = get_timestamp(); + records.reset(); + AllClogHistoryInfoRecord rec; + const int64_t cnt = 1 + (seed % 6); + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + rec.table_id_ = (uint64_t)(pkey.table_id_); + rec.partition_idx_ = (int32_t)(pkey.get_partition_id()); + rec.partition_cnt_ = (int32_t)(pkey.get_partition_cnt()); + rec.start_log_id_ = 0; + rec.end_log_id_ = 65536; + rec.start_log_timestamp_ = timestamp - (1 * _HOUR_); + rec.end_log_timestamp_ = timestamp + (1 * _HOUR_); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (seed % 128)); + rec.svr_port_ = 8888; + records.push_back(rec); + seed += 13; + } + return ret; + } + virtual int query_all_meta_table_1(const common::ObPartitionKey &pkey, AllMetaTableRecords &records) { + // Generate random results. + int ret = OB_SUCCESS; + UNUSED(pkey); + int64_t seed = get_timestamp(); + records.reset(); + AllMetaTableRecord rec; + const int64_t cnt = 1 + (seed % 6); + for (int64_t idx = 0; idx < cnt; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (seed % 128)); + rec.svr_port_ = 8888; + rec.role_ = (0 == idx) ? LEADER : FOLLOWER; + records.push_back(rec); + seed += 13; + } + return ret; + } + virtual int query_all_meta_table_for_leader( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + has_leader = true; + leader.set_ip_addr("127.0.0.1", 8888); + return OB_SUCCESS; + } + virtual int query_all_server_table_1(AllServerTableRecords &records) { + int ret = OB_SUCCESS; + records.reset(); + AllServerTableRecord rec; + for (int64_t idx = 0; idx < 128; ++idx) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", (idx)); + rec.svr_port_ = 8888; + records.push_back(rec); + } + return ret; + } +}; + +/* + * Worker. + */ +class TestWorker : public Runnable +{ +public: + SvrFinder *svrfinder_; + virtual int routine() + { + // Build requests. + const int64_t RequestCnt = 10 * 10000; + SvrFindReq *request_array = new SvrFindReq[RequestCnt]; + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + SvrFindReq &r = request_array[idx]; + r.reset(); + r.pkey_ = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + const int64_t seed = get_timestamp(); + if ((seed % 100) < 50) { + r.req_by_start_tstamp_ = true; + r.start_tstamp_ = seed; + } + else { + r.req_by_next_log_id_ = true; + r.next_log_id_ = (uint64_t)(seed % 65536); + } + } + // Push requests into svrfinder. + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + SvrFindReq &r = request_array[idx]; + EXPECT_EQ(OB_SUCCESS, svrfinder_->async_svr_find_req(&r)); + if (0 == (idx % 1000)) { + usec_sleep(10 * _MSEC_); + } + } + // Wait for requests end. Max test time should set. + int64_t end_request_cnt = 0; + const int64_t TestTimeLimit = 10 * _MIN_; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TestTimeLimit) + && (end_request_cnt < RequestCnt)) { + for (int64_t idx = 0, cnt = RequestCnt; idx < cnt; ++idx) { + SvrFindReq &r = request_array[idx]; + if (SvrFindReq::DONE == r.get_state()) { + end_request_cnt += 1; + _E_(">>> svr list size", "size", r.svr_list_.count()); + r.set_state_idle(); + } + } + usec_sleep(100 * _MSEC_); + } + // Assert if test cannot finish. 
+ EXPECT_EQ(RequestCnt, end_request_cnt); + delete[] request_array; + return OB_SUCCESS; + } +}; + +TEST(DISABLED_SvrFinder, BasicFuncTest1) +{ + MockFetcherErrHandler1 err_handler1; + MockSystableHelper systable_helper; + FixedJobPerWorkerPool worker_pool; + SvrFinder svrfinder; + + int err = OB_SUCCESS; + err = worker_pool.init(1); + EXPECT_EQ(OB_SUCCESS, err); + + err = svrfinder.init(&systable_helper, &err_handler1, &worker_pool, 3); + EXPECT_EQ(OB_SUCCESS, err); + + const int64_t TestWorkerCnt = 3; + TestWorker workers[TestWorkerCnt]; + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.svrfinder_ = &svrfinder; + w.create(); + } + + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.join(); + } + + err = svrfinder.destroy(); + EXPECT_EQ(OB_SUCCESS, err); + + err = worker_pool.destroy(); + EXPECT_EQ(OB_SUCCESS, err); +} + +} + +} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_mysql_connector.cpp b/unittest/liboblog/test_log_mysql_connector.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ced672413ed5f67044045e41f8f2de194c4caf9 --- /dev/null +++ b/unittest/liboblog/test_log_mysql_connector.cpp @@ -0,0 +1,138 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include + +#include "gtest/gtest.h" +#include "lib/allocator/ob_malloc.h" +#include "lib/net/ob_addr.h" + +#include "ob_log_mysql_connector.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ + +class PrintRow : public MySQLQueryBase +{ +public: + PrintRow() + { + sql_ = "select table_id, partition_id, ip, port, role " + "from __all_root_table"; + sql_len_ = strlen(sql_); + } + int print_row() + { + int ret = common::OB_SUCCESS; + while (common::OB_SUCCESS == (ret = next_row())) { + uint64_t table_id = 0; + int32_t partition_id = 0; + ObAddr addr; + if (OB_SUCC(ret)) { + if (common::OB_SUCCESS != (ret = get_uint(0, table_id))) { + OBLOG_LOG(WARN, "err get uint", K(ret)); + } + } + if (OB_SUCC(ret)) { + int64_t val = 0; + if (common::OB_SUCCESS != (ret = get_int(1, val))) { + OBLOG_LOG(WARN, "err get int", K(ret)); + } else { + partition_id = static_cast(val); + } + } + if (OB_SUCC(ret)) { + ObString ip_str; + int64_t port = 0; + if (common::OB_SUCCESS != (ret = get_varchar(2, ip_str))) { + OBLOG_LOG(WARN, "err get var char", K(ret)); + } else if (common::OB_SUCCESS != (ret = get_int(3, port))) { + OBLOG_LOG(WARN, "err get int", K(ret)); + } else { + addr.set_ip_addr(ip_str, static_cast(port)); + } + } + // Print values. + if (OB_SUCC(ret)) { + OBLOG_LOG(INFO, "\n>>>", K(table_id), + K(partition_id), + K(addr)); + } + } + ret = (common::OB_ITER_END == ret) ? 
common::OB_SUCCESS : ret; + return ret; + } +}; + +class CreateTable : public MySQLQueryBase +{ +public: + CreateTable(const char *tname) + { + snprintf(buf_, 512, "create table %s(c1 int primary key)", tname); + sql_ = buf_; + sql_len_ = strlen(sql_); + } +private: + char buf_[512]; +}; + +TEST(MySQLConnector, run) +{ + ConnectorConfig cfg; + cfg.mysql_addr_ = "10.210.177.162"; + cfg.mysql_port_ = 26556; + cfg.mysql_user_ = "root"; + cfg.mysql_password_ = ""; + cfg.mysql_db_ = "oceanbase"; + cfg.mysql_timeout_ = 100; + + ObLogMySQLConnector conn; + + int ret = conn.init(cfg); + EXPECT_EQ(OB_SUCCESS, ret); + + // Print rows. + PrintRow pr; + ret = conn.query(pr); + EXPECT_EQ(OB_SUCCESS, ret); + ret = pr.print_row(); + EXPECT_EQ(OB_SUCCESS, ret); + + // Create dup tables. + CreateTable ct("table_1"); + ret = conn.exec(ct); + EXPECT_EQ(OB_SUCCESS, ret); + ret = conn.exec(ct); + EXPECT_EQ(OB_SUCCESS, ret); + + ret = conn.destroy(); + EXPECT_EQ(OB_SUCCESS, ret); + +} + +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("debug"); + testing::InitGoogleTest(&argc,argv); + testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_part_mgr.cpp b/unittest/liboblog/test_log_part_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..97836e561bb9f6f8357d18f9c96a71d2cf6d1b2d --- /dev/null +++ b/unittest/liboblog/test_log_part_mgr.cpp @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include + +#include "gtest/gtest.h" +#include "lib/allocator/ob_malloc.h" + +#include "ob_log_part_mgr.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ + + + +TEST(ObLogPartMgr, Function1) +{ + // -- TODO -- +} + + +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("debug"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_part_mgr_mock.h b/unittest/liboblog/test_log_part_mgr_mock.h new file mode 100644 index 0000000000000000000000000000000000000000..d5f8101cdff0ac686216f528f05912af9dcc2ef4 --- /dev/null +++ b/unittest/liboblog/test_log_part_mgr_mock.h @@ -0,0 +1,159 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "ob_log_part_mgr.h" // ObLogPartMgr + +using namespace oceanbase::common; +using namespace oceanbase::liboblog; +using namespace oceanbase::transaction; + +class MockObLogPartMgr : public IObLogPartMgr +{ +public: + static const int64_t START_TIMESTAMP = 1452763440; + static const int64_t CUR_SCHEMA_VERSION = 100; + + MockObLogPartMgr(): start_tstamp_(START_TIMESTAMP), cur_schema_version_(CUR_SCHEMA_VERSION) + { } + + ~MockObLogPartMgr() + { } + + virtual int add_table(const uint64_t table_id, + const int64_t start_schema_version, + const int64_t start_server_tstamp, + const int64_t timeout) + { + UNUSED(table_id); + UNUSED(start_schema_version); + UNUSED(start_server_tstamp); + UNUSED(timeout); + return OB_SUCCESS; + } + + virtual int drop_table(const uint64_t table_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + const int64_t timeout) + { + UNUSED(table_id); + UNUSED(schema_version_before_drop); + UNUSED(schema_version_after_drop); + UNUSED(timeout); + return OB_SUCCESS; + } + + virtual int drop_tenant(const uint64_t tenant_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + const int64_t timeout) + { + UNUSED(tenant_id); + UNUSED(schema_version_before_drop); + UNUSED(schema_version_after_drop); + UNUSED(timeout); + return OB_SUCCESS; + } + + virtual int drop_database(const uint64_t database_id, + const int64_t schema_version_before_drop, + const int64_t schema_version_after_drop, + const int64_t timeout) + { + UNUSED(database_id); + UNUSED(schema_version_before_drop); + UNUSED(schema_version_after_drop); + UNUSED(timeout); + return OB_SUCCESS; + } + + virtual int add_all_tables(const int64_t schema_version, const int64_t start_tstamp) + { + UNUSED(schema_version); + UNUSED(start_tstamp); + return OB_SUCCESS; + } + + virtual int update_schema_version(const int64_t schema_version) + { + UNUSED(schema_version); + return OB_SUCCESS; + } + virtual int inc_part_trans_count_on_serving(bool &is_serving, + const ObPartitionKey &key, + const uint64_t prepare_log_id, + const int64_t prepare_log_timestamp, + const int64_t timeout) + { + if (prepare_log_timestamp < start_tstamp_) { + // If the Prepare log timestamp is less than the start timestamp, it must not be served + is_serving = false; + } else { + is_serving = true; + } + + UNUSED(key); + UNUSED(prepare_log_id); + UNUSED(timeout); + return OB_SUCCESS; + } + + virtual int dec_part_trans_count(const ObPartitionKey &key) + { + UNUSED(key); + return OB_SUCCESS; + } + virtual int update_part_info(const ObPartitionKey &pkey, const uint64_t start_log_id) + { + UNUSED(pkey); + UNUSED(start_log_id); + return OB_SUCCESS; + } + virtual int table_group_match(const char *pattern, bool &is_matched, + int fnmatch_flags = FNM_CASEFOLD) + { + UNUSED(pattern); + UNUSED(is_matched); + UNUSED(fnmatch_flags); + return OB_SUCCESS; + } + virtual int get_table_groups(std::vector &table_groups) + { + UNUSED(table_groups); + return OB_SUCCESS; + } + virtual int register_part_add_callback(PartAddCallback *callback) + { + UNUSED(callback); + return OB_SUCCESS; + } + virtual int register_part_rm_callback(PartRMCallback *callback) + { + UNUSED(callback); + return OB_SUCCESS; + } + virtual int register_part_recycle_callback(PartRecycleCallback *callback) + { + UNUSED(callback); + return OB_SUCCESS; + } + virtual void print_part_info() {} + +private: + int64_t start_tstamp_; + int64_t cur_schema_version_; + +private: + DISALLOW_COPY_AND_ASSIGN(MockObLogPartMgr); 
+}; + diff --git a/unittest/liboblog/test_log_sql_server_provider.cpp b/unittest/liboblog/test_log_sql_server_provider.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e893329c00d2f592552b003da065a3fa5d05f70 --- /dev/null +++ b/unittest/liboblog/test_log_sql_server_provider.cpp @@ -0,0 +1,248 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG + +#include +#include "share/ob_web_service_root_addr.h" // to_json +#include "lib/oblog/ob_log_module.h" +#include "lib/string/ob_sql_string.h" // ObSqlString + +#include "ob_log_sql_server_provider.h" + +namespace oceanbase +{ +using namespace common; +using namespace share; + +namespace liboblog +{ +#define CONFIG_SERVER_PORT 6789 +#define CONFIG_SERVER_PORT_STR "6789" +#define CONFIG_SERVER_IP "127.0.0.1" +#define CONFIG_SERVER_PROGRAM "./fake_config_server" + +class TestLogSQLServerProvider : public ::testing::Test +{ + static const int64_t MAX_JASON_BUFFER_SIZE = 1 << 10; + static const int64_t MAX_CONFIG_URL_LENGTH = 1 << 10; + static const int64_t MAX_APPNAME_LENGTH = 1 << 10; + +public: + TestLogSQLServerProvider() : rs_leader_(), + rs_follower_1_(), + rs_follower_2_(), + service_pid_(0), + server_provider_() + {} + + virtual void SetUp(); + virtual void TearDown(); + + void set_rs_list(const ObRootAddrList &rs_list); + +protected: + ObRootAddr rs_leader_; + ObRootAddr rs_follower_1_; + ObRootAddr rs_follower_2_; + + pid_t service_pid_; + ObLogSQLServerProvider server_provider_; + + char appname_[MAX_APPNAME_LENGTH]; + char config_url_[MAX_CONFIG_URL_LENGTH]; + char json_buffer_[MAX_JASON_BUFFER_SIZE]; +}; + +void TestLogSQLServerProvider::SetUp() +{ + int ret = OB_SUCCESS; + const char *config_url_arbitrary_str = "i_am_an_arbitrary_string/versin=1"; + const char *appname_str = "test"; + + // Constructing the ConfigURL + // Note that the URL is actually only accessible as "http::/IP:PORT", the subsequent string is an arbitrary string + (void)snprintf(config_url_, sizeof(config_url_), "http://%s:%d/%s", + CONFIG_SERVER_IP, CONFIG_SERVER_PORT, config_url_arbitrary_str); + + (void)snprintf(appname_, sizeof(appname_), "%s", appname_str); + + // Create configure server simulation process + pid_t pid = fork(); + if (0 == pid) { + // Set the child process to a new child process group to facilitate KILL + ret = setpgid(pid, pid); + if (ret < 0) { + LOG_ERROR("setpgid failed", K(errno)); + } else if (-1 == (ret = execl( + "/bin/bash", CONFIG_SERVER_PROGRAM, CONFIG_SERVER_PROGRAM, CONFIG_SERVER_PORT_STR, (char *)NULL))) { + LOG_ERROR("execl failed", K(errno)); + } + exit(1); + } else if (-1 == pid) { + LOG_ERROR("fork failed", K(errno)); + } else { + LOG_INFO("create child", K(pid)); + service_pid_ = pid; + + // wait child process execute. 
+ usleep(100000); + } + + // init rs addr list + ObSEArray rs_list; + + rs_leader_.server_.set_ip_addr("10.210.170.11", 100); + rs_leader_.role_ = LEADER; + rs_leader_.sql_port_ = 2828; + rs_list.push_back(rs_leader_); + + rs_follower_1_.server_.set_ip_addr("10.210.170.16", 200); + rs_follower_1_.role_ = FOLLOWER; + rs_follower_1_.sql_port_ = 3838; + rs_list.push_back(rs_follower_1_); + + rs_follower_2_.server_.set_ip_addr("10.210.180.96", 300); + rs_follower_2_.role_ = FOLLOWER; + rs_follower_2_.sql_port_ = 4848; + rs_list.push_back(rs_follower_2_); + + // Setting up the Rootserver list + set_rs_list(rs_list); + + // init Server Provider + ret = server_provider_.init(config_url_, appname_); + ASSERT_EQ(OB_SUCCESS, ret); + + // verify the 3 rootservers + ObAddr server; + EXPECT_EQ(3, server_provider_.get_server_count()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(0, server)); + EXPECT_EQ(rs_leader_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_leader_.sql_port_, server.get_port()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(1, server)); + EXPECT_EQ(rs_follower_1_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_follower_1_.sql_port_, server.get_port()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(2, server)); + EXPECT_EQ(rs_follower_2_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_follower_2_.sql_port_, server.get_port()); + EXPECT_EQ(OB_ENTRY_NOT_EXIST, server_provider_.get_server(3, server)); +} + +void TestLogSQLServerProvider::TearDown() +{ + int status = 0; + int64_t orig_server_count = server_provider_.get_server_count(); + + // Sends SIGINT to all processes in the process group of the child process + kill(-service_pid_, SIGINT); + + pid_t pid = wait(&status); + LOG_INFO("child exit", K(pid)); + + // Refresh fails if the config server no longer exists + EXPECT_NE(OB_SUCCESS, server_provider_.refresh_server_list()); + + // A failed refresh does not modify the previous server list + EXPECT_EQ(orig_server_count, server_provider_.get_server_count()); +} + +void TestLogSQLServerProvider::set_rs_list(const ObRootAddrList &rs_list) +{ + int ret = OB_SUCCESS; + ObSqlString cmd; + ObSqlString json; + + usleep(50000); + const int64_t cluster_id = 100; + ret = ObWebServiceRootAddr::to_json(rs_list, appname_, cluster_id, json); + ASSERT_EQ(OB_SUCCESS, ret); + LOG_INFO("to_json", K(json)); + + ret = cmd.assign_fmt("echo -n 'POST / HTTP/1.1\r\nContent-Length: %ld\r\n%s' | nc %s %d &> /dev/null", + json.length(), json.ptr(), CONFIG_SERVER_IP, CONFIG_SERVER_PORT); + ASSERT_EQ(OB_SUCCESS, ret); + + ret = system(cmd.ptr()); + usleep(50000); +} + +TEST_F(TestLogSQLServerProvider, fetch) +{ + ObAddr server; + ObSEArray rs_list; + + // Test zero RS + rs_list.reuse(); + set_rs_list(rs_list); // Set up a new RS list + ASSERT_EQ(OB_SUCCESS, server_provider_.refresh_server_list()); // Refresh RS list + EXPECT_EQ(0, server_provider_.get_server_count()); + EXPECT_EQ(OB_ENTRY_NOT_EXIST, server_provider_.get_server(0, server)); + + // Test one RS + rs_list.reuse(); + rs_list.push_back(rs_leader_); + set_rs_list(rs_list); // Set up a new RS list + ASSERT_EQ(OB_SUCCESS, server_provider_.refresh_server_list()); // Refresh RS list + EXPECT_EQ(1, server_provider_.get_server_count()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(0, server)); + EXPECT_EQ(rs_leader_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_leader_.sql_port_, server.get_port()); // The server port should be the SQL port + + EXPECT_EQ(OB_ENTRY_NOT_EXIST, server_provider_.get_server(1, server)); +
EXPECT_EQ(OB_INVALID_ARGUMENT, server_provider_.get_server(-1, server)); + + // Test two RS + rs_list.reuse(); + rs_list.push_back(rs_leader_); + rs_list.push_back(rs_follower_1_); + set_rs_list(rs_list); // Set up a new RS list + ASSERT_EQ(OB_SUCCESS, server_provider_.refresh_server_list()); // Refresh RS list + EXPECT_EQ(2, server_provider_.get_server_count()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(0, server)); + EXPECT_EQ(rs_leader_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_leader_.sql_port_, server.get_port()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(1, server)); + EXPECT_EQ(rs_follower_1_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_follower_1_.sql_port_, server.get_port()); + EXPECT_EQ(OB_ENTRY_NOT_EXIST, server_provider_.get_server(2, server)); + + // Test three RS + rs_list.reuse(); + rs_list.push_back(rs_leader_); + rs_list.push_back(rs_follower_1_); + rs_list.push_back(rs_follower_2_); + set_rs_list(rs_list); // Set up a new RS list + ASSERT_EQ(OB_SUCCESS, server_provider_.refresh_server_list()); // Refresh RS list + EXPECT_EQ(3, server_provider_.get_server_count()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(0, server)); + EXPECT_EQ(rs_leader_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_leader_.sql_port_, server.get_port()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(1, server)); + EXPECT_EQ(rs_follower_1_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_follower_1_.sql_port_, server.get_port()); + EXPECT_EQ(OB_SUCCESS, server_provider_.get_server(2, server)); + EXPECT_EQ(rs_follower_2_.server_.get_ipv4(), server.get_ipv4()); + EXPECT_EQ(rs_follower_2_.sql_port_, server.get_port()); + EXPECT_EQ(OB_ENTRY_NOT_EXIST, server_provider_.get_server(3, server)); +} + +} // end namespace share +} // end namespace oceanbase + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_svr_blacklist.cpp b/unittest/liboblog/test_log_svr_blacklist.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f8b5bcc246a8e436658cb056e13150467b6c26b3 --- /dev/null +++ b/unittest/liboblog/test_log_svr_blacklist.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "share/ob_define.h" +#include "lib/oblog/ob_log.h" +#include "liboblog/src/ob_log_svr_blacklist.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ + +class SvrBlacklist : public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +}; + +TEST(SvrBlacklist, BasicTest1) +{ + int err = OB_SUCCESS; + ObLogSvrBlacklist svr_blacklist; + const char *svr_blacklist_str ="|"; + const bool is_sql_server = false; + + err = svr_blacklist.init(svr_blacklist_str, is_sql_server); + EXPECT_EQ(OB_SUCCESS, err); + + ObAddr svr1(ObAddr::IPV4, "127.0.0.1", 2880); + EXPECT_FALSE(svr_blacklist.is_exist(svr1)); + ObAddr svr2(ObAddr::IPV4, "127.0.0.2", 2881); + EXPECT_FALSE(svr_blacklist.is_exist(svr2)); + ObAddr svr3(ObAddr::IPV4, "127.0.0.3", 2882); + EXPECT_FALSE(svr_blacklist.is_exist(svr3)); + + ObAddr svr4(ObAddr::IPV4, "127.0.0.1", 2881); + EXPECT_FALSE(svr_blacklist.is_exist(svr4)); + ObAddr svr5(ObAddr::IPV4, "127.0.0.4", 2881); + EXPECT_FALSE(svr_blacklist.is_exist(svr5)); + + svr_blacklist.destroy(); +} + +TEST(SvrBlacklist, BasicTest2) +{ + int err = OB_SUCCESS; + + ObLogSvrBlacklist svr_blacklist; + const char *svr_blacklist_str ="127.0.0.1:2880"; + const bool is_sql_server = false; + + err = svr_blacklist.init(svr_blacklist_str, is_sql_server); + EXPECT_EQ(OB_SUCCESS, err); + ObAddr svr1(ObAddr::IPV4, "127.0.0.1", 2880); + EXPECT_TRUE(svr_blacklist.is_exist(svr1)); + ObAddr svr2(ObAddr::IPV4, "127.0.0.2", 2881); + EXPECT_FALSE(svr_blacklist.is_exist(svr2)); + ObAddr svr3(ObAddr::IPV4, "127.0.0.3", 2882); + EXPECT_FALSE(svr_blacklist.is_exist(svr3)); + + const char *svr_blacklist_str2="127.0.0.1:2880|127.0.0.2:2881|127.0.0.3:2882"; + svr_blacklist.refresh(svr_blacklist_str2); + EXPECT_TRUE(svr_blacklist.is_exist(svr1)); + EXPECT_TRUE(svr_blacklist.is_exist(svr2)); + EXPECT_TRUE(svr_blacklist.is_exist(svr3)); + + svr_blacklist.destroy(); +} + +TEST(SvrBlacklist, BasicTest3) +{ + int err = OB_SUCCESS; + + ObLogSvrBlacklist svr_blacklist; + const char *svr_blacklist_str ="127.0.0.1:2880|127.0.0.2:2881|127.0.0.3:2882"; + const bool is_sql_server = false; + + err = svr_blacklist.init(svr_blacklist_str, is_sql_server); + EXPECT_EQ(OB_SUCCESS, err); + + ObAddr svr1(ObAddr::IPV4, "127.0.0.1", 2880); + EXPECT_TRUE(svr_blacklist.is_exist(svr1)); + ObAddr svr2(ObAddr::IPV4, "127.0.0.2", 2881); + EXPECT_TRUE(svr_blacklist.is_exist(svr2)); + ObAddr svr3(ObAddr::IPV4, "127.0.0.3", 2882); + EXPECT_TRUE(svr_blacklist.is_exist(svr3)); + + ObAddr svr4(ObAddr::IPV4, "127.0.0.1", 2881); + EXPECT_FALSE(svr_blacklist.is_exist(svr4)); + ObAddr svr5(ObAddr::IPV4, "127.0.0.4", 2881); + EXPECT_FALSE(svr_blacklist.is_exist(svr5)); + + svr_blacklist.destroy(); +} + +} +} + +int main(int argc, char **argv) +{ + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_svr_blacklist.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc,argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_log_table_matcher.cpp b/unittest/liboblog/test_log_table_matcher.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7322c639f66a6ba58cc27861d275596e01b52d45 --- /dev/null +++ b/unittest/liboblog/test_log_table_matcher.cpp @@ -0,0 +1,258 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include + +#include "share/ob_define.h" + +#include "liboblog/src/ob_log_table_matcher.h" + + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * TEST1. + * Test fnmatch. + * Used to study fnmatch(). + */ +/* + * Test Functions. + * fnmatch Prototype: + * int fnmatch(const char *pattern, const char *string, int flags); + */ +void CASE_MATCH(const char *pattern, const char *string, int flags = 0) +{ + int err = fnmatch(pattern, string, flags); + EXPECT_EQ(0, err); + fprintf(stderr, ">>> %s: \t\"%s\" -> \"%s\"\n", + (0 == err) ? "MATCH" : "NOMATCH", + pattern, string); +} + +void CASE_NOMATCH(const char *pattern, const char *string, int flags = 0) +{ + int err = fnmatch(pattern, string, flags); + EXPECT_EQ(FNM_NOMATCH, err); + fprintf(stderr, ">>> %s: \t\"%s\" -> \"%s\"\n", + (0 == err) ? "MATCH" : "NOMATCH", + pattern, string); +} +TEST(DISABLED_TableMatcher, Fnmatch1) +{ + CASE_MATCH("sky*", "SkyBlue", FNM_CASEFOLD); + CASE_NOMATCH("sky*[!e]", "SkyBlue", FNM_CASEFOLD); + CASE_MATCH("ab\\0c", "ab\\0c"); +} + +/* + * TEST2. + * Test TableMatcher. + */ +TEST(TableMatcher, BasicTest1) +{ + int err = OB_SUCCESS; + ObLogTableMatcher matcher; + const char *tb_whilte_list="TN1.DB-A*.table_1*|" + "TN2.DB-A*.TABLE_2*|" + "tn3.db-a*.table_*_tmp"; + const char *tb_black_list="|"; + const char *tg_whilte_list="*.*"; + const char *tg_black_list="|"; + + err = matcher.init(tb_whilte_list, tb_black_list, tg_whilte_list, tg_black_list); + EXPECT_EQ(OB_SUCCESS, err); + + int flag = FNM_CASEFOLD; + + // Test match. + bool matched = false; + err = matcher.table_match("tn1", "db-a-1", "table_1_1", matched, flag); + EXPECT_TRUE(matched); + + err = matcher.table_match("tn1", "db-b-1", "table_1_1", matched, flag); + EXPECT_FALSE(matched); + + err = matcher.table_match("tn3", "db-a-2", "table_1_tmp", matched, flag); + EXPECT_TRUE(matched); + + matcher.destroy(); +} + +/* + * TEST3. + * Test TableMatcher static match. + */ +TEST(TableMatcher, BasicTest2) +{ + int err = OB_SUCCESS; + const char *tb_whilte_list="*.*.*"; + const char *tb_black_list="|"; + const char *tg_whilte_list="*.*"; + const char *tg_black_list="|"; + + ObLogTableMatcher matcher; + + err = matcher.init(tb_whilte_list, tb_black_list, tg_whilte_list, tg_black_list); + EXPECT_EQ(OB_SUCCESS, err); + + int flag = FNM_CASEFOLD; + + // Case 1. Match. + { + const char *pattern1 = "tn1.db1*|tn2.db2*|tn3.db3*|tn4.db4*"; + ObArray pattern2; + err = pattern2.push_back(ObString("tn1.db1")); + EXPECT_EQ(OB_SUCCESS, err); + err = pattern2.push_back(ObString("tnx.dbx")); + EXPECT_EQ(OB_SUCCESS, err); + + bool matched = false; + err = matcher.match(pattern1, pattern2, matched, flag); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_TRUE(matched); + } + + // Case 2. No match. 
+ { + const char *pattern1 = "tn1.db1*|tn2.db2*|tn3.db3*|tn4.db4*"; + ObArray pattern2; + err = pattern2.push_back(ObString("tnx.dbx")); + EXPECT_EQ(OB_SUCCESS, err); + err = pattern2.push_back(ObString("tny.dby")); + EXPECT_EQ(OB_SUCCESS, err); + + bool matched = false; + err = matcher.match(pattern1, pattern2, matched, flag); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_FALSE(matched); + } + + // Case 3. Empty pattern1. + { + const char *pattern1 = ""; + ObArray pattern2; + err = pattern2.push_back(ObString("tnx.dbx")); + EXPECT_EQ(OB_SUCCESS, err); + err = pattern2.push_back(ObString("tny.dby")); + EXPECT_EQ(OB_SUCCESS, err); + + bool matched = false; + err = matcher.match(pattern1, pattern2, matched, flag); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_FALSE(matched); + } + + // Case 4. Invalid pattern1. + { + const char *pattern1 = "|"; + ObArray pattern2; + err = pattern2.push_back(ObString("tnx.dbx")); + EXPECT_EQ(OB_SUCCESS, err); + err = pattern2.push_back(ObString("tny.dby")); + EXPECT_EQ(OB_SUCCESS, err); + + bool matched = false; + err = matcher.match(pattern1, pattern2, matched, flag); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_FALSE(matched); + } + + matcher.destroy(); +} + +// test tablegroup +TEST(TableMatcher, BasicTest3) +{ + int err = OB_SUCCESS; + ObLogTableMatcher matcher; + const char *tb_whilte_list="*.*.*"; + const char *tb_black_list="|"; + const char *tg_whilte_list="tt1.alitg*"; + const char *tg_black_list="|"; + + err = matcher.init(tb_whilte_list, tb_black_list, tg_whilte_list, tg_black_list); + EXPECT_EQ(OB_SUCCESS, err); + + int flag = FNM_CASEFOLD; + + // Test match. + bool matched = false; + err = matcher.tablegroup_match("tt1", "alitg1", matched, flag); + EXPECT_TRUE(matched); + + err = matcher.tablegroup_match("tt1", "alitg2", matched, flag); + EXPECT_TRUE(matched); + + EXPECT_FALSE(matched); + + err = matcher.tablegroup_match("tt2", "alitg1", matched, flag); + EXPECT_FALSE(matched); + + matcher.destroy(); +} + +TEST(TableMatcher, BasicTest4) +{ + int err = OB_SUCCESS; + ObLogTableMatcher matcher; + const char *tb_whilte_list="*.*.*"; + const char *tb_black_list="|"; + const char *tg_whilte_list="tt1.alitg*|tt1.anttg*"; + const char *tg_black_list="tt1.alitg*"; + + err = matcher.init(tb_whilte_list, tb_black_list, tg_whilte_list, tg_black_list); + EXPECT_EQ(OB_SUCCESS, err); + + int flag = FNM_CASEFOLD; + // Whitelist matches, but blacklist does not + + // Test match. 
+ bool matched = false; + err = matcher.tablegroup_match("tt1", "alitg1", matched, flag); + EXPECT_FALSE(matched); + + err = matcher.tablegroup_match("tt1", "alitg2", matched, flag); + EXPECT_FALSE(matched); + + err = matcher.tablegroup_match("tt1", "anttg1", matched, flag); + EXPECT_TRUE(matched); + + err = matcher.tablegroup_match("tt1", "anttghello", matched, flag); + EXPECT_TRUE(matched); + + err = matcher.tablegroup_match("tt2", "anttghello", matched, flag); + EXPECT_FALSE(matched); + + matcher.destroy(); +} + +} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_table_match.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_task_pool.cpp b/unittest/liboblog/test_log_task_pool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..142bc67afe00a343eb7922e32e9d7b5294b03662 --- /dev/null +++ b/unittest/liboblog/test_log_task_pool.cpp @@ -0,0 +1,137 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include + +#include "gtest/gtest.h" +#include "lib/allocator/ob_malloc.h" +#include "lib/allocator/ob_concurrent_fifo_allocator.h" + +#include "liboblog/src/ob_log_task_pool.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ + +class MockTransTask : public TransTaskBase +{ +public: + void foo() { bar_ += 1; } + + void set_allocator(int64_t page_size, common::ObIAllocator &large_allocator) + { + UNUSED(page_size); + UNUSED(large_allocator); + } + + void set_prealloc_page(void *page) + { + UNUSED(page); + } + + void revert_prealloc_page(void *page) + { + UNUSED(page); + } + + void set_pkey_info(const common::ObPartitionKey &partition, + const char *pkey_str) + { + UNUSED(partition); + UNUSED(pkey_str); + } + +private: + int64_t bar_; +}; + +TEST(ObLogTransTaskPool, Function1) +{ + const int64_t task_cnt = 1024 * 32; + + ObConcurrentFIFOAllocator fifo; + int64_t G = 1024 * 1024 * 1024; + fifo.init(1 * G, 1 * G, OB_MALLOC_BIG_BLOCK_SIZE); + + ObLogTransTaskPool pool; + + int ret = pool.init(&fifo, 1024 * 8, 1024, true, 1024); + EXPECT_EQ(OB_SUCCESS, ret); + + MockTransTask **tasks = new MockTransTask*[task_cnt]; + const char *part_info = "partition"; + ObPartitionKey pkey; + + for (int64_t idx = 0; idx < task_cnt; ++idx) { + tasks[idx] = pool.get(part_info, pkey); + EXPECT_TRUE(NULL != tasks[idx]); + } + + for (int64_t idx = 0; idx < task_cnt; ++idx) { + tasks[idx]->revert(); + } + + pool.destroy(); + + delete []tasks; + fifo.destroy(); +} + +// 2 tasks not returned. 
+TEST(ObLogTransTaskPool, Function2) +{ + const int64_t task_cnt = 1024 * 32; + + ObConcurrentFIFOAllocator fifo; + int64_t G = 1024 * 1024 * 1024; + fifo.init(1 * G, 1 * G, OB_MALLOC_BIG_BLOCK_SIZE); + + ObLogTransTaskPool pool; + + int ret = pool.init(&fifo, 1024 * 8, 1024, true, 1024); + EXPECT_EQ(OB_SUCCESS, ret); + + MockTransTask **tasks = new MockTransTask*[task_cnt]; + const char *part_info = "partition"; + ObPartitionKey pkey; + + for (int64_t idx = 0; idx < task_cnt; ++idx) { + tasks[idx] = pool.get(part_info, pkey); + EXPECT_TRUE(NULL != tasks[idx]); + } + + for (int64_t idx = 0; idx < task_cnt - 2; ++idx) { + tasks[idx + 1]->revert(); + } + + pool.destroy(); + + delete []tasks; + fifo.destroy(); +} + + +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("debug"); + testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_trans_ctx.cpp b/unittest/liboblog/test_log_trans_ctx.cpp new file mode 100644 index 0000000000000000000000000000000000000000..84ac0bb1943b5c6fdbc8e52f080af66ce23761d8 --- /dev/null +++ b/unittest/liboblog/test_log_trans_ctx.cpp @@ -0,0 +1,722 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include + +#include "ob_log_trans_ctx.h" // ObLogDepParser +#include "common/ob_clock_generator.h" // ObClockGenerator +#include "ob_log_trans_task.h" // PartTransTask +#include "ob_log_trans_ctx_mgr.h" // ObLogTransCtxMgr +#include "test_log_part_mgr_mock.h" // MockObLogPartMgr +#include "ob_log_common.h" // MAX_CACHED_TRANS_CTX_COUNT + +using namespace oceanbase::common; +using namespace oceanbase::liboblog; +using namespace oceanbase::transaction; + +class TransCtxTest : public ::testing::Test +{ +public: + static const int64_t PART_TRANS_TASK_ARRAY_SIZE = 10; + typedef ObSEArray PartTransTaskArray; + +public: + TransCtxTest(); + virtual ~TransCtxTest(); + virtual void SetUp(); + virtual void TearDown(); + +public: + bool is_exist(const TransCtx::ReverseDepSet &reverse_dep_set, const ObTransID &trans_id) const + { + bool ret = false; + TransCtx::ReverseDepSet::const_iterator_t itor = reverse_dep_set.begin(); + for (; itor != reverse_dep_set.end(); ++itor) { + if (trans_id == *itor) { + ret = true; + break; + } + } + + return ret; + } + + bool is_exist(const TransCtx::TransIDArray dep_parsed_reverse_deps, + const ObTransID &trans_id) const + { + bool ret = false; + ObTransID trans_id_cmp; + for (int64_t index = 0; index < dep_parsed_reverse_deps.count(); index++) { + EXPECT_EQ(OB_SUCCESS, dep_parsed_reverse_deps.at(index, trans_id_cmp)); + if (trans_id == trans_id_cmp) { + ret = true; + break; + } + } + return ret; + } + + void init_trans_ctx(const ObTransID &trans_id, TransCtx *&trans_ctx, const bool enable_create) + { + EXPECT_TRUE(NULL != trans_ctx_mgr_); + EXPECT_TRUE(OB_SUCCESS == trans_ctx_mgr_->get_trans_ctx(trans_id, trans_ctx, enable_create)); + EXPECT_TRUE(NULL != trans_ctx); + EXPECT_TRUE(OB_SUCCESS == 
trans_ctx->set_trans_id(trans_id)); + EXPECT_TRUE(OB_SUCCESS == trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY)); + } + + IObLogPartMgr *create_part_mgr() + { + IObLogPartMgr *part_mgr = NULL; + if (NULL != (part_mgr = (MockObLogPartMgr *)ob_malloc(sizeof(MockObLogPartMgr), + ObModIds::OB_LOG_PART_INFO))) { + new(part_mgr)MockObLogPartMgr(); + } + return part_mgr; + } + + IObLogTransCtxMgr *create_trans_mgr() + { + ObLogTransCtxMgr *tx_mgr = NULL; + if (NULL != (tx_mgr = (ObLogTransCtxMgr *)ob_malloc(sizeof(ObLogTransCtxMgr), + ObModIds::OB_LOG_TRANS_CTX))) { + new(tx_mgr)ObLogTransCtxMgr(); + if (OB_SUCCESS != tx_mgr->init(MAX_CACHED_TRANS_CTX_COUNT)) { + tx_mgr->~ObLogTransCtxMgr(); + ob_free(tx_mgr); + tx_mgr = NULL; + } + } + return tx_mgr; + } + + void destroy() + { + trans_ctx_.reset(); + trans_id_.reset(); + part_trans_task_.reset(); + if (NULL != part_mgr_) { + part_mgr_->~IObLogPartMgr(); + ob_free(part_mgr_); + part_mgr_ = NULL; + } + + if (NULL != trans_ctx_mgr_) { + trans_ctx_mgr_->~IObLogTransCtxMgr(); + ob_free(trans_ctx_mgr_); + trans_ctx_mgr_ = NULL; + } + } + + void init_part_trans_task_array(PartTransTaskArray &array, const ObTransID &trans_id) + { + EXPECT_TRUE(trans_id.is_valid()); + + PartTransTask *part_trans_task = NULL; + for (int i = 0; i < PART_TRANS_TASK_ARRAY_SIZE; i++) { + init_part_trans_task(part_trans_task, trans_id); + EXPECT_EQ(OB_SUCCESS, array.push_back(part_trans_task)); + } + } + + void free_part_trans_task_array(PartTransTaskArray &array) + { + PartTransTask *part_trans_task = NULL; + for (int i = 0; i < PART_TRANS_TASK_ARRAY_SIZE; i++) { + EXPECT_EQ(OB_SUCCESS, array.at(i, part_trans_task)); + free_part_trans_task(part_trans_task); + } + } + + void init_part_trans_task(PartTransTask *&part_trans_task, const ObTransID &trans_id) + { + EXPECT_TRUE(trans_id.is_valid()); + + if (NULL != (part_trans_task = (PartTransTask *)ob_malloc(sizeof(PartTransTask), + ObModIds::OB_LOG_PART_TRANS_TASK_SMALL))) { + new(part_trans_task)PartTransTask(); + part_trans_task->set_trans_id(trans_id); + part_trans_task->set_ref_cnt(0); + part_trans_task->set_pool(NULL); + } + } + + void free_part_trans_task(PartTransTask *part_trans_task) + { + if (NULL != part_trans_task) { + part_trans_task->~PartTransTask(); + ob_free(part_trans_task); + part_trans_task = NULL; + } + } + + +public: + TransCtx trans_ctx_; + ObTransID trans_id_; + PartTransTask part_trans_task_; + IObLogPartMgr *part_mgr_; + IObLogTransCtxMgr *trans_ctx_mgr_; + +private: + // disallow copy + DISALLOW_COPY_AND_ASSIGN(TransCtxTest); +}; + +TransCtxTest::TransCtxTest(): trans_ctx_(), + trans_id_(), + part_trans_task_(), + part_mgr_(NULL), + trans_ctx_mgr_(NULL) +{ +} + +TransCtxTest::~TransCtxTest() +{ +} + +void TransCtxTest::SetUp() +{ + const ObAddr svr(ObAddr::IPV4, "127.0.0.1", 1000); + trans_id_ = ObTransID(svr); + part_trans_task_.set_trans_id(trans_id_); + EXPECT_TRUE(NULL != (part_mgr_ = create_part_mgr())); + EXPECT_TRUE(NULL != (trans_ctx_mgr_ = create_trans_mgr())); + trans_ctx_.set_host(trans_ctx_mgr_); +} + +void TransCtxTest::TearDown() +{ + destroy(); +} + +TEST_F(TransCtxTest, prepare_failed) +{ + bool stop_flag = false; + bool need_discard = false; + IObLogPartMgr *part_mgr_null = NULL; + + // 1. If part mrg is null + EXPECT_EQ(OB_INVALID_ARGUMENT, trans_ctx_.prepare(part_trans_task_, part_mgr_null, stop_flag, + need_discard)); + + // 2. 
If the state is TRANS_CTX_STATE_DISCARDED, prepare returns an error + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_DISCARDED); + EXPECT_EQ(OB_INVALID_ERROR, trans_ctx_.prepare(part_trans_task_, part_mgr_, stop_flag, + need_discard)); +} + +TEST_F(TransCtxTest, prepare_discard) +{ + bool stop_flag = false; + bool need_discard = false; + const int64_t prepare_tstamp = 1452763000; + + // prepare partition key + ObPartitionKey partition_key_0; + partition_key_0.init(1000000000, 0, 3); + + // Make the prepare log timestamp less than the specified timestamp + part_trans_task_.set_partition(partition_key_0); + part_trans_task_.set_timestamp(prepare_tstamp); + part_trans_task_.set_prepare_log_id(1); + + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_INVALID); + + // Current transaction not in service, need discard + EXPECT_EQ(OB_SUCCESS, trans_ctx_.prepare(part_trans_task_, part_mgr_, stop_flag, need_discard)); + EXPECT_TRUE(need_discard); +} + +TEST_F(TransCtxTest, prepare_success) +{ + bool stop_flag = false; + bool need_discard = false; + const int64_t prepare_tstamp = 1452763900; + + // prepare partition key + ObPartitionKey partition_key_0; + partition_key_0.init(1000000000, 0, 3); + ObPartitionKey partition_key_1; + partition_key_1.init(1000000000, 1, 3); + ObPartitionKey partition_key_2; + partition_key_2.init(1000000000, 2, 3); + + // If the current partitioned transaction service has 2 service participants, verify that the participants are obtained correctly + // Make the prepare log timestamp greater than the specified timestamp + part_trans_task_.set_partition(partition_key_0); + part_trans_task_.set_timestamp(prepare_tstamp); + part_trans_task_.set_prepare_log_id(1); + ObPartitionLogInfo part_info_0(partition_key_0, 1, prepare_tstamp); + ObPartitionLogInfo part_info_1(partition_key_1, 1, 1452763999); + ObPartitionLogInfo part_info_2(partition_key_2, 1, 1452763000); + PartitionLogInfoArray participants; + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_0)); + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_1)); + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_2)); + EXPECT_EQ(OB_SUCCESS, part_trans_task_.set_participants(participants)); + + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_INVALID); + + EXPECT_EQ(OB_SUCCESS, trans_ctx_.prepare(part_trans_task_, part_mgr_, stop_flag, need_discard)); + EXPECT_FALSE(need_discard); + const TransPartInfo *valid_participants = trans_ctx_.get_participants(); + int64_t valid_participant_count = trans_ctx_.get_participant_count(); + int64_t participants_count = valid_participant_count; + EXPECT_EQ(2, participants_count); + for (int64_t index = 0; index < participants_count; index++) { + EXPECT_FALSE(partition_key_2 == valid_participants[index].pkey_); + } +} + +TEST_F(TransCtxTest, add_participant_failed) +{ + bool is_part_trans_served = true; + bool is_all_participants_ready = false; + + // 1. The current state is not advanced to the PREPARE state + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_INVALID); + EXPECT_EQ(OB_STATE_NOT_MATCH, trans_ctx_.add_participant(part_trans_task_, is_part_trans_served, + is_all_participants_ready)); + + // 2. 
The current state is already ready, so the current participant will not be gathered + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY); + EXPECT_EQ(OB_SUCCESS, trans_ctx_.add_participant(part_trans_task_, is_part_trans_served, + is_all_participants_ready)); + EXPECT_FALSE(is_part_trans_served); +} + +TEST_F(TransCtxTest, add_participant_not_served) +{ + bool stop_flag = false; + bool need_discard = false; + bool is_part_trans_served = true; + bool is_all_participants_ready = false; + const int64_t prepare_tstamp = 1452763900; + + // prepare partition key + ObPartitionKey partition_key_0; + partition_key_0.init(1000000000, 0, 3); + ObPartitionKey partition_key_1; + partition_key_1.init(1000000000, 1, 3); + + // Make the prepare log timestamp greater than the specified timestamp + part_trans_task_.set_partition(partition_key_0); + part_trans_task_.set_timestamp(prepare_tstamp); + part_trans_task_.set_prepare_log_id(1); + ObPartitionLogInfo part_info_0(partition_key_0, 1, prepare_tstamp); + ObPartitionLogInfo part_info_1(partition_key_1, 1, 1452763999); + PartitionLogInfoArray participants; + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_0)); + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_1)); + part_trans_task_.set_participants(participants); + + // Prepare once first + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_INVALID); + + EXPECT_EQ(OB_SUCCESS, trans_ctx_.prepare(part_trans_task_, part_mgr_, stop_flag, need_discard)); + EXPECT_FALSE(need_discard); + + // Construct a partition key that is not in the participant list + PartTransTask part_trans_task_new; + part_trans_task_new.set_trans_id(trans_id_); + ObPartitionKey partition_key_new; + partition_key_new.init(1000000000, 2, 3); + part_trans_task_new.set_partition(partition_key_new); + + // Currently in the prepared state, but the partition is not in the participant list + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_PREPARED); + EXPECT_EQ(OB_SUCCESS, trans_ctx_.add_participant(part_trans_task_new, is_part_trans_served, + is_all_participants_ready)); + EXPECT_FALSE(is_part_trans_served); + EXPECT_FALSE(is_all_participants_ready); +} + +TEST_F(TransCtxTest, add_participant_all_push_in_ready) +{ + bool stop_flag = false; + bool need_discard = false; + bool is_part_trans_served = true; + bool is_all_participants_ready = false; + const int64_t prepare_tstamp = 1452763900; + + // prepare partition key + ObPartitionKey partition_key_0; + partition_key_0.init(1000000000, 0, 3); + ObPartitionKey partition_key_1; + partition_key_1.init(1000000000, 1, 3); + + // Make the prepare log timestamp greater than the specified timestamp + part_trans_task_.set_partition(partition_key_0); + part_trans_task_.set_timestamp(prepare_tstamp); + part_trans_task_.set_prepare_log_id(1); + ObPartitionLogInfo part_info_0(partition_key_0, 1, prepare_tstamp); + ObPartitionLogInfo part_info_1(partition_key_1, 1, 1452763999); + PartitionLogInfoArray participants; + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_0)); + EXPECT_EQ(OB_SUCCESS, participants.push_back(part_info_1)); + part_trans_task_.set_participants(participants); + + // Prepare first, generating the list of all served participants + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_INVALID); + EXPECT_EQ(OB_SUCCESS, trans_ctx_.prepare(part_trans_task_, part_mgr_, stop_flag, need_discard)); + EXPECT_FALSE(need_discard); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PREPARED, trans_ctx_.get_state()); + + PartTransTask part_trans_task_2; + part_trans_task_2.set_trans_id(trans_id_); + part_trans_task_2.set_partition(partition_key_1); +
part_trans_task_2.set_timestamp(prepare_tstamp + 100); + EXPECT_EQ(OB_SUCCESS, trans_ctx_.prepare(part_trans_task_2, part_mgr_, stop_flag, need_discard)); + EXPECT_FALSE(need_discard); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PREPARED, trans_ctx_.get_state()); + + // 1.Currently in prepare state: partition 1 is in the participants list, then it is added to the ready list, but has not yet reached the ready state + EXPECT_EQ(OB_SUCCESS, trans_ctx_.add_participant(part_trans_task_, is_part_trans_served, + is_all_participants_ready)); + EXPECT_TRUE(is_part_trans_served); + EXPECT_FALSE(is_all_participants_ready); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PREPARED, trans_ctx_.get_state()); + + // 2.All partitions have been added to the ready list and should be in the ready state + EXPECT_EQ(OB_SUCCESS, trans_ctx_.add_participant(part_trans_task_2, is_part_trans_served, + is_all_participants_ready)); + EXPECT_TRUE(is_part_trans_served); + EXPECT_TRUE(is_all_participants_ready); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY, trans_ctx_.get_state()); +} + +TEST_F(TransCtxTest, parse_deps_failed) +{ + IObLogTransCtxMgr *trans_ctx_mgr = NULL; + bool all_deps_cleared = false; + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_PREPARED); + EXPECT_EQ(OB_STATE_NOT_MATCH, trans_ctx_.parse_deps(trans_ctx_mgr, all_deps_cleared)); + EXPECT_FALSE(all_deps_cleared); + + trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY); + EXPECT_EQ(OB_INVALID_ARGUMENT, trans_ctx_.parse_deps(trans_ctx_mgr, all_deps_cleared)); + EXPECT_FALSE(all_deps_cleared); +} + +TEST_F(TransCtxTest, parse_deps_and_sequence) +{ + // Create 4 transactions + const ObAddr svr1(ObAddr::IPV4, "127.0.0.1", 1000); + const ObAddr svr2(ObAddr::IPV4, "127.0.0.1", 2000); + const ObAddr svr3(ObAddr::IPV4, "127.0.0.1", 3000); + const ObAddr svr4(ObAddr::IPV4, "127.0.0.1", 4000); + const ObTransID trans_id_1(svr1); + const ObTransID trans_id_2(svr2); + const ObTransID trans_id_3(svr3); + const ObTransID trans_id_4(svr4); + + TransCtx *trans_ctx_1 = NULL; + TransCtx *trans_ctx_2 = NULL; + TransCtx *trans_ctx_3 = NULL; + TransCtx *trans_ctx_4 = NULL; + bool enable_create = true; + + // init 4 trans_ctx + init_trans_ctx(trans_id_1, trans_ctx_1, enable_create); + init_trans_ctx(trans_id_2, trans_ctx_2, enable_create); + init_trans_ctx(trans_id_3, trans_ctx_3, enable_create); + init_trans_ctx(trans_id_4, trans_ctx_4, enable_create); + + // set deps of trans + trans_ctx_1->set_deps(trans_id_2); + trans_ctx_1->set_deps(trans_id_3); + trans_ctx_2->set_deps(trans_id_3); + trans_ctx_3->set_deps(trans_id_4); + + bool all_deps_cleared = false; + TransCtx::TransIDArray dep_parsed_reverse_deps; + + // 1. trans_ctx 4 can be ordered + // trans_ctx_4 parses the dependencies (since there are no dependencies, the partitioned transaction parses the end of the dependencies and the state is changed from ready->parsed) + EXPECT_EQ(OB_SUCCESS, trans_ctx_4->parse_deps(trans_ctx_mgr_, all_deps_cleared)); + EXPECT_TRUE(all_deps_cleared); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_DEP_PARSED, trans_ctx_4->get_state()); + + // trans_ctx_4 sequenced + EXPECT_EQ(OB_SUCCESS, trans_ctx_4->sequence(0, 0)); + + // trans_ctx_4 parse reverse deps + EXPECT_EQ(OB_SUCCESS, trans_ctx_4->parse_reverse_deps(trans_ctx_mgr_, dep_parsed_reverse_deps)); + EXPECT_EQ(0, dep_parsed_reverse_deps.count()); + + // 2. 
trans_ctx 1 cannot be ordered + // trans_ctx_1 analyses the dependencies and adds the reverse dependency list of 2 and 3, with the status ready, because 2 and 3 are not ordered + all_deps_cleared = false; + EXPECT_EQ(OB_SUCCESS, trans_ctx_1->parse_deps(trans_ctx_mgr_, all_deps_cleared)); + EXPECT_FALSE(all_deps_cleared); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY, trans_ctx_1->get_state()); + // Determine if the reverse dependency list for the next 2/3 includes 1 + const TransCtx::ReverseDepSet &reverse_dep_set_2 = trans_ctx_2->get_reverse_dep_set(); + EXPECT_EQ(1, reverse_dep_set_2.count()); + EXPECT_TRUE(is_exist(reverse_dep_set_2, trans_id_1)); + const TransCtx::ReverseDepSet &reverse_dep_set_3 = trans_ctx_3->get_reverse_dep_set(); + EXPECT_EQ(1, reverse_dep_set_3.count()); + EXPECT_TRUE(is_exist(reverse_dep_set_3, trans_id_1)); + + // 3.trans_ctx 2 cannot be ordered and will join the set of reverse dependencies of 3 + all_deps_cleared = false; + EXPECT_EQ(OB_SUCCESS, trans_ctx_2->parse_deps(trans_ctx_mgr_, all_deps_cleared)); + EXPECT_FALSE(all_deps_cleared); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PARTICIPANT_READY, trans_ctx_2->get_state()); + // Determine the reverse dependency list for 3, containing 2 + const TransCtx::ReverseDepSet &reverse_dep_set_3_new = trans_ctx_3->get_reverse_dep_set(); + EXPECT_EQ(2, reverse_dep_set_3_new.count()); + EXPECT_TRUE(is_exist(reverse_dep_set_3_new, trans_id_2)); + + // 4.trans-ctx 3 can parse deps + all_deps_cleared = false; + EXPECT_EQ(OB_SUCCESS, trans_ctx_3->parse_deps(trans_ctx_mgr_, all_deps_cleared)); + EXPECT_TRUE(all_deps_cleared); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_DEP_PARSED, trans_ctx_3->get_state()); + + // trans_ctx_3 sequenced + EXPECT_EQ(OB_SUCCESS, trans_ctx_3->sequence(1, 1)); + + // trans_ctx_3 parse reverse deps + dep_parsed_reverse_deps.reset(); + EXPECT_EQ(OB_SUCCESS, trans_ctx_3->parse_reverse_deps(trans_ctx_mgr_, dep_parsed_reverse_deps)); + EXPECT_EQ(1, dep_parsed_reverse_deps.count()); + EXPECT_TRUE(is_exist(dep_parsed_reverse_deps, trans_id_2)); + + // The set of dependencies of trans 2 is 0 and the set of dependencies of trans 1 is 1 + EXPECT_EQ(0, trans_ctx_2->get_cur_dep_count()); + EXPECT_EQ(1, trans_ctx_1->get_cur_dep_count()); + + // 2 Execution of sequencing, reverse decoupling + dep_parsed_reverse_deps.reset(); + EXPECT_EQ(OB_SUCCESS, trans_ctx_2->sequence(2, 2)); + EXPECT_EQ(OB_SUCCESS, trans_ctx_2->parse_reverse_deps(trans_ctx_mgr_, dep_parsed_reverse_deps)); + EXPECT_EQ(1, dep_parsed_reverse_deps.count()); + EXPECT_EQ(0, trans_ctx_1->get_cur_dep_count()); + EXPECT_TRUE(is_exist(dep_parsed_reverse_deps, trans_id_1)); + + // 5.trans_ctx_1 can be sequenced + dep_parsed_reverse_deps.reset(); + EXPECT_EQ(OB_SUCCESS, trans_ctx_1->sequence(3, 3)); + EXPECT_EQ(OB_SUCCESS, trans_ctx_1->parse_reverse_deps(trans_ctx_mgr_, dep_parsed_reverse_deps)); + EXPECT_EQ(0, dep_parsed_reverse_deps.count()); +} + +TEST_F(TransCtxTest, format_participant_failed) +{ + TransCtx *trans_ctx = NULL; + bool enable_create = true; + init_trans_ctx(trans_id_, trans_ctx, enable_create); + + // 1.Transaction status not in order, error reported + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_DEP_PARSED)); + EXPECT_EQ(OB_STATE_NOT_MATCH, trans_ctx->format_participant(part_trans_task_)); + + // 2.Inconsistent transaction id, error reported + const ObAddr svr(ObAddr::IPV4, "127.0.0.1", 2000); + const ObTransID trans_id(svr); + PartTransTask part_trans_task; + part_trans_task.set_trans_id(trans_id); + + 
EXPECT_EQ(OB_INVALID_ARGUMENT, trans_ctx->format_participant(part_trans_task)); +} + +TEST_F(TransCtxTest, format_participant) +{ + TransCtx *trans_ctx = NULL; + bool enable_create = true; + init_trans_ctx(trans_id_, trans_ctx, enable_create); + + // Total of 10 partition transactions + PartTransTaskArray part_trans_task_array; + init_part_trans_task_array(part_trans_task_array, trans_id_); + EXPECT_TRUE(PART_TRANS_TASK_ARRAY_SIZE == part_trans_task_array.count()); + + // Set the number of ready participants + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_count(PART_TRANS_TASK_ARRAY_SIZE)); + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_SEQUENCED)); + + // After the first 9 formations, each time the transaction status is not updated, the formated participant count is increased by 1 + int64_t formated_count = 0; + PartTransTask *part_trans_task = NULL; + int64_t index = 0; + for (index = 0; index < PART_TRANS_TASK_ARRAY_SIZE - 1; index++) { + EXPECT_EQ(OB_SUCCESS, part_trans_task_array.at(index, part_trans_task)); + EXPECT_EQ(OB_SUCCESS, trans_ctx->format_participant(*part_trans_task)); + EXPECT_EQ(++formated_count, trans_ctx->get_formatted_participant_count()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_SEQUENCED, trans_ctx->get_state()); + } + + EXPECT_EQ(OB_SUCCESS, part_trans_task_array.at(index, part_trans_task)); + EXPECT_EQ(OB_SUCCESS, trans_ctx->format_participant(*part_trans_task)); + EXPECT_EQ(trans_ctx->get_ready_participant_count(), trans_ctx->get_formatted_participant_count()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_FORMATTED, trans_ctx->get_state()); + + free_part_trans_task_array(part_trans_task_array); + part_trans_task_array.destroy(); +} + +TEST_F(TransCtxTest, commit) +{ + EXPECT_EQ(OB_SUCCESS, trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_SEQUENCED)); + EXPECT_EQ(OB_STATE_NOT_MATCH, trans_ctx_.commit()); + + EXPECT_EQ(OB_SUCCESS, trans_ctx_.set_state(TransCtx::TRANS_CTX_STATE_FORMATTED)); + EXPECT_EQ(OB_SUCCESS, trans_ctx_.commit()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_COMMITTED, trans_ctx_.get_state()); +} + +TEST_F(TransCtxTest, release_participants_failed) +{ + TransCtx *trans_ctx = NULL; + bool enable_create = true; + init_trans_ctx(trans_id_, trans_ctx, enable_create); + + // 1. The current state is not a commit state and an error is reported + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_SEQUENCED)); + EXPECT_EQ(OB_STATE_NOT_MATCH, trans_ctx->release_participants()); + + // 2. 
Not all parts are currently available for release, 10 partitioned transactions in total + PartTransTaskArray part_trans_task_array; + init_part_trans_task_array(part_trans_task_array, trans_id_); + EXPECT_TRUE(PART_TRANS_TASK_ARRAY_SIZE == part_trans_task_array.count()); + + // Set the number of ready participants, the status is commited + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_count(PART_TRANS_TASK_ARRAY_SIZE)); + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_COMMITTED)); + + // set next participant + PartTransTask *part_trans_task = NULL; + PartTransTask *first_part_trans_task = NULL; + PartTransTask *next_part_trans_task = NULL; + int64_t index = 0; + EXPECT_EQ(OB_SUCCESS, part_trans_task_array.at(0, part_trans_task)); + first_part_trans_task = part_trans_task; + + for (index = 0; index < PART_TRANS_TASK_ARRAY_SIZE - 1; index++) { + EXPECT_EQ(OB_SUCCESS, part_trans_task_array.at(index + 1, next_part_trans_task)); + part_trans_task->set_next_participant(next_part_trans_task); + part_trans_task = next_part_trans_task; + } + + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_objs(first_part_trans_task)); + bool all_part_releasable = false; + for (index = 0; index < PART_TRANS_TASK_ARRAY_SIZE - 1; index++) { + EXPECT_EQ(OB_SUCCESS, trans_ctx->inc_releasable_participant_count(all_part_releasable)); + } + + // Not all parts are releasable, error reported + EXPECT_EQ(OB_STATE_NOT_MATCH, trans_ctx->release_participants()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_COMMITTED, trans_ctx->get_state()); + + EXPECT_EQ(OB_SUCCESS, trans_ctx->inc_releasable_participant_count(all_part_releasable)); + EXPECT_TRUE(true == all_part_releasable); + + // 3. Not all part reference counts are 0, error reported + part_trans_task->set_ref_cnt(2); + next_part_trans_task->set_ref_cnt(1); + EXPECT_EQ(OB_ERR_UNEXPECTED, trans_ctx->release_participants()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_COMMITTED, trans_ctx->get_state()); + + free_part_trans_task_array(part_trans_task_array); + part_trans_task_array.destroy(); +} + +TEST_F(TransCtxTest, releasd_participants_less_than_ready) +{ + TransCtx *trans_ctx = NULL; + bool enable_create = true; + init_trans_ctx(trans_id_, trans_ctx, enable_create); + + PartTransTask *part_trans_task; + init_part_trans_task(part_trans_task, trans_id_); + + // set count of ready participants, status is commited + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_count(PART_TRANS_TASK_ARRAY_SIZE)); + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_COMMITTED)); + + // set next participant + part_trans_task->set_next_participant(NULL); + + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_objs(part_trans_task)); + EXPECT_TRUE(NULL != trans_ctx->get_participant_objs()); + bool all_part_releasable = false; + for (int index = 0; index < PART_TRANS_TASK_ARRAY_SIZE; index++) { + EXPECT_EQ(OB_SUCCESS, trans_ctx->inc_releasable_participant_count(all_part_releasable)); + } + + EXPECT_TRUE(true == all_part_releasable); + EXPECT_EQ(OB_INVALID_ERROR, trans_ctx->release_participants()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_COMMITTED, trans_ctx->get_state()); + + free_part_trans_task(part_trans_task); +} + +TEST_F(TransCtxTest, release_participants) +{ + TransCtx *trans_ctx = NULL; + bool enable_create = true; + init_trans_ctx(trans_id_, trans_ctx, enable_create); + + // Total of 10 partition transactions + PartTransTaskArray part_trans_task_array; + init_part_trans_task_array(part_trans_task_array, trans_id_); + 
EXPECT_TRUE(PART_TRANS_TASK_ARRAY_SIZE == part_trans_task_array.count()); + + // set count of ready participants, status is commited + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_count(PART_TRANS_TASK_ARRAY_SIZE)); + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_state(TransCtx::TRANS_CTX_STATE_COMMITTED)); + + // set ready participant objs + PartTransTask *part_trans_task = NULL; + PartTransTask *first_part_trans_task = NULL; + PartTransTask *next_part_trans_task = NULL; + int64_t index = 0; + + EXPECT_EQ(OB_SUCCESS, part_trans_task_array.at(0, part_trans_task)); + first_part_trans_task = part_trans_task; + for (index = 0; index < PART_TRANS_TASK_ARRAY_SIZE - 1; index++) { + EXPECT_EQ(OB_SUCCESS, part_trans_task_array.at(index + 1, next_part_trans_task)); + part_trans_task->set_next_participant(next_part_trans_task); + part_trans_task = next_part_trans_task; + } + + EXPECT_EQ(OB_SUCCESS, trans_ctx->set_ready_participant_objs(first_part_trans_task)); + EXPECT_TRUE(NULL != trans_ctx->get_participant_objs()); + + bool all_part_releasable = false; + for (index = 0; index < PART_TRANS_TASK_ARRAY_SIZE; index++) { + EXPECT_EQ(OB_SUCCESS, trans_ctx->inc_releasable_participant_count(all_part_releasable)); + } + + EXPECT_TRUE(true == all_part_releasable); + EXPECT_EQ(OB_SUCCESS, trans_ctx->release_participants()); + EXPECT_EQ(TransCtx::TRANS_CTX_STATE_PARTICIPANT_RELEASED, trans_ctx->get_state()); + EXPECT_TRUE(0 == trans_ctx->get_ready_participant_count()); + EXPECT_TRUE(0 == trans_ctx->get_releasable_participant_count()); + EXPECT_TRUE(0 == trans_ctx->get_formatted_participant_count()); + EXPECT_TRUE(NULL == trans_ctx->get_participant_objs()); + + free_part_trans_task_array(part_trans_task_array); + part_trans_task_array.destroy(); +} + +int main(int argc, char **argv) +{ + // used for init of ObTransIDTest for incoming length errors + ObClockGenerator::init(); + + OB_LOGGER.set_log_level("INFO"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_log_trans_ctx_mgr.cpp b/unittest/liboblog/test_log_trans_ctx_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6386f1cf3ab748fba7c166f618b0a28bd3c0ae1c --- /dev/null +++ b/unittest/liboblog/test_log_trans_ctx_mgr.cpp @@ -0,0 +1,358 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "ob_log_trans_ctx_mgr.h" // ObLogTransCtxMgr +#include "common/ob_clock_generator.h" // ObClockGenerator + +using namespace oceanbase::common; +using namespace oceanbase::liboblog; +using namespace oceanbase::transaction; + +class ObLogTransCtxMgrTest : public ::testing::Test +{ +public: + static const int64_t SLEEP_TIME = 10000; + static const int64_t THREAD_NUM = 10; + static const int64_t RUN_TIME_SEC = 60; + static const int64_t CACHED_CTX_COUNT = 10000; + static const int64_t TEST_CTX_COUNT = CACHED_CTX_COUNT + 1024; + +public: + ObLogTransCtxMgrTest(); + virtual ~ObLogTransCtxMgrTest(); + virtual void SetUp(); + virtual void TearDown(); + + static void *thread_func(void *args); + +public: + void run(); + void test_imediately_remove(); + void test_dely_remove(); + +public: + int32_t port_; + ObTransID *trans_ids_; + pthread_t threads_[THREAD_NUM]; + ObLogTransCtxMgr mgr_; + +private: + // disallow copy + DISALLOW_COPY_AND_ASSIGN(ObLogTransCtxMgrTest); +}; +ObLogTransCtxMgrTest::ObLogTransCtxMgrTest() : port_(0), trans_ids_(NULL), mgr_() +{ +} + +ObLogTransCtxMgrTest::~ObLogTransCtxMgrTest() +{ +} + +void ObLogTransCtxMgrTest::SetUp() +{ +} + +void ObLogTransCtxMgrTest::TearDown() +{ +} + +TEST_F(ObLogTransCtxMgrTest, DISABLED_single_thread_immediately_remove) +{ + ObLogTransCtxMgr trans_ctx_mgr; + + EXPECT_NE(OB_SUCCESS, trans_ctx_mgr.init(0)); + EXPECT_NE(OB_SUCCESS, trans_ctx_mgr.init(-1)); + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.init(CACHED_CTX_COUNT)); + + EXPECT_EQ(0, trans_ctx_mgr.get_valid_trans_ctx_count()); + EXPECT_EQ(0, trans_ctx_mgr.get_alloc_trans_ctx_count()); + EXPECT_EQ(0, trans_ctx_mgr.get_free_trans_ctx_count()); + + // Up to two transaction context objects are allocated at the same time when used by a single thread following the "allocate-return-release" process. + // One of them is not deleted from the cache. The logic is verified below. + int64_t free_count = 0; + int64_t alloc_count = 0; + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + ObAddr svr(ObAddr::IPV4, "127.0.0.1", 1 + (int32_t)index); + ObTransID trans_id(svr); + + // At the beginning, the effective number is 0 + EXPECT_EQ(0, trans_ctx_mgr.get_valid_trans_ctx_count()); + + free_count = index <= 1 ? 0 : 1; + alloc_count = index <= 2 ? index : 2; + EXPECT_EQ(free_count, trans_ctx_mgr.get_free_trans_ctx_count()); + EXPECT_EQ(alloc_count, trans_ctx_mgr.get_alloc_trans_ctx_count()); + + // get with a not-exist trans_id + TransCtx *ctx1 = NULL; + EXPECT_EQ(OB_ENTRY_NOT_EXIST, trans_ctx_mgr.get_trans_ctx(trans_id, ctx1)); + + // create when get a not-exist trans_id + bool enable_create = true; + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.get_trans_ctx(trans_id, ctx1, enable_create)); + EXPECT_TRUE(NULL != ctx1); + + // get trans_ctx that create just now + TransCtx *ctx1_get = NULL; + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.get_trans_ctx(trans_id, ctx1_get)); + EXPECT_TRUE(ctx1 == ctx1_get); + + // Valid quantity is 1 + EXPECT_EQ(1, trans_ctx_mgr.get_valid_trans_ctx_count()); + + // Idle transaction context object used, idle becomes 0, allocated to a maximum of 2 + free_count = 0; + alloc_count = index <= 0 ? 
1 : 2; + EXPECT_EQ(free_count, trans_ctx_mgr.get_free_trans_ctx_count()); + EXPECT_EQ(alloc_count, trans_ctx_mgr.get_alloc_trans_ctx_count()); + + // revert the trans_ctx + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.revert_trans_ctx(ctx1)); + + // A revert before a remove does not affect the number of objects + EXPECT_EQ(1, trans_ctx_mgr.get_valid_trans_ctx_count()); + free_count = 0; + alloc_count = index <= 0 ? 1 : 2; + EXPECT_EQ(free_count, trans_ctx_mgr.get_free_trans_ctx_count()); + EXPECT_EQ(alloc_count, trans_ctx_mgr.get_alloc_trans_ctx_count()); + + // remove + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.remove_trans_ctx(trans_id)); + EXPECT_EQ(OB_ENTRY_NOT_EXIST, trans_ctx_mgr.remove_trans_ctx(trans_id)); + + // After deletion, the effective number becomes 0 + EXPECT_EQ(0, trans_ctx_mgr.get_valid_trans_ctx_count()); + + // After deletion, the object just deleted is not released immediately, but the last deleted object is released + // So after the second time, the number of free objects becomes 1, but the number of allocated objects remains the same + free_count = index <= 0 ? 0 : 1; + alloc_count = index <= 0 ? 1 : 2; + EXPECT_EQ(free_count, trans_ctx_mgr.get_free_trans_ctx_count()); + EXPECT_EQ(alloc_count, trans_ctx_mgr.get_alloc_trans_ctx_count()); + + // revert the last one + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.revert_trans_ctx(ctx1_get)); + + // When all returned, the valid number remains 0 + EXPECT_EQ(0, trans_ctx_mgr.get_valid_trans_ctx_count()); + + // Even after swapping them all back, the object just deleted is not immediately released until the next time it is deleted + free_count = index <= 0 ? 0 : 1; + alloc_count = index <= 0 ? 1 : 2; + EXPECT_EQ(free_count, trans_ctx_mgr.get_free_trans_ctx_count()); + EXPECT_EQ(alloc_count, trans_ctx_mgr.get_alloc_trans_ctx_count()); + } +} + +TEST_F(ObLogTransCtxMgrTest, DISABLED_single_thread_delay_remove) +{ + ObLogTransCtxMgr trans_ctx_mgr; + ObTransID *tids = (ObTransID *)ob_malloc(sizeof(ObTransID) * TEST_CTX_COUNT); + TransCtx *tctxs_[TEST_CTX_COUNT]; + + (void)memset(tctxs_, 0, sizeof(tctxs_)); + ASSERT_TRUE(NULL != tids); + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.init(CACHED_CTX_COUNT)); + + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + new(tids + index) ObTransID(ObAddr(ObAddr::IPV4, "127.0.0.1", 1 + (int32_t)index)); + ObTransID &trans_id = tids[index]; + + // get with a not-exist trans_id + TransCtx *ctx1 = NULL; + EXPECT_EQ(OB_ENTRY_NOT_EXIST, trans_ctx_mgr.get_trans_ctx(trans_id, ctx1)); + + // create when get a not-exist trans_id + bool enable_create = true; + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.get_trans_ctx(trans_id, ctx1, enable_create)); + EXPECT_TRUE(NULL != ctx1); + + // revert the trans_ctx + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.revert_trans_ctx(ctx1)); + } + + EXPECT_EQ(TEST_CTX_COUNT + 0, trans_ctx_mgr.get_valid_trans_ctx_count()); + EXPECT_EQ(TEST_CTX_COUNT + 0, trans_ctx_mgr.get_alloc_trans_ctx_count()); + EXPECT_EQ(0, trans_ctx_mgr.get_free_trans_ctx_count()); + + int64_t REMOVE_INTERVAL_COUNT = 10; + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + ObTransID &trans_id = tids[index]; + + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.get_trans_ctx(trans_id, tctxs_[index])); + EXPECT_TRUE(NULL != tctxs_[index]); + + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.remove_trans_ctx(trans_id)); + EXPECT_EQ(TEST_CTX_COUNT - index - 1, trans_ctx_mgr.get_valid_trans_ctx_count()); + + // revert REMOVE_INTERVAL_COUNT the object you got before, so that it is actually deleted on the next remove + if (index >= REMOVE_INTERVAL_COUNT) 
{ + int64_t revert_index = index - REMOVE_INTERVAL_COUNT; + // After revert, the next remove will delete + EXPECT_EQ(OB_SUCCESS, trans_ctx_mgr.revert_trans_ctx(tctxs_[revert_index])); + tctxs_[revert_index] = NULL; + + static int64_t alloc_count = TEST_CTX_COUNT; + static int64_t free_count = 0; + + // The alloc_count is only decremented when a second non-cached object is deleted + if (revert_index > CACHED_CTX_COUNT) { + alloc_count--; + } else if (revert_index > 0) { + // The free_count is incremented when the cached transaction context object is deleted + free_count++; + } + + EXPECT_EQ(alloc_count, trans_ctx_mgr.get_alloc_trans_ctx_count()); + EXPECT_EQ(free_count, trans_ctx_mgr.get_free_trans_ctx_count()); + } + } + + EXPECT_EQ(CACHED_CTX_COUNT + REMOVE_INTERVAL_COUNT + 1, trans_ctx_mgr.get_alloc_trans_ctx_count()); + EXPECT_EQ(CACHED_CTX_COUNT + 0, trans_ctx_mgr.get_free_trans_ctx_count()); + + ob_free((void *)tids); + tids = NULL; +} + +TEST_F(ObLogTransCtxMgrTest, multiple_thread) +{ + EXPECT_EQ(OB_SUCCESS, mgr_.init(CACHED_CTX_COUNT)); + + OB_ASSERT(NULL == trans_ids_); + trans_ids_ = (ObTransID *)ob_malloc(sizeof(ObTransID) * TEST_CTX_COUNT); + ASSERT_TRUE(NULL != trans_ids_); + + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + new(trans_ids_ + index) ObTransID(ObAddr(ObAddr::IPV4, "127.0.0.1", 1 + (int32_t)index)); + } + + for (int64_t index = 0; index < THREAD_NUM; index++) { + ASSERT_EQ(0, pthread_create(threads_ + index, NULL, thread_func, this)); + } + + for (int64_t index = 0; index < THREAD_NUM; index++) { + if (0 != threads_[index]) { + pthread_join(threads_[index], NULL); + threads_[index] = 0; + } + } + + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + trans_ids_[index].~ObTransID(); + } + + ob_free(trans_ids_); + trans_ids_ = NULL; +} + +void *ObLogTransCtxMgrTest::thread_func(void *args) +{ + if (NULL != args) { + ((ObLogTransCtxMgrTest *)args)->run(); + } + + return NULL; +} + +void ObLogTransCtxMgrTest::run() +{ + int64_t end_time = ObTimeUtility::current_time() + RUN_TIME_SEC * 1000000; + + while (true) { + test_imediately_remove(); + test_dely_remove(); + int64_t left_time = end_time - ObTimeUtility::current_time(); + if (left_time <= 0) break; + } +} + +void ObLogTransCtxMgrTest::test_imediately_remove() +{ + ObAddr svr(ObAddr::IPV4, "127.0.0.1", ATOMIC_AAF(&port_, 1)); + ObTransID trans_id(svr); // Although the svr is the same, the internal inc will be self-increasing + + // get with a not-exist trans_id + TransCtx *ctx1 = NULL; + EXPECT_EQ(OB_ENTRY_NOT_EXIST, mgr_.get_trans_ctx(trans_id, ctx1)); + + // create when get a not-exist trans_id + bool enable_create = true; + EXPECT_EQ(OB_SUCCESS, mgr_.get_trans_ctx(trans_id, ctx1, enable_create)); + EXPECT_TRUE(NULL != ctx1); + + // get trans_ctx that create just now + TransCtx *ctx1_get = NULL; + EXPECT_EQ(OB_SUCCESS, mgr_.get_trans_ctx(trans_id, ctx1_get)); + EXPECT_TRUE(ctx1 == ctx1_get); + + // revert the trans_ctx + EXPECT_EQ(OB_SUCCESS, mgr_.revert_trans_ctx(ctx1)); + + usleep((useconds_t)random() % SLEEP_TIME); + + // remove + EXPECT_EQ(OB_SUCCESS, mgr_.remove_trans_ctx(trans_id)); + EXPECT_EQ(OB_ENTRY_NOT_EXIST, mgr_.remove_trans_ctx(trans_id)); + + // Return to the last acquired + EXPECT_EQ(OB_SUCCESS, mgr_.revert_trans_ctx(ctx1_get)); +} + +void ObLogTransCtxMgrTest::test_dely_remove() +{ + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + ObTransID &trans_id = trans_ids_[random() % TEST_CTX_COUNT]; + TransCtx *ctx = NULL; + bool enable_create = true; + + 
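// a random pre-generated trans_id is used; enable_create is true, so the get below is expected to succeed whether or not the context already exists +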
EXPECT_EQ(OB_SUCCESS, mgr_.get_trans_ctx(trans_id, ctx, enable_create)); + EXPECT_TRUE(NULL != ctx); + + usleep((useconds_t)random() % SLEEP_TIME); + + EXPECT_EQ(OB_SUCCESS, mgr_.revert_trans_ctx(ctx)); + } + + for (int64_t index = 0; index < TEST_CTX_COUNT; index++) { + ObTransID &trans_id = trans_ids_[random() % TEST_CTX_COUNT]; + TransCtx *ctx = NULL; + + int ret = mgr_.get_trans_ctx(trans_id, ctx); + + if (OB_SUCC(ret)) { + EXPECT_TRUE(NULL != ctx); + + ret = mgr_.remove_trans_ctx(trans_id); + EXPECT_TRUE(OB_SUCCESS == ret || OB_ENTRY_NOT_EXIST == ret); + + usleep((useconds_t)random() % SLEEP_TIME); + + EXPECT_EQ(OB_SUCCESS, mgr_.revert_trans_ctx(ctx)); + } + } +} + +int main(int argc, char **argv) +{ + // Used for initialization of ObTransID + ObClockGenerator::init(); + + srandom((unsigned)ObTimeUtility::current_time()); + + OB_LOGGER.set_log_level("INFO"); + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/unittest/liboblog/test_log_utils.cpp b/unittest/liboblog/test_log_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d1d3745ed7f11e51e2765bbfb26e07f3e4f4073 --- /dev/null +++ b/unittest/liboblog/test_log_utils.cpp @@ -0,0 +1,330 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include "share/ob_define.h" +#include "liboblog/src/ob_log_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * TEST1. + * Test split. + */ +TEST(utils, split) +{ + int err = OB_SUCCESS; + char str[] = "tt1.database1"; + const char *delimiter = "."; + const char *res[16]; + int64_t res_cnt = 0; + + err = split(str, delimiter, 2, res, res_cnt); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_EQ(2, res_cnt); + EXPECT_STREQ("tt1", res[0]); + EXPECT_STREQ("database1", res[1]); + + char str1[] = "tt2.database2.test"; + err = split(str1, delimiter, 3, res, res_cnt); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_EQ(3, res_cnt); + EXPECT_STREQ("tt2", res[0]); + EXPECT_STREQ("database2", res[1]); + EXPECT_STREQ("test", res[2]); +} + +/* + * TEST2. + * Test split. 
Boundary tests + */ +TEST(utils, split_boundary) +{ + int err = OB_SUCCESS; + char str[] = "tt1.database1"; + const char *delimiter = "."; + const char *res[16]; + int64_t res_cnt = 0; + + err = split(NULL, delimiter, 2, res, res_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + EXPECT_EQ(0, res_cnt); + + char str1[] = ""; + err = split(str1, delimiter, 2, res, res_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + EXPECT_EQ(0, res_cnt); + + err = split(str, NULL, 2, res, res_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + EXPECT_EQ(0, res_cnt); + + const char *delimiter1 = ""; + err = split(str, delimiter1, 2, res, res_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + EXPECT_EQ(0, res_cnt); + + // Test for incoming length errors + err = split(str, delimiter, 1, res, res_cnt); + EXPECT_EQ(OB_INVALID_ARGUMENT, err); + EXPECT_EQ(1, res_cnt); +} + +TEST(utils, split_int64_all) +{ + char delimiter = '|'; + ObString str; + const char *ptr = NULL; + ObSEArray ret_array; + + // Store a single number + ptr = "100"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(1, ret_array.count()); + EXPECT_EQ(100, ret_array.at(0)); + + // Store multiple numbers + ptr = "100|2000|30000|400000"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(4, ret_array.count()); + EXPECT_EQ(100, ret_array.at(0)); + EXPECT_EQ(2000, ret_array.at(1)); + EXPECT_EQ(30000, ret_array.at(2)); + EXPECT_EQ(400000, ret_array.at(3)); + + // Store multiple numbers with a separator at the end + ptr = "100|2000|30000|400000|"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(4, ret_array.count()); + EXPECT_EQ(100, ret_array.at(0)); + EXPECT_EQ(2000, ret_array.at(1)); + EXPECT_EQ(30000, ret_array.at(2)); + EXPECT_EQ(400000, ret_array.at(3)); + + // no numbers + ptr = ""; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(0, ret_array.count()); + + // only a separator + ptr = "|"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(0, ret_array.count()); + + // There are no numbers, only invalid content + ptr = ","; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_INVALID_DATA, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(0, ret_array.count()); + + // Numerical limit values + char max_int[100]; + snprintf(max_int, sizeof(max_int), "%ld", INT64_MAX); + str.assign_ptr(max_int, (ObString::obstr_size_t)strlen(max_int)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(1, ret_array.count()); + EXPECT_EQ(INT64_MAX, ret_array.at(0)); + + // Exceeding numerical limits + std::string over_size_int(100, '9'); + str.assign_ptr(over_size_int.c_str(), (ObString::obstr_size_t)strlen(over_size_int.c_str())); + ret_array.reuse(); + EXPECT_EQ(OB_INVALID_DATA, split_int64(str, delimiter, ret_array)); + + // Use a different delimiter character in the data + // The whole string is then a single token that is not a valid number + ptr = "100,200,300"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_INVALID_DATA, split_int64(str, delimiter, ret_array)); 
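+ // (Assumption, not directly verified by the cases above: split_int64 rejects the entire input with OB_INVALID_DATA as soon as any token fails integer conversion, rather than returning the tokens that did parse.)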
+ + // contains other characters + ptr = "100a|200b|300c"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_INVALID_DATA, split_int64(str, delimiter, ret_array)); + + // delimiter at the first position + ptr = "|100|200|"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(2, ret_array.count()); + EXPECT_EQ(100, ret_array.at(0)); + EXPECT_EQ(200, ret_array.at(1)); + + // The separator appears several times in succession + ptr = "300||400|||500|"; + str.assign_ptr(ptr, (ObString::obstr_size_t)strlen(ptr)); + ret_array.reuse(); + EXPECT_EQ(OB_SUCCESS, split_int64(str, delimiter, ret_array)); + EXPECT_EQ(3, ret_array.count()); + EXPECT_EQ(300, ret_array.at(0)); + EXPECT_EQ(400, ret_array.at(1)); + EXPECT_EQ(500, ret_array.at(2)); +} + +TEST(utils, kv_pair) +{ + int ret = OB_SUCCESS; + char kv_str[] = "test:999"; + const char *delimiter1 = ":"; + const char *delimiter2 = "%"; + + ObLogKVCollection::KVPair kvpair; + ret = kvpair.init(delimiter1); + EXPECT_EQ(OB_SUCCESS, ret); + ret = kvpair.deserialize(kv_str); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_STREQ("test", kvpair.get_key()); + EXPECT_STREQ("999", kvpair.get_value()); + + kvpair.reset(); + char key[] = "kjdngasdey"; + char value[] = "vaksahgasfashjlue"; + ret = kvpair.init(delimiter2); + EXPECT_EQ(OB_SUCCESS, ret); + ret = kvpair.set_key_and_value(key, value); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_STREQ("kjdngasdey", kvpair.get_key()); + EXPECT_STREQ("vaksahgasfashjlue", kvpair.get_value()); + int64_t pos = 0; + int64_t len = kvpair.length(); + EXPECT_EQ(strlen(key) + strlen(value) + strlen(delimiter2), len); + char buf[len+1]; + ret = kvpair.serialize(buf, len+1, pos); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_STREQ("kjdngasdey%vaksahgasfashjlue", buf); +} + +TEST(utils, kv_collection) +{ + int ret = OB_SUCCESS; + char kv_str[] = "data:2346234;test:5asdfgasf; time:21354213"; + int64_t origin_len = strlen(kv_str); + const char *pair_delimiter = "; "; + const char *kv_delimiter = ":"; + ObLogKVCollection kv_c; + ret = kv_c.init(kv_delimiter, pair_delimiter); + EXPECT_EQ(OB_SUCCESS, ret); + ret = kv_c.deserialize(kv_str); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(3, kv_c.size()); + int64_t len = kv_c.length(); + EXPECT_EQ(origin_len, len-1); + bool contain = false; + ret = kv_c.contains_key("data", contain); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(true, contain); + ret = kv_c.contains_key("versin", contain); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(false, contain); + const char *value_time = NULL; + ret = kv_c.get_value_of_key("time", value_time); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_STREQ("21354213", value_time); + kv_c.reset(); + + // test append + kv_c.init(kv_delimiter, pair_delimiter); + ObLogKVCollection::KVPair kvpair; + char key[] = "jakds"; + char value[] = "dsagads"; + ret = kvpair.init(kv_delimiter); + EXPECT_EQ(OB_SUCCESS, ret); + ret = kvpair.set_key_and_value(key, value); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(true, kvpair.is_valid()); + ret = kv_c.append_kv_pair(kvpair); + EXPECT_EQ(OB_SUCCESS, ret); + + kvpair.reset(); + char key1[] = "time"; + char value1[] = "1237851204"; + ret = kvpair.init(kv_delimiter); + EXPECT_EQ(OB_SUCCESS, ret); + ret = kvpair.set_key_and_value(key1, value1); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(true, kvpair.is_valid()); + ret = kv_c.append_kv_pair(kvpair); + EXPECT_EQ(OB_SUCCESS, ret); + kvpair.reset(); + ret = kv_c.contains_key("time1", contain); + 
EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(false, contain); + origin_len = strlen(key) + strlen(value) + strlen(key1) + strlen(value1) + + 2 * strlen(kv_delimiter) + strlen(pair_delimiter); + len = kv_c.length(); + EXPECT_EQ(origin_len, len); + char buf[len+1]; + int64_t pos = 0; + ret = kv_c.serialize(buf, len+1, pos); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_STREQ("jakds:dsagads; time:1237851204", buf); +} + +TEST(utils, cstring_to_num) +{ + char numstr1[] = "123412"; + char numstr2[] = "-683251"; + char numstr3[] = "0"; + char numstr4[] = "a123"; + char numstr5[] = " 123"; + char numstr6[] = ""; + int64_t val = 0; + int ret = c_str_to_int(numstr1, val); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(123412, val); + ret = c_str_to_int(numstr2, val); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(-683251, val); + ret = c_str_to_int(numstr3, val); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(0, val); + ret = c_str_to_int(numstr4, val); + EXPECT_EQ(OB_INVALID_DATA, ret); + ret = c_str_to_int(numstr5, val); + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(123, val); + ret = c_str_to_int(numstr6, val); + EXPECT_EQ(OB_INVALID_ARGUMENT, ret); +} + +} +} + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + testing::InitGoogleTest(&argc,argv); +// testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_ob_concurrent_seq_queue.cpp b/unittest/liboblog/test_ob_concurrent_seq_queue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4040dca7b8f8d8c67d922b47ded7cf19b1639234 --- /dev/null +++ b/unittest/liboblog/test_ob_concurrent_seq_queue.cpp @@ -0,0 +1,198 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "ob_concurrent_seq_queue.h" + +#include "share/ob_define.h" +#include "lib/time/ob_time_utility.h" +#include "lib/atomic/ob_atomic.h" + +namespace oceanbase +{ +namespace common +{ +class TestConSeqQueue : public ::testing::Test +{ +public: + static const int64_t RUN_TIME = 1L * 60L * 60L * 1000L * 1000L; + static const int64_t THREAD_NUM = 20; + static const int64_t STAT_INTERVAL = 5 * 1000 * 1000; +public: + TestConSeqQueue() {} + ~TestConSeqQueue() {} + + virtual void SetUp() + { + ASSERT_EQ(0, queue_.init(1024)); + produce_seq_ = 0; + consume_seq_ = 0; + consume_thread_counter_ = 0; + consume_task_count_ = 0; + last_stat_time_ = 0; + last_consume_task_count_ = 0; + stop_flag_ = false; + } + virtual void TearDown() + { + queue_.destroy(); + } + static void *produce_thread_func(void *args); + static void *consume_thread_func(void *args); + void run_produce(); + void run_consume(); + +public: + pthread_t produce_threads_[THREAD_NUM]; + pthread_t consume_threads_[THREAD_NUM]; + int64_t consume_thread_counter_; + ObConcurrentSeqQueue queue_; + int64_t produce_seq_ CACHE_ALIGNED; + int64_t consume_seq_ CACHE_ALIGNED; + int64_t consume_task_count_ CACHE_ALIGNED; + int64_t last_consume_task_count_ CACHE_ALIGNED; + int64_t last_stat_time_ CACHE_ALIGNED; + + volatile bool stop_flag_ CACHE_ALIGNED; +}; + +TEST_F(TestConSeqQueue, basic) +{ + ObConcurrentSeqQueue queue; + void *data = 0; + + EXPECT_EQ(0, queue.init(1024)); + + EXPECT_EQ(0, queue.push((void*)0, 0, 0)); + EXPECT_EQ(0, queue.push((void*)1, 1, 0)); + EXPECT_EQ(0, queue.push((void*)2, 2, 0)); + + EXPECT_EQ(0, queue.pop(data, 0, 0)); + EXPECT_EQ(0, (int64_t)data); + EXPECT_EQ(0, queue.pop(data, 1, 0)); + EXPECT_EQ(1, (int64_t)data); + EXPECT_EQ(0, queue.pop(data, 2, 0)); + EXPECT_EQ(2, (int64_t)data); + + // Failed to push and pop elements with the same serial number again + EXPECT_NE(0, queue.push((void*)0, 0, 0)); + EXPECT_NE(0, queue.push((void*)1, 1, 0)); + EXPECT_NE(0, queue.push((void*)2, 2, 0)); + EXPECT_NE(0, queue.pop(data, 0, 0)); + EXPECT_NE(0, queue.pop(data, 1, 0)); + EXPECT_NE(0, queue.pop(data, 2, 0)); +} + +void *TestConSeqQueue::produce_thread_func(void *args) +{ + if (NULL != args) { + ((TestConSeqQueue *)args)->run_produce(); + } + + return NULL; +} + +void TestConSeqQueue::run_produce() +{ + int ret = OB_SUCCESS; + int64_t batch_count = 1000; + + while (OB_SUCCESS == ret && ! stop_flag_) { + for (int64_t index = 0; OB_SUCCESS == ret && index < batch_count; index++) { + int64_t seq = ATOMIC_FAA(&produce_seq_, 1); + while (! stop_flag_ && OB_TIMEOUT == (ret = queue_.push((void*)seq, seq, 1 * 1000 * 1000))); + if (! stop_flag_) { + EXPECT_EQ(OB_SUCCESS, ret); + } + } + } +} + +void *TestConSeqQueue::consume_thread_func(void *args) +{ + if (NULL != args) { + ((TestConSeqQueue *)args)->run_consume(); + } + + return NULL; +} + +void TestConSeqQueue::run_consume() +{ + int ret = OB_SUCCESS; + int64_t batch_count = 1000; + int64_t end_time = ObTimeUtility::current_time(); + + int64_t thread_index = ATOMIC_FAA(&consume_thread_counter_, 0); + + while (OB_SUCCESS == ret && !stop_flag_) { + for (int64_t index = 0; OB_SUCCESS == ret && index < batch_count; index++) { + int64_t seq = ATOMIC_FAA(&consume_seq_, 1); + void *data = NULL; + while (! stop_flag_ && OB_TIMEOUT == (ret = queue_.pop(data, seq, 1 * 1000 * 1000))); + if (! 
stop_flag_) { + EXPECT_EQ(OB_SUCCESS, ret); + EXPECT_EQ(seq, (int64_t)data); + ATOMIC_INC(&consume_task_count_); + } + } + + int64_t cur_time = ObTimeUtility::current_time(); + if (OB_UNLIKELY(0 == thread_index) && cur_time - last_stat_time_ > STAT_INTERVAL) { + int64_t task_count = ATOMIC_LOAD(&consume_task_count_); + int64_t consume_seq = ATOMIC_LOAD(&consume_seq_); + int64_t produce_seq = ATOMIC_LOAD(&produce_seq_); + if (0 != last_stat_time_) { + int64_t delta_task_count = task_count - last_consume_task_count_; + int64_t delta_time_sec = (cur_time - last_stat_time_)/1000000; + LIB_LOG(INFO, "STAT", "POP_TPS", delta_task_count/delta_time_sec, K(delta_task_count), + K(consume_seq), K(produce_seq), K(INT32_MAX)); + } + + last_stat_time_ = cur_time; + last_consume_task_count_ = task_count; + } + + if (end_time - cur_time <= 0) { + stop_flag_ = true; + } + } +} + +TEST_F(TestConSeqQueue, thread) +{ + for (int64_t index = 0; index < THREAD_NUM; index++) { + ASSERT_EQ(0, pthread_create(produce_threads_ + index, NULL, produce_thread_func, this)); + } + for (int64_t index = 0; index < THREAD_NUM; index++) { + ASSERT_EQ(0, pthread_create(consume_threads_ + index, NULL, consume_thread_func, this)); + } + for (int64_t index = 0; index < THREAD_NUM; index++) { + pthread_join(produce_threads_[index], NULL); + produce_threads_[index] = 0; + } + for (int64_t index = 0; index < THREAD_NUM; index++) { + pthread_join(consume_threads_[index], NULL); + consume_threads_[index] = 0; + } +} + +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_ob_log_adapt_string.cpp b/unittest/liboblog/test_ob_log_adapt_string.cpp new file mode 100644 index 0000000000000000000000000000000000000000..29baaf9aa82f91e643a22515c9e52ee7cc7cedb3 --- /dev/null +++ b/unittest/liboblog/test_ob_log_adapt_string.cpp @@ -0,0 +1,132 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "ob_log_adapt_string.h" // ObLogAdaptString + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace liboblog +{ + +class TestLogAdaptString : public ::testing::Test +{ +public: + TestLogAdaptString() {} + ~TestLogAdaptString() {} +}; + +void test_append_str(ObLogAdaptString &str, std::string &std_str, const char *cstr) +{ + const char *ret_cstr = NULL; + ASSERT_EQ(OB_SUCCESS, str.append(cstr)); + std_str.append(cstr); + + ASSERT_EQ(OB_SUCCESS, str.cstr(ret_cstr)); + ASSERT_STREQ(std_str.c_str(), ret_cstr); +} + +TEST_F(TestLogAdaptString, smoke_test) +{ + ObLogAdaptString str(ObModIds::OB_LOG_TEMP_MEMORY); + std::string std_str; + const char *cstr = ""; + + test_append_str(str, std_str, ""); + test_append_str(str, std_str, "I am me "); + test_append_str(str, std_str, "中华人民共和国 "); + + EXPECT_EQ(OB_SUCCESS, str.append_int64(100)); + std_str.append("100"); + + ASSERT_EQ(OB_SUCCESS, str.cstr(cstr)); + ASSERT_STREQ(std_str.c_str(), cstr); + + OBLOG_LOG(INFO, "cstr", K(cstr), K(str)); +} + +TEST_F(TestLogAdaptString, argument_test) +{ + ObLogAdaptString str(ObModIds::OB_LOG_TEMP_MEMORY); + std::string std_str; + + EXPECT_EQ(OB_INVALID_ARGUMENT, str.append(NULL)); + + EXPECT_EQ(OB_SUCCESS, str.append("")); + std_str.append(""); + EXPECT_EQ(OB_SUCCESS, str.append_int64(-1)); + std_str.append("-1"); + + EXPECT_EQ(OB_SUCCESS, str.append_int64(INT64_MAX)); + char int64_max[100]; + sprintf(int64_max, "%ld", INT64_MAX); + std_str.append(int64_max); + + + const char *cstr = ""; + ASSERT_EQ(OB_SUCCESS, str.cstr(cstr)); + ASSERT_STREQ(std_str.c_str(), cstr); + + OBLOG_LOG(INFO, "cstr", K(cstr), K(std_str.c_str())); +} + +TEST_F(TestLogAdaptString, all_sort_of_string) +{ + ObLogAdaptString str(ObModIds::OB_LOG_TEMP_MEMORY); + std::string std_str; + const char *cstr = ""; + char buf[1 * _M_ + 1]; + + (void)memset(buf, 'a', sizeof(buf)); + + // Empty strings are also equal + EXPECT_EQ(OB_SUCCESS, str.cstr(cstr)); + EXPECT_STREQ(std_str.c_str(), cstr); + + for (int i = 0; i < 3; i++) { + // less than 8K + test_append_str(str, std_str, ""); + test_append_str(str, std_str, "11111111111111"); + test_append_str(str, std_str, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + + // equals to 8K + buf[8 * _K_] = '\0'; + test_append_str(str, std_str, buf); + test_append_str(str, std_str, buf); + test_append_str(str, std_str, buf); + buf[8 * _K_] = 'a'; + + // greater than 8K + buf[16 * _K_] = '\0'; + test_append_str(str, std_str, buf); + buf[16 * _K_] = 'a'; + buf[32 * _K_] = '\0'; + test_append_str(str, std_str, buf); + buf[32 * _K_] = 'a'; + buf[1 * _M_] = '\0'; + test_append_str(str, std_str, buf); + buf[1 * _M_] = 'a'; + } +} + +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); + OB_LOGGER.set_file_name("test_ob_log_adapt_string.log", true); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_ob_log_all_svr_cache.cpp b/unittest/liboblog/test_ob_log_all_svr_cache.cpp new file mode 100644 index 0000000000000000000000000000000000000000..10d661a80cfc45e711e79d7b9df0e6e90385f2b6 --- /dev/null +++ b/unittest/liboblog/test_ob_log_all_svr_cache.cpp @@ -0,0 +1,528 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#define private public +#include "liboblog/src/ob_log_all_svr_cache.h" +#include "liboblog/src/ob_log_systable_helper.h" +#include "ob_log_utils.h" +#include "test_ob_log_fetcher_common_utils.h" +#include "lib/atomic/ob_atomic.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class TestObLogAllSvrCache: public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +public : + static const int64_t ALLSVR_CACHE_UPDATE_INTERVAL = 10 * _MSEC_; +}; + +static const int64_t SERVER_COUNT = 120; +static const int64_t FIRST_QUERY_RECORD_COUNT = 60; +static const int64_t VARY_RECORD_COUNT = 6; + +typedef IObLogSysTableHelper::AllServerRecordArray AllServerRecordArray; +typedef IObLogSysTableHelper::AllServerRecord AllServerRecord; +AllServerRecord all_server_records[SERVER_COUNT]; +const char *zones[4] = {"z1", "z2", "z3", "z4"}; +const char *regions[4] = {"hz", "sh", "sz", "sh"}; +const char *zone_types[4] = {"ReadWrite", "ReadWrite", "ReadWrite", "ReadOnly"}; + +void generate_data() +{ + int ret = OB_SUCCESS; + ObString ip_str = "127.0.0.1"; + + for(int64_t idx = 0; idx < SERVER_COUNT; idx++) { + AllServerRecord &record = all_server_records[idx]; + int64_t pos = 0; + if (OB_FAIL(databuff_printf(record.svr_ip_, sizeof(record.svr_ip_), pos, + "%.*s", ip_str.length(), ip_str.ptr()))) { + LOG_ERROR("save ip address fail", K(ret), K(pos), + "buf_size", sizeof(record.svr_ip_), K(ip_str)); + } + record.svr_port_ = static_cast(idx + 8000); + int64_t index = idx % 4; + switch (index) { + case 0: + record.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_INACTIVE; + break; + case 1: + record.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_ACTIVE; + break; + case 2: + record.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_DELETING; + break; + case 3: + record.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_ACTIVE; + break; + default: + break; + } + if (OB_FAIL(record.zone_.assign(zones[index]))) { + LOG_ERROR("record zone assign fail", K(ret), K(record)); + } + } +} + +// To test if the cached __all_server system table data is cached correctly, the following dynamic policy is used for the returned data. +// Assume FIRST_QUERY_RECORD_COUNT=60, VARY_RECORD_COUNT=6 +// 1. First query returns: records from 0 to 59 +// 2. Second query returns: 6 new rows, i.e. 0 to 65 rows, 60 to 65 rows added +// 3. Third query returns: Decrease the first 6 rows, i.e. return 6 to 65 rows, decrease 0 to 5 rows +// ... 
+// and so on, until the end, the final validation result 60~119 +class MockSysTableHelper1 : public IObLogSysTableHelper +{ +public: + MockSysTableHelper1() : query_time_(1), + start_index_(0), + end_index_(FIRST_QUERY_RECORD_COUNT - 1), + is_invariable_(false) {} + virtual ~MockSysTableHelper1() {} + +public: + int query_with_multiple_statement(BatchSQLQuery &batch_query) + { + UNUSED(batch_query); + return 0; + } + + /// Query __all_clog_history_info_v2 based on log_id to get all servers with service log IDs greater than or equal to log_id logs + virtual int query_clog_history_by_log_id( + const common::ObPartitionKey &pkey, + const uint64_t log_id, + ClogHistoryRecordArray &records) + { + UNUSED(pkey); + UNUSED(log_id); + UNUSED(records); + + return 0; + } + + /// Query __all_clog_history_info_v2 for all servers with timestamp greater than or equal to timestamp log based on timestamp + virtual int query_clog_history_by_tstamp( + const common::ObPartitionKey &pkey, + const int64_t timestamp, + ClogHistoryRecordArray &records) + { + UNUSED(pkey); + UNUSED(timestamp); + UNUSED(records); + + return 0; + } + + /// Query __all_meta_table / __all_root_table to get information about the servers that are serving the partition + virtual int query_meta_info( + const common::ObPartitionKey &pkey, + MetaRecordArray &records) + { + UNUSED(pkey); + UNUSED(records); + + return 0; + } + + // Query __all_meta_table / __all_root_table for leader information + virtual int query_leader_info( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + UNUSED(has_leader); + UNUSED(leader); + + return 0; + } + + /// Query __all_server table for all active server information + virtual int query_all_server_info(AllServerRecordArray &records) + { + int ret = OB_SUCCESS; + + // The first query returns records from 0 to FIRST_QUERY_RECORD_COUNT-1 + if (1 == query_time_) { + start_index_ = 0; + end_index_ = FIRST_QUERY_RECORD_COUNT - 1; + } else { + if (is_invariable_) { // Return records no longer change + // do nothing + } else if (0 == (query_time_ & 0x01)) { // ADD record + if (end_index_ + VARY_RECORD_COUNT >= SERVER_COUNT) { + ATOMIC_STORE(&is_invariable_, true); + } else { + end_index_ += VARY_RECORD_COUNT; + } + } else if (1 == (query_time_ & 0x01)) { // minus records + start_index_ += VARY_RECORD_COUNT; + } + } + + // make records + for (int64_t idx = start_index_; OB_SUCC(ret) && idx <= end_index_; idx++) { + AllServerRecord &record = all_server_records[idx]; + if (OB_FAIL(records.push_back(record))) { + LOG_ERROR("records push error", K(ret), K(record)); + } + } + LOG_INFO("query all server info", K(query_time_), K(start_index_), + K(end_index_), K(is_invariable_)); + query_time_++; + + return ret; + } + + virtual int query_all_zone_info(AllZoneRecordArray &records) + { + UNUSED(records); + + return 0; + } + + virtual int query_all_zone_type(AllZoneTypeRecordArray &records) + { + int ret = OB_SUCCESS; + + for (int64_t idx = 0; idx < 4; ++idx) { + AllZoneTypeRecord record; + record.zone_type_ = str_to_zone_type(zone_types[idx]); + if (OB_FAIL(record.zone_.assign(zones[idx]))) { + LOG_ERROR("record assign zone error", K(ret), K(record)); + } else if (OB_FAIL(records.push_back(record))) { + LOG_ERROR("records push error", K(ret), K(record)); + } + } + return ret; + } + + virtual int query_cluster_info(ClusterInfo &cluster_info) + { + UNUSED(cluster_info); + + return 0; + } + + virtual int query_cluster_min_observer_version(uint64_t &min_observer_version) + { + 
UNUSED(min_observer_version); + + return 0; + } + + virtual int reset_connection() + { + return 0; + } + + virtual int query_timezone_info_version(const uint64_t tenant_id, + int64_t &timezone_info_version) + { + UNUSED(tenant_id); + UNUSED(timezone_info_version); + return 0; + } +public: + int64_t query_time_; + int64_t start_index_; + int64_t end_index_; + bool is_invariable_; +}; + +class MockSysTableHelper2 : public IObLogSysTableHelper +{ +public: + MockSysTableHelper2() : query_time_(1), + start_index_(0), + end_index_(FIRST_QUERY_RECORD_COUNT - 1) {} + virtual ~MockSysTableHelper2() {} + +public: + virtual int query_with_multiple_statement(BatchSQLQuery &batch_query) + { + UNUSED(batch_query); + return 0; + } + + /// Query __all_clog_history_info_v2 based on log_id to get all servers with service log IDs greater than or equal to log_id logs + virtual int query_clog_history_by_log_id( + const common::ObPartitionKey &pkey, + const uint64_t log_id, + ClogHistoryRecordArray &records) + { + UNUSED(pkey); + UNUSED(log_id); + UNUSED(records); + + return 0; + } + + /// Query __all_clog_history_info_v2 for all servers with timestamp greater than or equal to timestamp log based on timestamp + virtual int query_clog_history_by_tstamp( + const common::ObPartitionKey &pkey, + const int64_t timestamp, + ClogHistoryRecordArray &records) + { + UNUSED(pkey); + UNUSED(timestamp); + UNUSED(records); + + return 0; + } + + /// Query __all_meta_table / __all_root_table to get information about the servers that are serving the partition + virtual int query_meta_info( + const common::ObPartitionKey &pkey, + MetaRecordArray &records) + { + UNUSED(pkey); + UNUSED(records); + + return 0; + } + + // Query __all_meta_table / __all_root_table for leader information + virtual int query_leader_info( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + UNUSED(pkey); + UNUSED(has_leader); + UNUSED(leader); + + return 0; + } + + /// Query the __all_server table to get all active server information + // First query: return a batch of servers, 1/3 of which are ACTIVE + // Second query: return the servers returned in the first query, and the ACTIVE server status is changed to INACTIVE + virtual int query_all_server_info(AllServerRecordArray &records) + { + int ret = OB_SUCCESS; + + // build records + for (int64_t idx = start_index_; OB_SUCC(ret) && idx <= end_index_; idx++) { + AllServerRecord &record = all_server_records[idx]; + if (2 == query_time_) { + // ACTIVE->INACTIVE + if (1 == idx % 4) { + record.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_INACTIVE; + } + } + if (OB_FAIL(records.push_back(record))) { + LOG_ERROR("records push error", K(ret), K(record)); + } + } + LOG_INFO("query all server info", K(query_time_), K(start_index_), K(end_index_)); + query_time_++; + + return ret; + } + + virtual int query_all_zone_info(AllZoneRecordArray &records) + { + int ret = OB_SUCCESS; + + for (int64_t idx = 0; idx < 4; ++idx) { + AllZoneRecord record; + if (OB_FAIL(record.zone_.assign(zones[idx]))) { + LOG_ERROR("record assign zone error", K(ret), K(record)); + } else if (OB_FAIL(record.region_.assign(regions[idx]))) { + LOG_ERROR("record assign error", K(ret), K(record)); + } else if (OB_FAIL(records.push_back(record))) { + LOG_ERROR("records push error", K(ret), K(record)); + } + } + + return ret; + } + + virtual int query_all_zone_type(AllZoneTypeRecordArray &records) + { + int ret = OB_SUCCESS; + + for (int64_t idx = 0; idx < 4; ++idx) { + AllZoneTypeRecord record; + 
record.zone_type_ = str_to_zone_type(zone_types[idx]); + if (OB_FAIL(record.zone_.assign(zones[idx]))) { + LOG_ERROR("record assign zone error", K(ret), K(record)); + } else if (OB_FAIL(records.push_back(record))) { + LOG_ERROR("records push error", K(ret), K(record)); + } + } + return ret; + } + + virtual int query_cluster_info(ClusterInfo &cluster_info) + { + UNUSED(cluster_info); + + return 0; + } + + virtual int query_cluster_min_observer_version(uint64_t &min_observer_version) + { + UNUSED(min_observer_version); + + return 0; + } + + virtual int reset_connection() + { + return 0; + } + virtual int query_timezone_info_version(const uint64_t tenant_id, + int64_t &timezone_info_version) + { + UNUSED(tenant_id); + UNUSED(timezone_info_version); + return 0; + } +public: + int64_t query_time_; + int64_t start_index_; + int64_t end_index_; +}; + + +////////////////////// Test of basic functions ////////////////////////////////////////// +TEST_F(TestObLogAllSvrCache, init) +{ + generate_data(); + + ObLogAllSvrCache all_svr_cache; + MockSysTableHelper1 mock_systable_helper; + MockFetcherErrHandler1 err_handler; + + // set update interval + all_svr_cache.set_update_interval_(ALLSVR_CACHE_UPDATE_INTERVAL); + + EXPECT_EQ(OB_SUCCESS, all_svr_cache.init(mock_systable_helper, err_handler)); + while (false == ATOMIC_LOAD(&mock_systable_helper.is_invariable_)) { + // do nothing + } + LOG_INFO("exit", K(mock_systable_helper.start_index_), K(mock_systable_helper.end_index_)); + + /// verify result + EXPECT_EQ(FIRST_QUERY_RECORD_COUNT, all_svr_cache.svr_map_.count()); + int64_t end_index = SERVER_COUNT - 1; + int64_t start_index = end_index - FIRST_QUERY_RECORD_COUNT + 1; + + // Test servers in the __all_server table + // Servers in the ACTIVE and DELETING states are serviceable + // Servers in the INACTIVE state are not serviceable + for (int64_t idx = start_index; idx <= end_index; idx++) { + ObAddr svr(ObAddr::IPV4, all_server_records[idx].svr_ip_, all_server_records[idx].svr_port_); + if (0 == idx % 4) { + // INACTIVE/ENCRYPTION ZONE + EXPECT_FALSE(all_svr_cache.is_svr_avail(svr)); + } else { + // ACTIVE/DELETEING + EXPECT_TRUE(all_svr_cache.is_svr_avail(svr)); + } + } + + // test server not in __all_server table + for (int64_t idx = 0; idx < start_index; idx++) { + ObAddr svr(ObAddr::IPV4, all_server_records[idx].svr_ip_, all_server_records[idx].svr_port_); + EXPECT_FALSE(all_svr_cache.is_svr_avail(svr)); + } + + all_svr_cache.destroy(); +} + +// state change from active to inactive +TEST_F(TestObLogAllSvrCache, all_svr_cache2) +{ + ObLogAllSvrCache all_svr_cache; + MockSysTableHelper2 mock_systable_helper; + MockFetcherErrHandler1 err_handler; + + // No threads open, manual assignment + int ret = OB_SUCCESS; + if (OB_FAIL(all_svr_cache.svr_map_.init(ObModIds::OB_LOG_ALL_SERVER_CACHE))) { + LOG_ERROR("init svr map fail", K(ret)); + } + if (OB_FAIL(all_svr_cache.zone_map_.init(ObModIds::OB_LOG_ALL_SERVER_CACHE))) { + LOG_ERROR("init svr map fail", K(ret)); + } + + all_svr_cache.cur_version_ = 0; + all_svr_cache.cur_zone_version_ = 0; + all_svr_cache.err_handler_ = &err_handler; + all_svr_cache.systable_helper_ = &mock_systable_helper; + + // update __all_zone + EXPECT_EQ(OB_SUCCESS, all_svr_cache.update_zone_cache_()); + + // manual update and clearance + EXPECT_EQ(OB_SUCCESS, all_svr_cache.update_server_cache_()); + EXPECT_EQ(OB_SUCCESS, all_svr_cache.purge_stale_records_()); + + /// verify result + EXPECT_EQ(FIRST_QUERY_RECORD_COUNT, all_svr_cache.svr_map_.count()); + int64_t start_index = 0; + 
int64_t end_index = FIRST_QUERY_RECORD_COUNT - 1; + + for (int64_t idx = start_index; idx <= end_index; idx++) { + ObAddr svr(ObAddr::IPV4, all_server_records[idx].svr_ip_, all_server_records[idx].svr_port_); + if (1 == idx % 4) { + EXPECT_TRUE(all_svr_cache.is_svr_avail(svr)); + } + } + + // Second manual update and clearance + EXPECT_EQ(OB_SUCCESS, all_svr_cache.update_server_cache_()); + EXPECT_EQ(OB_SUCCESS, all_svr_cache.purge_stale_records_()); + + // Verify that it is ACTIVE-INACTIVE + for (int64_t idx = start_index; idx <= end_index; idx++) { + ObAddr svr(ObAddr::IPV4, all_server_records[idx].svr_ip_, all_server_records[idx].svr_port_); + if (1 == idx % 4) { + EXPECT_FALSE(all_svr_cache.is_svr_avail(svr)); + } + } + + all_svr_cache.destroy(); +} + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_all_svr_cache.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_log_dlist.cpp b/unittest/liboblog/test_ob_log_dlist.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2362a6e9e1b36924af7060421382195baedff208 --- /dev/null +++ b/unittest/liboblog/test_ob_log_dlist.cpp @@ -0,0 +1,165 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#define private public +#include "liboblog/src/ob_log_dlist.h" +#include "ob_log_utils.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class DeriveDlistNode; +typedef ObLogDListNode TestDlistNode; + +class DeriveDlistNode : public TestDlistNode +{ +public: + DeriveDlistNode() : value_(0) {} + ~DeriveDlistNode() {} +public: + void reset(int64_t value) + { + value_ = value; + } + +private: + int64_t value_; +}; +typedef DeriveDlistNode Type; +// test count +static const int64_t ONE_TEST_COUNT = 1; +static const int64_t MUL_TEST_COUNT = 1000; + +class TestObLogDlist: public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +public: + // generate data + void generate_data(const int64_t count, Type *&datas); + // check data correct + bool is_dlist_correct(const int64_t start_value, DeriveDlistNode *head); +}; + +void TestObLogDlist::generate_data(const int64_t count, Type *&datas) +{ + datas = (Type *)ob_malloc(sizeof(Type) * count); + OB_ASSERT(NULL != datas); + for (int64_t idx = 0; idx < count; idx++) { + new (datas + idx) Type(); + datas[idx].reset(idx); + } +} + +bool TestObLogDlist::is_dlist_correct(const int64_t start_value, DeriveDlistNode *head) +{ + bool bool_ret = true; + int64_t expect_val = start_value; + + if (OB_ISNULL(head) || OB_NOT_NULL(head->get_prev())) { + LOG_ERROR("invalid argument"); + bool_ret = false; + } else if (OB_ISNULL(head->get_next())) { // single node + if (expect_val != head->value_) { + bool_ret = false; + } + LOG_DEBUG("is_dlist_correct", K(expect_val)); + } else { // multi node + DeriveDlistNode *current_node = head; + DeriveDlistNode *next_node = current_node->get_next(); + while ((NULL != current_node) + && (NULL != current_node->get_next())) { + if ((expect_val != current_node->value_) + || (expect_val != next_node->get_prev()->value_)) { + bool_ret = false; + } + LOG_DEBUG("is_dlist_correct", K(expect_val)); + current_node = next_node; + next_node = current_node->get_next(); + expect_val--; + } + // last node + if ((expect_val == current_node->value_) + && OB_ISNULL(current_node->get_next())) { + LOG_DEBUG("is_dlist_correct", K(expect_val)); + } else { + bool_ret = false; + } + } + + return bool_ret; +} + +////////////////////// basic functions ////////////////////////////////////////// +TEST_F(TestObLogDlist, dlist) +{ + // generate data + Type *datas = NULL; + generate_data(MUL_TEST_COUNT, datas); + + // ObLogDList + ObLogDList dlist; + EXPECT_EQ(0, dlist.count_); + EXPECT_EQ(NULL, dlist.head_); + + // insert data + dlist.add_head(datas[0]); + EXPECT_EQ(ONE_TEST_COUNT, dlist.count_); + EXPECT_TRUE(is_dlist_correct(ONE_TEST_COUNT - 1, dlist.head())); + + // insert multi data + for (int64_t idx = 1; idx < MUL_TEST_COUNT; idx++) { + dlist.add_head(datas[idx]); + } + EXPECT_EQ(MUL_TEST_COUNT, dlist.count_); + EXPECT_TRUE(is_dlist_correct(MUL_TEST_COUNT - 1, dlist.head())); + + // Delete the last half of the data and check for correctness + for (int64_t idx = 0; idx < MUL_TEST_COUNT / 2; idx++) { + dlist.erase(datas[idx]); + } + EXPECT_EQ(MUL_TEST_COUNT / 2, dlist.count_); + EXPECT_TRUE(is_dlist_correct(MUL_TEST_COUNT - 1, dlist.head())); + + // Delete the first half of the data and check for correctness + for (int64_t idx = MUL_TEST_COUNT / 2; idx < MUL_TEST_COUNT; idx++) { + dlist.erase(datas[idx]); + } + EXPECT_EQ(0, dlist.count_); +} + +}//end 
of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_dlist.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_log_fetcher_common_utils.h b/unittest/liboblog/test_ob_log_fetcher_common_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..f9463350acbe9d6460dc673109d34c66a9044cec --- /dev/null +++ b/unittest/liboblog/test_ob_log_fetcher_common_utils.h @@ -0,0 +1,598 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include +#include + +//#include "lib/oblog/ob_log_module.h" +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" + +#include "liboblog/src/ob_log_instance.h" +#include "ob_log_stream_worker.h" +#define private public +#include "ob_log_rpc.h" +#include "ob_log_utils.h" +#include "ob_log_systable_helper.h" + +//#include "ob_log_part_fetch_ctx.h" +//#include "ob_log_fetcher_stream.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace transaction; +using namespace storage; +//using namespace clog; +//using namespace fetcher; + +namespace oceanbase +{ +namespace unittest +{ + +/* + * Utils. + */ +typedef std::vector Svrs; +typedef std::vector PKeys; +typedef std::vector LogIds; +typedef std::vector Tstamps; + +class MockFetcherErrHandler1 : public IObLogErrHandler +{ +public: + virtual ~MockFetcherErrHandler1() { } +public: + virtual void handle_error(const int err_no, const char *fmt, ...) 
+ { + UNUSED(err_no); + va_list ap; + va_start(ap, fmt); + //__E__(fmt, ap); + //LOG_ERROR("test", fmt, ap); + va_end(ap); + abort(); + } +}; + +/* + * SvrFinder + * + */ +static const int64_t ALL_SERVER_COUNT = 100; + +static const int64_t QUERY_CLOG_HISTORY_VALID_COUNT = 10; +static const int64_t QUERY_CLOG_HISTORY_INVALID_COUNT = 5; +static const int64_t QUERY_META_INFO_ADD_COUNT = 6; + +static const int64_t SVR_FINDER_REQ_NUM = 10 * 1000; +static const int64_t LEADER_FINDER_REQ_NUM = 10 * 1000; + +// Construct a request server to initiate asynchronous requests +// request server: query clog/query meta +// Request leader: +class MockSysTableHelperBase: public IObLogSysTableHelper +{ +public: + MockSysTableHelperBase() {} + virtual ~MockSysTableHelperBase() {} + +public: + /// Query __all_clog_history_info_v2 based on log_id to get all servers with service log IDs greater than or equal to log_id logs + /// Returns two types of logs: one for servers in the _all_server table, and one for servers not in the _all_server table + virtual int query_clog_history_by_log_id( + const common::ObPartitionKey &pkey, + const uint64_t log_id, + ClogHistoryRecordArray &records) + { + // Generate random results. + int ret = OB_SUCCESS; + + UNUSED(pkey); + records.reset(); + ClogHistoryRecord rec; + + int64_t valid_seed = static_cast(pkey.table_id_); + int64_t invalid_seed = ALL_SERVER_COUNT; + int64_t cnt = QUERY_CLOG_HISTORY_VALID_COUNT + QUERY_CLOG_HISTORY_INVALID_COUNT; + + for (int64_t idx = 0; idx < cnt; idx++) { + rec.reset(); + rec.start_log_id_ = log_id; + rec.end_log_id_ = log_id + 10000; + if (idx < QUERY_CLOG_HISTORY_VALID_COUNT) { + // Insert QUERY_CLOG_HISTORY_VALID_COUNT a valid record + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, + "127.0.0.%ld", valid_seed % ALL_SERVER_COUNT); + valid_seed++; + } else { + // Insert QUERY_CLOG_HISTORY_INVALID_COUNT an invalid record + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", invalid_seed); + invalid_seed++; + } + rec.svr_port_ = 8888; + + records.push_back(rec); + } + + return ret; + } + + /// Query __all_clog_history_info_v2 for all servers with timestamp greater than or equal to timestamp log based on timestamp + virtual int query_clog_history_by_tstamp( + const common::ObPartitionKey &pkey, + const int64_t timestamp, + ClogHistoryRecordArray &records) + { + // Generate random results. 
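+ // Same layout as query_clog_history_by_log_id above: the first QUERY_CLOG_HISTORY_VALID_COUNT records point at + // servers inside the mocked __all_server range (valid_seed % ALL_SERVER_COUNT), the remaining + // QUERY_CLOG_HISTORY_INVALID_COUNT at addresses outside that range.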
+ int ret = OB_SUCCESS; + + UNUSED(timestamp); + + records.reset(); + ClogHistoryRecord rec; + + int64_t valid_seed = static_cast(pkey.table_id_); + int64_t invalid_seed = ALL_SERVER_COUNT; + int64_t cnt = QUERY_CLOG_HISTORY_VALID_COUNT + QUERY_CLOG_HISTORY_INVALID_COUNT; + + for (int64_t idx = 0; idx < cnt; idx++) { + rec.reset(); + rec.start_log_id_ = 0; + rec.end_log_id_ = 65536; + if (idx < QUERY_CLOG_HISTORY_VALID_COUNT) { + // Insert QUERY_CLOG_HISTORY_VALID_COUNT a valid record + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, + "127.0.0.%ld", valid_seed % ALL_SERVER_COUNT); + valid_seed++; + } else { + // Insert QUERY_CLOG_HISTORY_INVALID_COUNT an invalid record + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", invalid_seed); + invalid_seed++; + } + rec.svr_port_ = 8888; + + records.push_back(rec); + } + + return ret; + } + + /// Query __all_meta_table / __all_root_table to get information about the servers that are serving the partition + // Add records: return a batch of servers to add to query_clog_history, add only those servers for which clog history does not exist + virtual int query_meta_info( + const common::ObPartitionKey &pkey, + MetaRecordArray &records) + { + // Generate random results. + int ret = OB_SUCCESS; + + UNUSED(pkey); + records.reset(); + MetaRecord rec; + + int64_t seed = static_cast(pkey.table_id_); + int64_t cnt = QUERY_CLOG_HISTORY_VALID_COUNT + QUERY_META_INFO_ADD_COUNT; + + for (int64_t idx = 0; idx < cnt; idx++) { + rec.reset(); + if (idx < QUERY_CLOG_HISTORY_VALID_COUNT) { + // Returns the same server as query_clog_history + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, + "127.0.0.%ld", seed % ALL_SERVER_COUNT); + } else { + // Return QUERY_META_INFO_ADD_COUNT additional records + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, + "127.0.0.%ld", seed % ALL_SERVER_COUNT); + } + rec.svr_port_ = 8888; + rec.replica_type_ = REPLICA_TYPE_FULL; + seed++; + + records.push_back(rec); + } + + return ret; + } + + // Query __all_meta_table / __all_root_table for leader information + virtual int query_leader_info( + const common::ObPartitionKey &pkey, + bool &has_leader, + common::ObAddr &leader) + { + int ret = OB_SUCCESS; + + UNUSED(pkey); + has_leader = true; + leader.set_ip_addr("127.0.0.1", 8888); + + return ret; + } + + /// Query __all_server table for all active server information + virtual int query_all_server_info(AllServerRecordArray &records) + { + int ret = OB_SUCCESS; + + UNUSED(records); + + return ret; + } + + virtual int query_all_zone_info(AllZoneRecordArray &records) + { + UNUSED(records); + + return 0; + } + + virtual int query_cluster_info(ClusterInfo &cluster_info) + { + UNUSED(cluster_info); + + return 0; + } +}; + +class MockSysTableHelperDerive1 : public MockSysTableHelperBase +{ +public: + MockSysTableHelperDerive1() {} + virtual ~MockSysTableHelperDerive1() {} + +public: + /// Query the __all_server table to get all active server information + /// The _all_server table has 100 servers in the range 127.0.0.1:8888 ~ 127.0.0.99:8888 + virtual int query_all_server_info(AllServerRecordArray &records) + { + int ret = OB_SUCCESS; + + int64_t seed = 0; + AllServerRecord rec; + for(int64_t idx = 0; idx < ALL_SERVER_COUNT; idx++) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", seed); + rec.svr_port_ = 8888; + rec.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_ACTIVE; + records.push_back(rec); + seed++; + } + + return ret; + } +}; + +class 
MockSysTableHelperDerive2 : public MockSysTableHelperBase +{ +public: + MockSysTableHelperDerive2() {} + virtual ~MockSysTableHelperDerive2() {} + +public: + /// Query the __all_server table to get all active server information + /// The _all_server table has 100 servers in the range of 127.0.0.1:8888 ~ 127.0.0.20:8888 + // 1. 50 of them are ACTIVE + // 2. 50 of them are INACTIVE + virtual int query_all_server_info(AllServerRecordArray &records) + { + int ret = OB_SUCCESS; + + int64_t seed = 0; + AllServerRecord rec; + for(int64_t idx = 0; idx < ALL_SERVER_COUNT; idx++) { + rec.reset(); + snprintf(rec.svr_ip_, common::MAX_IP_ADDR_LENGTH + 1, "127.0.0.%ld", seed); + rec.svr_port_ = 8888; + if (0 == (idx & 0x01)) { + rec.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_ACTIVE; + } else { + rec.status_ = share::ObServerStatus::DisplayStatus::OB_SERVER_INACTIVE; + } + + records.push_back(rec); + seed++; + } + + return ret; + } +}; + +class MockObLogRpcBase : public IObLogRpc +{ +public: + MockObLogRpcBase() {} + virtual ~MockObLogRpcBase() { } + + // Request start log id based on timestamp + virtual int req_start_log_id_by_tstamp(const common::ObAddr &svr, + const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(res); + UNUSED(timeout); + + return ret; + } + + // Request Leader Heartbeat + virtual int req_leader_heartbeat(const common::ObAddr &svr, + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(res); + UNUSED(timeout); + + return ret; + } + + // Open a new stream + // Synchronous RPC + virtual int open_stream(const common::ObAddr &svr, + const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &resp, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(resp); + UNUSED(timeout); + + return ret; + } + + // Stream based, get logs + // Asynchronous RPC + virtual int async_stream_fetch_log(const common::ObAddr &svr, + const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogExternalProxy::AsyncCB &cb, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(cb); + UNUSED(timeout); + + return ret; + } +}; + +class MockObLogStartLogIdRpc : public MockObLogRpcBase +{ + typedef const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint::Param Param; + typedef const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint::ParamArray ParamArray; +public: + MockObLogStartLogIdRpc() : + spec_err_(false), + svr_err_(OB_SUCCESS), + part_err_(OB_SUCCESS) + {} + virtual ~MockObLogStartLogIdRpc() { } + + void set_err(const int svr_err, const int part_err) + { + svr_err_ = svr_err; + part_err_ = part_err; + spec_err_ = true; + } + + // Request start log id based on timestamp + // 1. rpc always assumes success + // 2. 10% chance of server internal error + // 3. 30% probability that partition returns success (30%) when server succeeds, + // 30% probability that start_log_id returns pkey-table_id with breakpoint information + // 4. 
Support for external error codes
+  virtual int req_start_log_id_by_tstamp(const common::ObAddr &svr,
+      const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req,
+      obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res,
+      const int64_t timeout)
+  {
+    int ret = OB_SUCCESS;
+    UNUSED(svr);
+    UNUSED(timeout);
+
+    res.reset();
+    // Seed.
+    int64_t seed = (get_timestamp());
+    int64_t rand = (seed) % 100;
+    bool svr_internal_err = (rand < 10);
+
+    // Preferred use of the specified error code
+    if (spec_err_) {
+      res.set_err(svr_err_);
+    } else if (svr_internal_err) {
+      res.set_err(OB_ERR_UNEXPECTED);
+    }
+
+    if (OB_SUCCESS == res.get_err()) {
+      ParamArray &param_array = req.get_params();
+      for (int64_t idx = 0, cnt = param_array.count(); idx < cnt; ++idx) {
+        Param &param = param_array[idx];
+        obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result;
+        result.reset();
+        result.start_log_id_ = param.pkey_.table_id_;
+
+        if (spec_err_) {
+          result.err_ = part_err_;
+        } else {
+          // 30% success, 30% break.
+          rand = (idx + seed) % 100;
+          bool succeed = (rand < 30);
+          bool breakrpc = (30 <= rand) && (rand < 60);
+          result.err_ = (succeed) ? OB_SUCCESS : ((breakrpc) ? OB_EXT_HANDLE_UNFINISH : OB_NEED_RETRY);
+        }
+
+        // Break info is actually not returned.
+        EXPECT_EQ(OB_SUCCESS, res.append_result(result));
+      }
+    }
+
+    return ret;
+  }
+
+private:
+  bool spec_err_;
+  int svr_err_;
+  int part_err_;
+};
+
+class MockObLogRpcDerived2 : public MockObLogRpcBase
+{
+  typedef obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint Req;
+  typedef Req::Param Param;
+  typedef Req::ParamArray ParamArray;
+public:
+  MockObLogRpcDerived2() : request_(NULL),
+                           start_pos_(0),
+                           end_pos_(0),
+                           query_time_(0) {}
+
+  virtual ~MockObLogRpcDerived2() {}
+
+  int init(int64_t req_cnt)
+  {
+    int ret = OB_SUCCESS;
+
+    if (OB_UNLIKELY(req_cnt <= 0)) {
+      //LOG_ERROR("invalid_argument");
+      ret = OB_INVALID_ARGUMENT;
+    } else {
+      request_ = new Req;
+      request_->reset();
+      start_pos_ = 0;
+      end_pos_ = req_cnt - 1;
+      query_time_ = 1;
+    }
+
+    return ret;
+  }
+
+  void destroy()
+  {
+    delete request_;
+    start_pos_ = 0;
+    end_pos_ = 0;
+    query_time_ = 1;
+  }
+
+  // Request start log id based on timestamp
+  // 1. rpc always assumes success, and no server internal error
+  // 2. Each time the second half returns succ and the first half returns break info
+  virtual int req_start_log_id_by_tstamp(const common::ObAddr &svr,
+      const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req,
+      obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res,
+      const int64_t timeout)
+  {
+    int ret = OB_SUCCESS;
+    UNUSED(svr);
+    UNUSED(timeout);
+
+    res.reset();
+    int64_t mid_index = (end_pos_ - start_pos_ + 1) / 2;
+    const ParamArray &param_array = req.get_params();
+
+    if (1 == query_time_) {
+      // No validation is required for the first query
+      // Save the request parameters
+      for (int64_t idx = 0, cnt = param_array.count(); idx < cnt; ++idx) {
+        const Param &param = param_array[idx];
+        Param add_param;
+        add_param.reset(param.pkey_, param.start_tstamp_, param.break_info_);
+
+        if (OB_FAIL(request_->append_param(add_param))) {
+          //LOG_ERROR("append param fail", K(ret), K(idx), K(add_param));
+        }
+      }
+    } else {
+      // Verify that it is the original request
+      is_original_req(&req, start_pos_, end_pos_);
+    }
+
+    for (int64_t idx = 0, cnt = param_array.count(); idx < cnt; ++idx) {
+      const Param &param = param_array[idx];
+      obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint::Result result;
+
+      if (idx < mid_index) {
+        // First half returns break info
+        result.reset();
+        result.err_ = OB_EXT_HANDLE_UNFINISH;
+        reset_break_info(result.break_info_, static_cast<uint32_t>(idx), idx + 100);
+        result.start_log_id_ = OB_INVALID_ID;
+
+        // Dynamically update the break info of the corresponding parameter of the saved request, for subsequent verification
+        Param &all_param = const_cast<Param &>(request_->params_[idx]);
+        reset_break_info(all_param.break_info_, static_cast<uint32_t>(idx), idx + 100);
+      } else {
+        // The second half returns success
+        result.reset();
+        result.err_ = OB_SUCCESS;
+        result.start_log_id_ = param.pkey_.table_id_;
+      }
+      EXPECT_EQ(OB_SUCCESS, res.append_result(result));
+    }
+    if (end_pos_ != 0) {
+      end_pos_ = mid_index - 1;
+    }
+    query_time_++;
+
+    return ret;
+  }
+private:
+  void is_original_req(const Req *cur_req, int64_t start_pos, int64_t end_pos)
+  {
+    ParamArray all_param_array = request_->get_params();
+    ParamArray cur_param_array = cur_req->get_params();
+
+    for (int64_t idx = start_pos; idx <= end_pos; idx++) {
+      Param all_param = all_param_array[idx];
+      Param cur_param = cur_param_array[idx];
+      // verify pkey, start_tstamp
+      EXPECT_EQ(all_param.pkey_, cur_param.pkey_);
+      EXPECT_EQ(all_param.start_tstamp_, cur_param.start_tstamp_);
+      // verify BreakInfo
+      const obrpc::BreakInfo all_breakinfo = all_param.break_info_;
+      const obrpc::BreakInfo cur_breakinfo = cur_param.break_info_;
+      EXPECT_EQ(all_breakinfo.break_file_id_, cur_breakinfo.break_file_id_);
+      EXPECT_EQ(all_breakinfo.min_greater_log_id_, cur_breakinfo.min_greater_log_id_);
+    }
+  }
+
+  void reset_break_info(obrpc::BreakInfo &break_info,
+      uint32_t break_file_id,
+      uint64_t min_greater_log_id)
+  {
+    break_info.break_file_id_ = break_file_id;
+    break_info.min_greater_log_id_ = min_greater_log_id;
+  }
+private:
+  Req *request_;
+  int64_t start_pos_;
+  int64_t end_pos_;
+  int64_t query_time_;
+};
+
+
+}
+}
diff --git a/unittest/liboblog/test_ob_log_heartbeater.cpp b/unittest/liboblog/test_ob_log_heartbeater.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e15d718517e1abe713669a5b9f33351a58c4130b
--- /dev/null
+++ b/unittest/liboblog/test_ob_log_heartbeater.cpp
@@ -0,0 +1,520 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#include "lib/hash/ob_linear_hash_map.h" // ObLinearHashMap +#include "lib/atomic/ob_atomic.h" +#define private public +#include "test_ob_log_fetcher_common_utils.h" +#include "ob_log_utils.h" +#include "ob_log_rpc.h" +#include "ob_log_fetcher_heartbeat_worker.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class TestObLogFetcherHeartbeatWorker: public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +public : + static const int64_t SINGLE_WORKER_COUNT = 1; + static const int64_t WORKER_COUNT = 3; +}; + +static const int64_t ONE_SERVER_COUNT = 1; +static const int64_t SERVER_COUNT = 3; + +static const int64_t HEARTBEATER_REQUEST_COUNT = 5 * 10000; +static const int64_t SMALL_HEARTBEATER_REQUEST_COUNT = 1000; + +static const int64_t MAP_MOD_ID = 1; + +static const int64_t TEST_TIME_LIMIT = 10 * _MIN_; +static const int64_t FIXED_TIMESTAMP = 10000; + + +class MockObLogRpcBaseHeartbeat : public IObLogRpc +{ +public: + typedef common::ObLinearHashMap PkeySvrMap; +public: + MockObLogRpcBaseHeartbeat(PkeySvrMap &map) : pkey_svr_map_(map) {} + virtual ~MockObLogRpcBaseHeartbeat() { } + + // Request start log id based on timestamp + virtual int req_start_log_id_by_tstamp(const common::ObAddr &svr, + const obrpc::ObLogReqStartLogIdByTsRequestWithBreakpoint& req, + obrpc::ObLogReqStartLogIdByTsResponseWithBreakpoint& res, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(res); + UNUSED(timeout); + + return ret; + } + + // Request Leader Heartbeat + virtual int req_leader_heartbeat(const common::ObAddr &svr, + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(res); + UNUSED(timeout); + + return ret; + } + + // Open a new stream + // Synchronous RPC + virtual int open_stream(const common::ObAddr &svr, + const obrpc::ObLogOpenStreamReq &req, + obrpc::ObLogOpenStreamResp &resp, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(resp); + UNUSED(timeout); + + return ret; + } + + // Stream based, get logs + // Asynchronous RPC + virtual int async_stream_fetch_log(const common::ObAddr &svr, + const obrpc::ObLogStreamFetchLogReq &req, + obrpc::ObLogExternalProxy::AsyncCB &cb, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(req); + UNUSED(cb); + UNUSED(timeout); + + return ret; + } +private: + // Record the pkey-svr mapping, which is used to verify that the pkey was sent to the expected observer when the rpc is received + PkeySvrMap &pkey_svr_map_; +}; + +class MockObLogRpcDerived1Heartbeat : public MockObLogRpcBaseHeartbeat +{ +public: + MockObLogRpcDerived1Heartbeat(PkeySvrMap &map) : MockObLogRpcBaseHeartbeat(map) {} + virtual ~MockObLogRpcDerived1Heartbeat() { } + + // Requesting a leader heartbeat + // 1. 
rpc always assumes success
+  // 2. 10% probability of server internal error
+  // 3. When the server succeeds: 30% probability that the partition returns OB_SUCCESS, 30% probability of OB_NOT_MASTER,
+  //    30% chance of returning OB_PARTITION_NOT_EXIST, 10% chance of returning some other error
+  virtual int req_leader_heartbeat(const common::ObAddr &svr,
+      const obrpc::ObLogLeaderHeartbeatReq &req,
+      obrpc::ObLogLeaderHeartbeatResp &res,
+      const int64_t timeout)
+  {
+    int ret = OB_SUCCESS;
+    UNUSED(svr);
+    UNUSED(timeout);
+
+    res.reset();
+    res.set_debug_err(OB_SUCCESS);
+    // Seed.
+    int64_t seed = (get_timestamp());
+    int64_t rand = (seed) % 100;
+    bool svr_internal_err = (rand < 10);
+    if (svr_internal_err) {
+      res.set_err(OB_ERR_UNEXPECTED);
+    } else {
+      res.set_err(OB_SUCCESS);
+      for (int64_t idx = 0, cnt = req.get_params().count(); OB_SUCCESS == ret && idx < cnt; ++idx) {
+        // 30%.
+        seed = get_timestamp();
+        rand = (idx + seed) % 100;
+        bool succeed = (rand < 30);
+        bool not_master = (30 <= rand) && (rand < 60);
+        bool partition_not_exist = (60 <= rand) && (rand < 90);
+
+        const obrpc::ObLogLeaderHeartbeatReq::Param &param = req.get_params().at(idx);
+        obrpc::ObLogLeaderHeartbeatResp::Result result;
+        result.reset();
+        if (succeed) {
+          result.err_ = OB_SUCCESS;
+        } else if (not_master) {
+          result.err_ = OB_NOT_MASTER;
+        } else if (partition_not_exist) {
+          result.err_ = OB_PARTITION_NOT_EXIST;
+        } else {
+          result.err_ = OB_ERR_UNEXPECTED;
+        }
+        result.next_served_log_id_ = (succeed || not_master) ? param.next_log_id_ : OB_INVALID_ID;
+        result.next_served_ts_ = (succeed || not_master) ? FIXED_TIMESTAMP : OB_INVALID_TIMESTAMP;
+
+        EXPECT_EQ(OB_SUCCESS, res.append_result(result));
+
+        // Verify that the partition corresponds to the expected request server
+        common::ObAddr cur_svr;
+        if (OB_FAIL(pkey_svr_map_.get(param.pkey_, cur_svr))) {
+          LOG_ERROR("pkey_svr_map_ get error", K(ret), K(param), K(cur_svr));
+        } else {
+          EXPECT_EQ(svr, cur_svr);
+        }
+      }
+    }
+
+    LOG_DEBUG("req leader heartbeat", K(req), K(res));
+
+    return ret;
+  }
+};
+
+class MockObLogRpcDerived2Heartbeat : public MockObLogRpcBaseHeartbeat
+{
+public:
+  MockObLogRpcDerived2Heartbeat(PkeySvrMap &map) : MockObLogRpcBaseHeartbeat(map) {}
+  virtual ~MockObLogRpcDerived2Heartbeat() { }
+
+  // Request leader heartbeat
+  // 1. rpc always assumes success, no server internal error
+  // 2. 
partitions all return OB_SUCESS + virtual int req_leader_heartbeat(const common::ObAddr &svr, + const obrpc::ObLogLeaderHeartbeatReq &req, + obrpc::ObLogLeaderHeartbeatResp &res, + const int64_t timeout) + { + int ret = OB_SUCCESS; + UNUSED(svr); + UNUSED(timeout); + + res.reset(); + res.set_debug_err(OB_SUCCESS); + res.set_err(OB_SUCCESS); + for (int64_t idx = 0, cnt = req.get_params().count(); OB_SUCCESS == ret && idx < cnt; ++idx) { + obrpc::ObLogLeaderHeartbeatResp::Result result; + const obrpc::ObLogLeaderHeartbeatReq::Param ¶m = req.get_params().at(idx); + result.reset(); + result.err_ = OB_SUCCESS; + result.next_served_log_id_ = param.next_log_id_; + result.next_served_ts_ = FIXED_TIMESTAMP; + + EXPECT_EQ(OB_SUCCESS, res.append_result(result)); + + // Verify that the partitions correspond to the same request server + common::ObAddr cur_svr; + if (OB_FAIL(pkey_svr_map_.get(param.pkey_, cur_svr))) { + LOG_ERROR("pkey_svr_map_ get error", K(ret), K(param), K(cur_svr)); + } else { + EXPECT_EQ(svr, cur_svr); + } + } + + LOG_DEBUG("req leader heartbeat", K(req), K(res)); + + return ret; + } +}; + +void generate_req(const int64_t all_svr_cnt, + const int64_t req_cnt, + HeartbeatRequest *&request_array, + common::ObLinearHashMap &map) +{ + // Build requests. + ObAddr svrs[all_svr_cnt]; + for (int64_t idx = 0, cnt = all_svr_cnt; idx < cnt; ++idx) { + svrs[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + + request_array = new HeartbeatRequest[req_cnt]; + for (int64_t idx = 0, cnt = req_cnt; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array[idx]; + r.reset(); + // set pkey, next_log_id, svr + // next_log_id = pkey.table_id + 1 + r.reset(ObPartitionKey((uint64_t)(1000 + idx), 0, 1), 1000 + idx + 1, svrs[idx % all_svr_cnt]); + + int ret = OB_SUCCESS; + if (OB_FAIL(map.insert(r.pkey_, svrs[idx % all_svr_cnt]))) { + if (OB_ENTRY_EXIST != ret) { + LOG_ERROR("map insert error", K(ret), K(r), K(idx)); + } + } + } +} + +void free_req(HeartbeatRequest *request_array) +{ + delete[] request_array; +} + +/* + * Worker. + */ +class TestWorker : public liboblog::Runnable +{ +public: + ObLogFetcherHeartbeatWorker *heartbeater_; + HeartbeatRequest *request_array_; + int64_t request_cnt_; + int64_t all_svr_cnt_; + bool push_req_finish_; + double success_rate_; + + void reset(ObLogFetcherHeartbeatWorker *heartbeater, HeartbeatRequest *req_array, + int64_t req_cnt, int64_t all_svr_cnt) + { + heartbeater_ = heartbeater; + request_array_ = req_array; + request_cnt_ = req_cnt; + all_svr_cnt_ = all_svr_cnt; + push_req_finish_ = false; + success_rate_ = 0; + } + + virtual int routine() + { + // Push requests into heartbeater + for (int64_t idx = 0, cnt = request_cnt_; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array_[idx]; + EXPECT_EQ(OB_SUCCESS, heartbeater_->async_heartbeat_req(&r)); + if (0 == (idx % 1000)) { + usec_sleep(10 * _MSEC_); + } + } + ATOMIC_STORE(&push_req_finish_, true); + + // Wait for requests end. Max test time should set. + int64_t end_request_cnt = 0; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_request_cnt < request_cnt_)) { + for (int64_t idx = 0, cnt = request_cnt_; idx < cnt; ++idx) { + HeartbeatRequest &r = request_array_[idx]; + if (HeartbeatRequest::DONE == r.get_state()) { + end_request_cnt += 1; + r.set_state(HeartbeatRequest::IDLE); + } + } + usec_sleep(100 * _MSEC_); + } + // Assert if test cannot finish. 
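+    // If TEST_TIME_LIMIT elapses before every request reaches the DONE state,
+    // end_request_cnt stays below request_cnt_ and the assertion below fails.
+    // The statistics that follow record the per-server request distribution and
+    // the overall success rate checked by the tests.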
+ EXPECT_EQ(request_cnt_, end_request_cnt); + + // Do some statistics. + int64_t svr_consume_distribution[all_svr_cnt_]; // 1, 2, 3, ... + for (int64_t idx = 0, cnt = all_svr_cnt_; idx < cnt; ++idx) { + svr_consume_distribution[idx] = 0; + } + int64_t succ_cnt = 0; + for (int64_t idx = 0, cnt = request_cnt_; idx < cnt; ++idx) { + HeartbeatRequest &req = request_array_[idx]; + svr_consume_distribution[idx % all_svr_cnt_] += 1; + + const HeartbeatResponse &res = req.get_resp(); + if (res.next_served_log_id_ != OB_INVALID_ID + && res.next_served_tstamp_ != OB_INVALID_TIMESTAMP) { + EXPECT_EQ(req.pkey_.table_id_ + 1, res.next_served_log_id_); + EXPECT_EQ(FIXED_TIMESTAMP, res.next_served_tstamp_); + succ_cnt += 1; + LOG_DEBUG("verify", K(res), K(succ_cnt)); + } + } + + const int64_t BuffSize = 1024; + char buf[BuffSize]; + int64_t pos = 0; + for (int64_t idx = 0, cnt = all_svr_cnt_; idx < cnt; ++idx) { + pos += snprintf(buf + pos, BuffSize - pos, "svr_cnt:%ld perc:%f ", (1 + idx), + ((double)svr_consume_distribution[idx] / (double)request_cnt_)); + } + success_rate_ = (double)succ_cnt / (double)request_cnt_; + fprintf(stderr, "request count: %ld distribution: %s succeed perc: %f \n", + request_cnt_, buf, success_rate_); + + + return OB_SUCCESS; + } +}; + +//////////////////////Basic function tests////////////////////////////////////////// +TEST_F(TestObLogFetcherHeartbeatWorker, HeartbeatRequest) +{ + HeartbeatRequest req; + req.reset(); + EXPECT_TRUE(req.is_state_idle()); + + req.set_state_req(); + EXPECT_TRUE(req.is_state_req()); + EXPECT_EQ(HeartbeatRequest::REQ, req.get_state()); + + req.set_state_done(); + EXPECT_TRUE(req.is_state_done()); + EXPECT_EQ(HeartbeatRequest::DONE, req.get_state()); + + req.set_state_idle(); + EXPECT_TRUE(req.is_state_idle()); + EXPECT_EQ(HeartbeatRequest::IDLE, req.get_state()); +} + +//TEST_F(TestObLogStartLogIdLocator, DISABLED_locator) +TEST_F(TestObLogFetcherHeartbeatWorker, heartbeater) +{ + const int64_t TestWorkerCnt = 3; + // generate data + HeartbeatRequest *request_arrays[TestWorkerCnt]; + common::ObLinearHashMap map; + EXPECT_EQ(OB_SUCCESS, map.init(MAP_MOD_ID)); + for (int64_t idx = 0; idx < TestWorkerCnt; idx++) { + generate_req(SERVER_COUNT, HEARTBEATER_REQUEST_COUNT, request_arrays[idx], map); + OB_ASSERT(NULL != request_arrays[idx]); + } + + MockFetcherErrHandler1 err_handler1; + MockObLogRpcDerived1Heartbeat rpc(map); + ObLogFetcherHeartbeatWorker heartbeater; + + EXPECT_EQ(OB_SUCCESS, heartbeater.init(WORKER_COUNT, rpc, err_handler1)); + EXPECT_EQ(OB_SUCCESS, heartbeater.start()); + + TestWorker workers[TestWorkerCnt]; + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.reset(&heartbeater, request_arrays[idx], HEARTBEATER_REQUEST_COUNT, SERVER_COUNT); + w.create(); + } + + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.join(); + } + + // free + for (int64_t idx = 0; idx < TestWorkerCnt; idx++) { + free_req(request_arrays[idx]); + request_arrays[idx] = NULL; + } + map.destroy(); + heartbeater.destroy(); +} + +// Test the request logic +// Currently aggregating up to 10,000 requests at a time, pushing more than 10,000 requests to test if multiple aggregations are possible +TEST_F(TestObLogFetcherHeartbeatWorker, aggregation) +{ + // generate data + HeartbeatRequest *request_array; + common::ObLinearHashMap map; + EXPECT_EQ(OB_SUCCESS, map.init(MAP_MOD_ID)); + // All requests are made to the same server + generate_req(ONE_SERVER_COUNT, 
HEARTBEATER_REQUEST_COUNT, request_array, map); + OB_ASSERT(NULL != request_array); + + MockFetcherErrHandler1 err_handler1; + MockObLogRpcDerived1Heartbeat rpc(map); + + ObLogFetcherHeartbeatWorker heartbeater; + + EXPECT_EQ(OB_SUCCESS, heartbeater.init(SINGLE_WORKER_COUNT, rpc, err_handler1)); + + // Insert all data first, then open the StartLogIdLocator thread to ensure that all subsequent requests are aggregated on a single server; + TestWorker worker; + worker.reset(&heartbeater, request_array, HEARTBEATER_REQUEST_COUNT, ONE_SERVER_COUNT); + worker.create(); + + while (false == ATOMIC_LOAD(&worker.push_req_finish_)) { + } + + EXPECT_EQ(OB_SUCCESS, heartbeater.start()); + + // join + worker.join(); + // free + free_req(request_array); + request_array = NULL; + + map.destroy(); + heartbeater.destroy(); +} + +// Test scenario: when the observer returns all the correct data, whether the ObLogFetcherHeartbeatWorker processes it correctly +TEST_F(TestObLogFetcherHeartbeatWorker, heartbeater_handle) +{ + // generate data + HeartbeatRequest *request_array; + common::ObLinearHashMap map; + EXPECT_EQ(OB_SUCCESS, map.init(MAP_MOD_ID)); + generate_req(SERVER_COUNT, SMALL_HEARTBEATER_REQUEST_COUNT, request_array, map); + OB_ASSERT(NULL != request_array); + + MockFetcherErrHandler1 err_handler1; + MockObLogRpcDerived2Heartbeat rpc(map); + ObLogFetcherHeartbeatWorker heartbeater; + + EXPECT_EQ(OB_SUCCESS, heartbeater.init(WORKER_COUNT, rpc, err_handler1)); + EXPECT_EQ(OB_SUCCESS, heartbeater.start()); + + TestWorker worker; + worker.reset(&heartbeater, request_array, SMALL_HEARTBEATER_REQUEST_COUNT, SERVER_COUNT); + worker.create(); + + while (0 == ATOMIC_LOAD((int64_t*)&worker.success_rate_)) { + } + // all request succ + EXPECT_EQ(1, worker.success_rate_); + + worker.join(); + + // free + free_req(request_array); + request_array = NULL; + + map.destroy(); + heartbeater.destroy(); +} + + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_heartbeater.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_log_part_fetch_mgr.cpp b/unittest/liboblog/test_ob_log_part_fetch_mgr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ad07632c1fed734c3fec491a6ddb979760d41ab6 --- /dev/null +++ b/unittest/liboblog/test_ob_log_part_fetch_mgr.cpp @@ -0,0 +1,200 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#include "ob_log_utils.h" +#define private public +#include "ob_log_part_fetch_mgr.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class TestObLogPartFetchMgr: public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +public : + static const int64_t MAX_CACHED_PART_FETCH_CTX_COUNT = 10 * 1000; + static const int64_t PART_FETCH_CTX_POOL_BLOCK_SIZE = 1L << 24; + static const uint64_t DEFAULT_TENANT_ID = common::OB_SERVER_TENANT_ID; + + static const int64_t PART_FETCH_CTX__COUNT = 10 * 1000; + static const int64_t SINGLE_PART_FETCH_CTX__COUNT = 1; + static int64_t g_slowest_part_num; +}; + +int64_t TestObLogPartFetchMgr::g_slowest_part_num = +ObLogConfig::default_print_fetcher_slowest_part_num; +typedef common::ObSmallObjPool PartFetchCtxPool; +PartFetchCtxPool ctx_pool; + +void generate_ctx(const int64_t part_fetch_ctx_count, + PartTransResolver &part_trans_resolver, + PartFetchCtx *pctx_array[], + ObLogPartFetchMgr::PartFetchCtxArray &part_fetch_ctx_array) +{ + int ret = OB_SUCCESS; + + for (int64_t idx = 0; idx < part_fetch_ctx_count; ++idx) { + PartFetchCtx *&ctx = pctx_array[idx]; + if (OB_FAIL(ctx_pool.alloc(ctx)) || OB_ISNULL(ctx)) { + LOG_ERROR("alloc PartFetchCtx fail", K(ret), K(idx), KPC(ctx)); + } else { + // Initialising the fetch logging context + ctx->reset(ObPartitionKey(1000U, idx, part_fetch_ctx_count), + get_timestamp(), idx, idx, part_trans_resolver); + // Manually assigning values to partition progress, for testing purposes + ctx->progress_.progress_ = part_fetch_ctx_count - idx; + if (OB_FAIL(part_fetch_ctx_array.push_back(ctx))) { + LOG_ERROR("part_fetch_ctx_array push back fail", K(ret), K(idx), KPC(ctx)); + } else { + LOG_DEBUG("data", K(idx), "progress", ctx->get_progress()); + } + } + } +} + +void free_all_ctx(const int64_t array_cnt, + PartFetchCtx *pctx_array[]) +{ + for (int64_t idx = 0; idx < array_cnt; ++idx) { + PartFetchCtx *&ctx = pctx_array[idx]; + if (NULL != ctx) { + ctx->reset(); + ctx_pool.free(ctx); + ctx = NULL; + } + } +} + +int do_top_k(const ObLogPartFetchMgr::PartFetchCtxArray &part_fetch_ctx_array, + const int64_t g_slowest_part_num) +{ + int ret = OB_SUCCESS; + + ObLogPartFetchMgr::SlowestPartArray slow_part_array; + ObLogPartFetchMgr part_fetch_mgr; + int64_t start_time = get_timestamp(); + int64_t end_time = 0; + if (OB_FAIL(part_fetch_mgr.find_k_slowest_partition_(part_fetch_ctx_array, + g_slowest_part_num, + slow_part_array))) { + LOG_ERROR("find_the_k_slowest_partition_ fail", K(ret)); + } else { + end_time = get_timestamp(); + LOG_INFO("top-k cost time", "time", TVAL_TO_STR(end_time - start_time)); + + int64_t array_cnt = slow_part_array.count(); + for (int64_t idx = 0; idx < array_cnt; ++idx) { + const PartFetchCtx *ctx = slow_part_array.at(idx); + EXPECT_EQ(idx + 1, ctx->get_progress()); + LOG_INFO("slow part", K(idx), "pkey", ctx->get_pkey(), + "progress", ctx->get_progress()); + } + } + + return ret; +} + +// TEST find_k_slowest_partition +TEST_F(TestObLogPartFetchMgr, top_k) +{ + int ret = OB_SUCCESS; + + ObLogPartFetchMgr::PartFetchCtxArray part_fetch_ctx_array; + // Storage Context Pointer + PartFetchCtx *pctx_array[PART_FETCH_CTX__COUNT]; + PartTransResolver part_trans_resolver; + + // PartFetchCtxPool + if (OB_FAIL(ctx_pool.init(MAX_CACHED_PART_FETCH_CTX_COUNT, + ObModIds::OB_LOG_PART_FETCH_CTX_POOL, + DEFAULT_TENANT_ID, + 
PART_FETCH_CTX_POOL_BLOCK_SIZE))) { + LOG_ERROR("init PartFetchCtxPool fail", K(ret), LITERAL_K(MAX_CACHED_PART_FETCH_CTX_COUNT), + LITERAL_K(PART_FETCH_CTX_POOL_BLOCK_SIZE)); + } + + // case-1: + // Test 100,000 partitions + // Generate ctx + generate_ctx(PART_FETCH_CTX__COUNT, part_trans_resolver, pctx_array, part_fetch_ctx_array); + // Execute top-k + EXPECT_EQ(OB_SUCCESS, do_top_k(part_fetch_ctx_array, g_slowest_part_num)); + // free + free_all_ctx(PART_FETCH_CTX__COUNT, pctx_array); + + + // case-2 + // Test 0 partitions + part_fetch_ctx_array.reset(); + // Generate ctx + generate_ctx(0, part_trans_resolver, pctx_array, part_fetch_ctx_array); + // Execute top-k + EXPECT_EQ(OB_SUCCESS, do_top_k(part_fetch_ctx_array, g_slowest_part_num)); + // free + free_all_ctx(0, pctx_array); + + + // case-3 + //Test 1 partitions + part_fetch_ctx_array.reset(); + // Generate ctx + generate_ctx(SINGLE_PART_FETCH_CTX__COUNT, part_trans_resolver, pctx_array, part_fetch_ctx_array); + // Execute top-k + EXPECT_EQ(OB_SUCCESS, do_top_k(part_fetch_ctx_array, g_slowest_part_num)); + // free + free_all_ctx(SINGLE_PART_FETCH_CTX__COUNT, pctx_array); + + + // case-4 + // Test 2 partitions, one of which is NULL + part_fetch_ctx_array.reset(); + // Generate ctx + generate_ctx(SINGLE_PART_FETCH_CTX__COUNT, part_trans_resolver, pctx_array, part_fetch_ctx_array); + // push NULL + EXPECT_EQ(OB_SUCCESS, part_fetch_ctx_array.push_back(NULL)); + // Execute top-k + EXPECT_EQ(OB_ERR_UNEXPECTED, do_top_k(part_fetch_ctx_array, g_slowest_part_num)); + // free + free_all_ctx(SINGLE_PART_FETCH_CTX__COUNT, pctx_array); + + + // ctx pool destory + ctx_pool.destroy(); +} + + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + int ret = OB_SUCCESS; + + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_part_fetch_mgr.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + + return ret; +} diff --git a/unittest/liboblog/test_ob_log_part_svr_list.cpp b/unittest/liboblog/test_ob_log_part_svr_list.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7432f7266c5bf3e59482cb2babdf3c1cb2212569 --- /dev/null +++ b/unittest/liboblog/test_ob_log_part_svr_list.cpp @@ -0,0 +1,428 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#include "lib/net/ob_addr.h" +#include "lib/allocator/page_arena.h" +#include "ob_log_utils.h" +#define private public +#include "liboblog/src/ob_log_part_svr_list.h" + +#include "ob_log_start_log_id_locator.h" // StartLogIdLocateReq + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class TestObLogPartSvrList: public ::testing::Test +{ +public : + virtual void SetUp(); + virtual void TearDown(); +public: + typedef PartSvrList::SvrItem SvrItem; + typedef PartSvrList::LogIdRange LogIdRange; + // 验证svr_item字段的正确性 + void is_svr_item_correct(PartSvrList &svr_list, + const int64_t svr_item_index, + common::ObAddr &expect_svr, + const int64_t expect_range_num, + LogIdRange *expect_log_ranges); +private: + static const int64_t SERVER_COUNT = 64; + static const int64_t MAX_RANGE_NUMBER = 4; +private: + common::ObAddr servers[SERVER_COUNT]; + common::ObArenaAllocator allocator; +}; + +void TestObLogPartSvrList::SetUp() +{ + for (int64_t idx = 0; idx < SERVER_COUNT; idx++) { + servers[idx].set_ip_addr("127.0.0.1", static_cast(idx + 8000)); + } +} + +void TestObLogPartSvrList::TearDown() +{ +} + + +void TestObLogPartSvrList::is_svr_item_correct(PartSvrList &svr_list, + const int64_t svr_item_index, + common::ObAddr &expect_svr, + const int64_t expect_range_num, + LogIdRange *expect_log_ranges) +{ + SvrItem svr_item; + EXPECT_EQ(OB_SUCCESS, svr_list.svr_items_.at(svr_item_index, svr_item)); + + EXPECT_EQ(expect_svr, svr_item.svr_); + EXPECT_EQ(expect_range_num, svr_item.range_num_); + for (int64_t idx = 0; idx < expect_range_num; idx++) { + EXPECT_EQ(expect_log_ranges[idx].start_log_id_, svr_item.log_ranges_[idx].start_log_id_); + EXPECT_EQ(expect_log_ranges[idx].end_log_id_, svr_item.log_ranges_[idx].end_log_id_); + } +} + + +//////////////////////Basic function tests////////////////////////////////////////// +// PartSvrList::add_server_or_update() +// The main test is the insert_range_ function, which calls find_pos_and_merge to find the position, but no log range merge has occurred +TEST_F(TestObLogPartSvrList, add_server_test1) +{ + // 声明 + const int64_t svr_idx = 0; + common::ObAddr expect_svr = servers[svr_idx]; + int64_t expect_range_num = 0; + LogIdRange expect_log_ranges[MAX_RANGE_NUMBER]; + (void)memset(expect_log_ranges, 0, MAX_RANGE_NUMBER * sizeof(LogIdRange)); + const bool is_located_in_meta_table = false; + const bool is_leader = false; + + PartSvrList svr_list; + EXPECT_EQ(0, svr_list.next_svr_index_); + EXPECT_EQ(0, svr_list.count()); + + /// add log id range: (100, 200) + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[0], 100, 200, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + expect_range_num++; + // Verify the correctness of the svr_item field + EXPECT_EQ(1, svr_list.count()); + expect_log_ranges[0].reset(100, 200); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (300, 400) + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[0], 300, 400, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + expect_range_num++; + // Verify the correctness of the svr_item field + expect_log_ranges[1].reset(300, 400); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (500, 600) + EXPECT_EQ(OB_SUCCESS, 
svr_list.add_server_or_update(servers[0], 500, 600, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + expect_range_num++; + // Verify the correctness of the svr_item field + expect_log_ranges[2].reset(500, 600); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (60, 80) + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[0], 60, 80, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + expect_range_num++; + // Verify the correctness of the svr_item field + expect_log_ranges[0].reset(60, 80); + expect_log_ranges[1].reset(100, 200); + expect_log_ranges[2].reset(300, 400); + expect_log_ranges[3].reset(500, 600); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (700, 800) + // current range:[60, 80], [100, 200], [300, 400], [500, 600] + // No merge occurs and the array is full, perform a manual merge with the last range + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[0], 700, 800, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + // Verify the correctness of the svr_item field + expect_log_ranges[3].reset(500, 800); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (410, 450) + // current range:[60, 80], [100, 200], [300, 400], [700, 800] + // If no merge occurs and the array is full, find the insertion position pos, and perform a manual merge with the range at position pos + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[0], 410, 450, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + // Verify the correctness of the svr_item field + expect_log_ranges[3].reset(410, 800); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (30, 40) + // current range:[60, 80], [100, 200], [300, 400], [410, 800] + // If no merge occurs and the array is full, find the insertion position pos, and perform a manual merge with the range at position pos + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[0], 30, 40, + is_located_in_meta_table, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader)); + // Verify the correctness of the svr_item field + expect_log_ranges[0].reset(30, 80); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); +} + +// PartSvrList::add_server_or_update() +// The main test is the find_pos_and_merge_ function, where find_pos_and_merge_ is called to find the position and a merge occurs +TEST_F(TestObLogPartSvrList, add_server_test2) +{ + const int64_t svr_idx = 0; + common::ObAddr expect_svr = servers[svr_idx]; + int64_t expect_range_num = 0; + LogIdRange expect_log_ranges[MAX_RANGE_NUMBER]; + (void)memset(expect_log_ranges, 0, MAX_RANGE_NUMBER * sizeof(LogIdRange)); + const bool is_located_in_meta_table = false; + const bool is_leader = false; + + PartSvrList svr_list; + EXPECT_EQ(0, svr_list.next_svr_index_); + EXPECT_EQ(0, svr_list.count()); + // init range + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(expect_svr, 60, 80, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(expect_svr, 100, 200, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, 
svr_list.add_server_or_update(expect_svr, 300, 400, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(expect_svr, 500, 600, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + expect_range_num = 4; + expect_log_ranges[0].reset(60, 80); + expect_log_ranges[1].reset(100, 200); + expect_log_ranges[2].reset(300, 400); + expect_log_ranges[3].reset(500, 600); + + /// add log id range: (70, 90) + // current range:[60, 80], [100, 200], [300, 400], [500, 600] + // Merge with 1st range only + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(expect_svr, 70, 90, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + // Verify the correctness of the svr_item field + expect_log_ranges[0].reset(60, 90); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + /// add log id range: (50, 450) + // current range:[60, 90], [100, 200], [300, 400], [500, 600] + // and the 1st, 2nd and 3rd rang occur together + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(expect_svr, 50, 450, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + // Verify the correctness of the svr_item field + expect_range_num = 2; + expect_log_ranges[0].reset(50, 450); + expect_log_ranges[1].reset(500, 600); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); +} + +TEST_F(TestObLogPartSvrList, next_server) +{ + // request next log: log_id=250 + uint64_t next_log_id = 250; + BlackList black_list; + common::ObAddr svr; + + int64_t svr_idx = 0; + common::ObAddr expect_svr; + int64_t expect_range_num = 0; + LogIdRange expect_log_ranges[MAX_RANGE_NUMBER]; + (void)memset(expect_log_ranges, 0, MAX_RANGE_NUMBER * sizeof(LogIdRange)); + + PartSvrList svr_list; + EXPECT_EQ(0, svr_list.next_svr_index_); + EXPECT_EQ(0, svr_list.count()); + const bool is_located_in_meta_table = false; + const bool is_leader = false; + + /// case 1: for this partition, the current ServerList has 3 servers + // server-1: log range: [300, 500], [600, 700] + // server-2: log range: [100, 150], [160, 200] + // server-3: log range: [50, 90], [100, 150], [200, 300], [400, 500] + // + // for server-1, log id is at lower limit of range, exit directly; server-1 does not serve 250 logs, but server1 is valid, move to next server + // + // for server-2, server-2 does not serve 250 logs, and server2 is invalid, because next_log_id is generally + // monotonically increasing, then server-2 maintains a log range, all less than 250, ServerList needs to delete server2 + // + // For server-3, server-3 serves 250 logs, and needs to delete the [50, 90], [100, 150] logs it maintains + + // server-1 + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[1], 300, 500, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[1], 600, 700, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + + // server-2 + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[2], 100, 150, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[2], 160, 200, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + + // server-3 + EXPECT_EQ(OB_SUCCESS, 
svr_list.add_server_or_update(servers[3], 50, 90, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[3], 100, 150, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[3], 200, 300, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[3], 400, 500, + is_located_in_meta_table, REGION_PRIORITY_LOW, REPLICA_PRIORITY_FULL, is_leader)); + + EXPECT_EQ(3, svr_list.count()); + EXPECT_EQ(OB_SUCCESS, svr_list.next_server(next_log_id, black_list, svr)); + + /// Verify correctness + /// Number of svr minus 1 + EXPECT_EQ(2, svr_list.count()); + + // verify log rang of eserver-3 + svr_idx = 1; + expect_svr = servers[3]; + expect_range_num = 2; + expect_log_ranges[0].reset(200, 300); + expect_log_ranges[1].reset(400, 500); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); + + + /// case 2: For this partition, the current ServerList has 2 servers + // server-1: log range: [300, 500], [600, 700] + // server-3: log range: [200, 300], [400, 500] + // + EXPECT_EQ(2, svr_list.svr_items_.count()); + svr.reset(); + EXPECT_EQ(OB_SUCCESS, svr_list.next_server(next_log_id, black_list, svr)); + + // 请求650 + next_log_id = 650; + EXPECT_EQ(OB_SUCCESS, svr_list.next_server(next_log_id, black_list, svr)); + svr_idx = 0; + expect_svr = servers[1]; + expect_range_num = 1; + expect_log_ranges[0].reset(600, 700); + is_svr_item_correct(svr_list, svr_idx, expect_svr, expect_range_num, expect_log_ranges); +} + +// PartSvrList: exist(), get_sever_array() +TEST_F(TestObLogPartSvrList, other_function) +{ + PartSvrList svr_list; + + for (int64_t idx = 0; idx < 32; idx++) { + // Half of the clog_history table records and half of the meta table records + if (idx < 16) { + const bool is_located_in_meta_table1 = false; + const bool is_leader1 = false; + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[idx], 100, 200, + is_located_in_meta_table1, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader1)); + } else { + const bool is_located_in_meta_table2 = true; + bool is_leader2 = false; + if (31 == idx) { + is_leader2 = true; + } + EXPECT_EQ(OB_SUCCESS, svr_list.add_server_or_update(servers[idx], 100, 200, + is_located_in_meta_table2, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, is_leader2)); + } + } + + int64_t svr_index = -1; + for (int64_t idx = 0; idx < 32; idx++) { + EXPECT_TRUE(svr_list.exist(servers[idx], svr_index)); + } + StartLogIdLocateReq::SvrList svr_list_for_locate_start_log_id; + EXPECT_EQ(OB_SUCCESS, svr_list.get_server_array_for_locate_start_log_id(svr_list_for_locate_start_log_id)); + EXPECT_EQ(32, svr_list_for_locate_start_log_id.count()); + + // verify leader is the first + EXPECT_EQ(svr_list_for_locate_start_log_id.at(0).svr_, servers[31]); + for (int64_t idx = 1; idx < 32; idx++) { + ObAddr &addr = svr_list_for_locate_start_log_id.at(idx).svr_; + int64_t start_idx = -1; + int64_t end_idx = -1; + + // meta table + if (idx < 16) { + start_idx = 16; + end_idx = 32; + } else { + // clog history table + start_idx = 0; + end_idx = 16; + } + + bool find = false; + for (int64_t svr_idx = start_idx; svr_idx < end_idx; ++svr_idx) { + if (addr == servers[svr_idx]) { + find = true; + break; + } + } + EXPECT_TRUE(find); + } + // + // There are 6 servers in total, added in the following order. 
+ // server sequence: svr1, svr2, svr1, svr3, svr4, svr1, svr2, sv3, sv4, sv5, sv6 + // + // server: svr1, sv2, sv3, sv4, sv5, sv6 + // is_meta_table 0 0 0 0 0 1 1 + // is_leader 0 0 0 0 0 0 1 + // Expected: leader comes first, followed by meta table, remaining + // sv6, sv5 ..... + + PartSvrList svr_list1; + // svr1 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[1], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr2 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[2], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr1 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[1], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr3 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[3], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr4 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[4], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr4 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[1], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr4 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[2], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr4 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[3], 100, 200, false, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr5 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[5], 100, 200, true, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, false)); + + // svr6 + EXPECT_EQ(OB_SUCCESS, svr_list1.add_server_or_update(servers[6], 100, 200, true, REGION_PRIORITY_HIGH, REPLICA_PRIORITY_FULL, true)); + + StartLogIdLocateReq::SvrList svr_list_for_locate_start_log_id_1; + EXPECT_EQ(OB_SUCCESS, svr_list1.get_server_array_for_locate_start_log_id(svr_list_for_locate_start_log_id_1)); + EXPECT_EQ(6, svr_list_for_locate_start_log_id_1.count()); + + int expect_result_index[] = {6, 5}; + for (int64_t idx = 0; idx < 2; idx++) { + EXPECT_EQ(svr_list_for_locate_start_log_id_1.at(idx).svr_, servers[expect_result_index[idx]]); + } +} + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_part_svr_list.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_log_part_trans_resolver.cpp b/unittest/liboblog/test_ob_log_part_trans_resolver.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bf7aec8b90d3ff510f45e956f7573f6dbdd0f7d6 --- /dev/null +++ b/unittest/liboblog/test_ob_log_part_trans_resolver.cpp @@ -0,0 +1,966 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "gtest/gtest.h" + +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" +#include "ob_log_fetch_stat_info.h" + +#define private public +#include "liboblog/src/ob_log_part_trans_resolver.h" +#include "test_trans_log_generator.h" +#include "test_sp_trans_log_generator.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ +// Task Pool +static const int64_t PREALLOC_POOL_SIZE = 10 * 1024; +static const int64_t TRANS_TASK_PAGE_SIZE = 1024; +static const int64_t TRANS_TASK_BLOCK_SIZE = 4 * 1024 *1024; +static const int64_t PREALLOC_PAGE_COUNT = 1024; + +// For task pool init +ObConcurrentFIFOAllocator fifo_allocator; + +// test trans count +static const int64_t TRANS_COUNT = 100; +// redo log count +static const int64_t TRANS_REDO_LOG_COUNT = 100; + +int init_task_pool(ObLogTransTaskPool &task_pool) +{ + int ret = OB_SUCCESS; + + ret = fifo_allocator.init(16 * _G_, 16 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE); + EXPECT_EQ(OB_SUCCESS, ret); + + ret = task_pool.init(&fifo_allocator, PREALLOC_POOL_SIZE, TRANS_TASK_PAGE_SIZE, + TRANS_TASK_BLOCK_SIZE, true, PREALLOC_PAGE_COUNT); + EXPECT_EQ(OB_SUCCESS, ret); + + return ret; +} + +/* + * Test scenario. + * For N transactions, half of which commit, half of which abort + * Each transaction has a random redo log + * + * Log sequence: redo, redo, ... redo, prepare, commit/abort + * + * // redo info + * redo_log_cnt + * ObLogIdArray redo_log_ids; + * + * // prepare info + * int64_t seq; + * common::ObPartitionKey partition; + * int64_t prepare_timestamp; + * ObTransID trans_id; + * uint64_t prepare_log_id; + * uint64_t cluster_id; + * + * // commit info + * int64_t global_trans_version; + * PartitionLogInfoArray *participants; + * + */ +TEST(PartTransResolver, BasicTest1) +{ + int err = OB_SUCCESS; + + // Commit half trans, whose has even idx. + const int64_t trans_cnt = TRANS_COUNT; + const int64_t commit_trans_cnt = trans_cnt / 2; + const int64_t abort_trans_cnt = trans_cnt - commit_trans_cnt; + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 8888); + + TransLogInfo trans_log_info; + // redo info + int64_t redo_cnt = 0; + ObLogIdArray redo_log_ids; + // prepare info + int64_t seq = 0; + ObPartitionKey pkey(1000U, 1, 1); + int64_t prepare_timestamp = PREPARE_TIMESTAMP; + ObTransID trans_id(addr); + uint64_t prepare_log_id = 0; + uint64_t CLOUSTER_ID = 1000; + // commit info + int64_t global_trans_version = GLOBAL_TRANS_VERSION; + PartitionLogInfoArray ptl_ids; + + // Log gen. + TransLogEntryGeneratorBase log_gen(pkey, trans_id); + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. + MockParser1 parser; + EXPECT_EQ(OB_SUCCESS, parser.init()); + + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. 
+ ObLogIdArray missing; + TransStatInfo tsi; + volatile bool stop_flag = false; + + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + redo_cnt = get_timestamp() % TRANS_REDO_LOG_COUNT + 1; + redo_log_ids.reset(); + for (int64_t cnt = 0; cnt < redo_cnt; ++cnt) { + EXPECT_EQ(OB_SUCCESS, redo_log_ids.push_back(log_gen.get_log_id() + cnt)); + } + prepare_log_id = log_gen.get_log_id() + redo_cnt; + ptl_ids.reset(); + + ObPartitionLogInfo ptl_id(pkey, prepare_log_id, PREPARE_TIMESTAMP); + err = ptl_ids.push_back(ptl_id); + EXPECT_EQ(OB_SUCCESS, err); + // push fixed participant information + for (int64_t idx = 0; idx < FIXED_PART_COUNT; ++idx) { + err = ptl_ids.push_back(FIXED_PART_INFO[idx]); + EXPECT_EQ(OB_SUCCESS, err); + } + trans_log_info.reset(redo_cnt, redo_log_ids, seq, pkey, prepare_timestamp, + trans_id, prepare_log_id, CLOUSTER_ID, global_trans_version, ptl_ids); + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info)); + seq++; + + // Commit trans with even idx. + log_gen.next_trans(redo_cnt, (0 == idx % 2)); + clog::ObLogEntry log_entry; + + while (OB_SUCCESS == log_gen.next_log_entry(log_entry)) { + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + } + + err = pr.flush(stop_flag); + EXPECT_EQ(OB_SUCCESS, err); + + // Verify the correctness of partition task data + bool check_result; + EXPECT_EQ(OB_SUCCESS, parser.get_check_result(check_result)); + EXPECT_TRUE(check_result); + LOG_DEBUG("debug", K(idx)); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt()); + EXPECT_EQ(abort_trans_cnt, parser.get_abort_trans_cnt()); + + // Destroy. + pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +/* + * Test scenario. + * For N transactions, half of which commit, half of which abort + * Each transaction has a random redo log + * Log sequence: redo, redo... redo-prepare, commit/abort + * redo-prepare in a log entry + * + */ +TEST(PartTransResolver, BasicTest2) +{ + int err = OB_SUCCESS; + + // Commit half trans, whose has even idx. + const int64_t trans_cnt = TRANS_COUNT; + const int64_t commit_trans_cnt = trans_cnt / 2; + const int64_t abort_trans_cnt = trans_cnt - commit_trans_cnt; + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 8888); + + TransLogInfo trans_log_info; + // redo info + int64_t redo_cnt = 0; + ObLogIdArray redo_log_ids; + // prepare info + int64_t seq = 0; + ObPartitionKey pkey(1000U, 1, 1); + int64_t prepare_timestamp = PREPARE_TIMESTAMP; + ObTransID trans_id(addr); + uint64_t prepare_log_id = 0; + uint64_t CLOUSTER_ID = 1000; + // commit info + int64_t global_trans_version = GLOBAL_TRANS_VERSION; + PartitionLogInfoArray ptl_ids; + + // Log gen. + TransLogEntryGeneratorBase log_gen(pkey, trans_id); + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. + MockParser1 parser; + EXPECT_EQ(OB_SUCCESS, parser.init()); + + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. 
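+  // In this scenario the prepare record rides on the last redo entry
+  // (redo-prepare in a single log entry), so the loop below feeds redo_cnt
+  // entries covering both redo and prepare, and one extra read delivers the
+  // commit/abort log before the resolver is flushed.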
+ ObLogIdArray missing; + TransStatInfo tsi; + bool stop_flag = false; + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + redo_cnt = get_timestamp() % TRANS_REDO_LOG_COUNT + 2; + redo_log_ids.reset(); + for (int64_t cnt = 0; cnt < redo_cnt; ++cnt) { + EXPECT_EQ(OB_SUCCESS, redo_log_ids.push_back(log_gen.get_log_id() + cnt)); + } + prepare_log_id = log_gen.get_log_id() + redo_cnt - 1; + + ptl_ids.reset(); + ObPartitionLogInfo ptl_id(pkey, prepare_log_id, PREPARE_TIMESTAMP); + err = ptl_ids.push_back(ptl_id); + EXPECT_EQ(OB_SUCCESS, err); + // push fixed participant information + for (int64_t idx = 0; idx < FIXED_PART_COUNT; ++idx) { + err = ptl_ids.push_back(FIXED_PART_INFO[idx]); + EXPECT_EQ(OB_SUCCESS, err); + } + trans_log_info.reset(redo_cnt, redo_log_ids, seq, pkey, prepare_timestamp, + trans_id, prepare_log_id, CLOUSTER_ID, global_trans_version, ptl_ids); + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info)); + seq++; + + // Commit trans with even idx. + log_gen.next_trans_with_redo_prepare(redo_cnt, (0 == idx % 2)); + clog::ObLogEntry log_entry; + + // read redo, redo... redo-prepare + for (int64_t log_cnt = 0; log_cnt < redo_cnt; log_cnt++) { + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_with_redo_prepare(log_entry)); + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + } + + // read commit/abort log + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_with_redo_prepare(log_entry)); + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + + err = pr.flush(stop_flag); + EXPECT_EQ(OB_SUCCESS, err); + + // Verify the correctness of partition task data + bool check_result; + EXPECT_EQ(OB_SUCCESS, parser.get_check_result(check_result)); + EXPECT_TRUE(check_result); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt()); + EXPECT_EQ(abort_trans_cnt, parser.get_abort_trans_cnt()); + + // Destroy. + pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +/* + * Test scenario. + * Parse to prepare log, find redo log missing, need to read miss log + * For N transactions, half of them commit, half of them abort + * Each transaction has a random redo log + * Two cases. + * 1. redo, redo, redo...prepare, commit/abort + * 2. redo, redo, redo...redo-prepare, commit/abort + * + */ +TEST(PartTransResolver, BasicTest3) +{ + int err = OB_SUCCESS; + + // Commit half trans, whose has even idx. + const int64_t trans_cnt = TRANS_COUNT; + const int64_t commit_trans_cnt = trans_cnt / 2; + int64_t redo_cnt = 0; + int64_t miss_redo_cnt = 0; + int64_t can_read_redo_cnt = 0; + + // Pkey. + ObPartitionKey pkey(1000U, 1, 1); + // addr + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 8888); + ObTransID trans_id(addr); + + // Log gen. + TransLogEntryGenerator1 log_gen(pkey, trans_id); + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. + MockParser2 parser; + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. 
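+  // Each iteration below builds a transaction whose first miss_redo_cnt redo
+  // logs are withheld: after the readable redo entries are fed in, reading the
+  // prepare log returns OB_ITEM_NOT_SETTED together with the missing log ids,
+  // which are supplied via read_missing_redo() before the prepare log is
+  // re-read and the commit/abort log is processed.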
+ ObLogIdArray missing; + TransStatInfo tsi; + bool stop_flag = false; + + // case 1: redo, redo, redo...prepare, commit/abort + // case 2: redo, redo, redo...redo-prepare, commit/abort + bool is_normal_trans = false; + bool is_redo_with_prapare_trans = false; + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + if (idx < trans_cnt / 2) { + is_normal_trans = true; + } else { + is_redo_with_prapare_trans = true; + } + redo_cnt = get_timestamp() % TRANS_REDO_LOG_COUNT + 1; + if (is_normal_trans) { + miss_redo_cnt = get_timestamp() % redo_cnt + 1; + can_read_redo_cnt = redo_cnt - miss_redo_cnt; + } else if (is_redo_with_prapare_trans){ + miss_redo_cnt = get_timestamp() % redo_cnt; + can_read_redo_cnt = redo_cnt - miss_redo_cnt - 1; + } else { + } + + // Commit trans with even idx. + if (is_normal_trans) { + log_gen.next_trans_with_miss_redo(redo_cnt, miss_redo_cnt, (0 == idx % 2), NORMAL_TRAN); + } else if (is_redo_with_prapare_trans){ + log_gen.next_trans_with_miss_redo(redo_cnt, miss_redo_cnt, (0 == idx % 2), REDO_WITH_PREPARE_TRAN); + } else { + } + + uint64_t start_redo_log_id = log_gen.get_log_id(); + clog::ObLogEntry log_entry; + + // First read the can_read_redo_cnt redo log + for (int64_t log_cnt = 0; log_cnt < can_read_redo_cnt; ++log_cnt) { + if (is_normal_trans) { + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(NORMAL_TRAN, log_entry)); + } else if (is_redo_with_prapare_trans){ + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(REDO_WITH_PREPARE_TRAN, log_entry)); + } else { + } + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Read prepare log and find miss redo log + if (is_normal_trans) { + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(NORMAL_TRAN, log_entry)); + } else if (is_redo_with_prapare_trans){ + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(REDO_WITH_PREPARE_TRAN, log_entry)); + } else { + } + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_ITEM_NOT_SETTED, err); + + // Verify the misses array and read the misses redo log + const int64_t miss_array_cnt = missing.count(); + EXPECT_EQ(miss_redo_cnt, miss_array_cnt); + for (int64_t log_cnt = 0; log_cnt < miss_array_cnt; ++log_cnt) { + LOG_DEBUG("miss", K(missing[log_cnt])); + EXPECT_EQ(start_redo_log_id, missing[log_cnt]); + start_redo_log_id++; + + clog::ObLogEntry miss_log_entry; + EXPECT_EQ(OB_SUCCESS, log_gen.next_miss_log_entry(missing[log_cnt], miss_log_entry)); + err = pr.read_missing_redo(miss_log_entry); + EXPECT_EQ(OB_SUCCESS, err); + } + + // After reading the missing redo log, read the prepare log again to advance the partitioning task + if (is_normal_trans) { + EXPECT_EQ(OB_SUCCESS, log_gen.get_prepare_log_entry(NORMAL_TRAN, log_entry)); + } else if (is_redo_with_prapare_trans){ + EXPECT_EQ(OB_SUCCESS, log_gen.get_prepare_log_entry(REDO_WITH_PREPARE_TRAN, log_entry)); + } else { + } + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + + // read commit/abort log + if (is_normal_trans) { + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(NORMAL_TRAN, log_entry)); + } else if (is_redo_with_prapare_trans){ + EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(REDO_WITH_PREPARE_TRAN, log_entry)); + } else { + } + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + + err = pr.flush(stop_flag); + EXPECT_EQ(OB_SUCCESS, err); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt()); + + // Destroy. 
+ pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +/* + * r stands for redo, p stands for prepare, c stands for commit, a stands for abort) + * The numbers after r/p/c/a represent the different transactions + * Log sequence: + * r1 r2 r2 r2 p2 p1 c1 c2 r3 p3 c3 + * Verifying the correctness of parsing multiple transactions, i.e. constructing different partitioned transaction tasks based on different transaction IDs + * Verify the output order of transactions: transaction 2 -> transaction 1 -> transaction 3 + */ +TEST(PartTransResolver, BasicTest4) +{ + int err = OB_SUCCESS; + + ObPartitionKey pkey(1000U, 1, 1); + + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. + MockParser1 parser; + EXPECT_EQ(OB_SUCCESS, parser.init()); + + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + const int64_t commit_trans_cnt = 3; + // redo info + int64_t redo_cnt_array[3] = {1, 3, 1}; + ObLogIdArray redo_log_ids_array[3]; + for (int64_t i = 0; i < 3; ++i) { + for (int64_t j = 0; j < redo_cnt_array[i]; ++j) { + EXPECT_EQ(OB_SUCCESS, redo_log_ids_array[i].push_back(j)); + } + } + + // prepare info + // trans 2 - trans 1 - trans 3->seq: 0, 1, 2 + int64_t seq_array[3] = {1, 0, 2}; + int64_t prepare_timestamp = PREPARE_TIMESTAMP; + ObAddr addr_array[3]; + for (int64_t idx = 0; idx < 3; idx++) { + addr_array[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", static_cast(8888 + idx)); + } + // trans ID + ObTransID trans_id_array[3] = { + ObTransID(addr_array[0]), ObTransID(addr_array[1]), ObTransID(addr_array[2]) + }; + uint64_t prepare_log_id_array[3] = {1, 3, 1}; + uint64_t CLOUSTER_ID = 1000; + + // commit info + int64_t global_trans_version = GLOBAL_TRANS_VERSION; + PartitionLogInfoArray ptl_ids_array[3]; + for (int64_t i = 0; i < 3; ++i) { + ptl_ids_array[i].reset(); + + ObPartitionLogInfo ptl_id(pkey, prepare_log_id_array[i], PREPARE_TIMESTAMP); + err = ptl_ids_array[i].push_back(ptl_id); + EXPECT_EQ(OB_SUCCESS, err); + // push fixed participant information + for (int64_t j = 0; j < FIXED_PART_COUNT; ++j) { + err = ptl_ids_array[i].push_back(FIXED_PART_INFO[j]); + EXPECT_EQ(OB_SUCCESS, err); + } + } + + TransLogInfo trans_log_info_array[3]; + for (int64_t i = 0; i < 3; ++i) { + trans_log_info_array[i].reset(redo_cnt_array[i], redo_log_ids_array[i], seq_array[i], pkey, prepare_timestamp, + trans_id_array[i], prepare_log_id_array[i], + CLOUSTER_ID, global_trans_version, ptl_ids_array[i]); + } + + // Push in the order of transaction 2 - transaction 1 - transaction 3 for subsequent validation of the transaction output order + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info_array[1])); + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info_array[0])); + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info_array[2])); + + // Log gen. Generate logs for transactions 1, 2 and 3 respectively + TransLogEntryGeneratorBase log_gen_1(pkey, trans_id_array[0]); + TransLogEntryGeneratorBase log_gen_2(pkey, trans_id_array[1]); + TransLogEntryGeneratorBase log_gen_3(pkey, trans_id_array[2]); + + log_gen_1.next_trans(redo_cnt_array[0], true); + log_gen_2.next_trans(redo_cnt_array[1], true); + log_gen_3.next_trans(redo_cnt_array[2], true); + + // Read logs. 
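+  // task_seq_ and prepare_seq_ are internal counters of PartTransResolver; the
+  // assertions below use them to check that prepare logs are sequenced in arrival
+  // order (p2 -> p1 -> p3) and that all three tasks are dispatched on flush().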
+ ObLogIdArray missing1; + ObLogIdArray missing2; + ObLogIdArray missing3; + TransStatInfo tsi; + volatile bool stop_flag = false; + + // log seq: + // r1 r2 r2 r2 p2 p1 c1 c2 r3 p3 c3 + clog::ObLogEntry log_entry; + + EXPECT_EQ(0, pr.task_seq_); + EXPECT_EQ(0, pr.prepare_seq_); + // r1 + EXPECT_EQ(OB_SUCCESS, log_gen_1.next_log_entry(log_entry)); + err = pr.read(log_entry, missing1, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // r2 + EXPECT_EQ(OB_SUCCESS, log_gen_2.next_log_entry(log_entry)); + err = pr.read(log_entry, missing2, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // r2 + EXPECT_EQ(OB_SUCCESS, log_gen_2.next_log_entry(log_entry)); + err = pr.read(log_entry, missing2, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // r2 + EXPECT_EQ(OB_SUCCESS, log_gen_2.next_log_entry(log_entry)); + err = pr.read(log_entry, missing2, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // p2 + EXPECT_EQ(OB_SUCCESS, log_gen_2.next_log_entry(log_entry)); + err = pr.read(log_entry, missing2, tsi); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_EQ(1, pr.prepare_seq_); + // p1 + EXPECT_EQ(OB_SUCCESS, log_gen_1.next_log_entry(log_entry)); + err = pr.read(log_entry, missing1, tsi); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_EQ(2, pr.prepare_seq_); + // c1 + EXPECT_EQ(OB_SUCCESS, log_gen_1.next_log_entry(log_entry)); + err = pr.read(log_entry, missing1, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // c2 + EXPECT_EQ(OB_SUCCESS, log_gen_2.next_log_entry(log_entry)); + err = pr.read(log_entry, missing2, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // r3 + EXPECT_EQ(OB_SUCCESS, log_gen_3.next_log_entry(log_entry)); + err = pr.read(log_entry, missing3, tsi); + EXPECT_EQ(OB_SUCCESS, err); + // p3 + EXPECT_EQ(OB_SUCCESS, log_gen_3.next_log_entry(log_entry)); + err = pr.read(log_entry, missing3, tsi); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_EQ(3, pr.prepare_seq_); + // c3 + EXPECT_EQ(OB_SUCCESS, log_gen_3.next_log_entry(log_entry)); + err = pr.read(log_entry, missing3, tsi); + EXPECT_EQ(OB_SUCCESS, err); + + err = pr.flush(stop_flag); + EXPECT_EQ(OB_SUCCESS, err); + EXPECT_EQ(3, pr.task_seq_); + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt()); + + // Verify the correctness of partition task data + for (int64_t idx = 0; idx < 3; ++idx) { + bool check_result; + EXPECT_EQ(OB_SUCCESS, parser.get_check_result(check_result)); + EXPECT_TRUE(check_result); + } + + // Destroy. + pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +/* + * Test scenario: + * For N Sp transactions, half of them commit, half of them abort + * Each Sp transaction has a random redo log + * + * log seq:redo, redo, ... redo, commit/abort +* + * // redo info + * redo_log_cnt + * ObLogIdArray redo_log_ids; + * + * // prepare info + * int64_t seq; + * common::ObPartitionKey partition; + * int64_t prepare_timestamp; + * ObTransID trans_id; + * uint64_t prepare_log_id; + * uint64_t cluster_id; + * + * // commit info + * int64_t global_trans_version; + * PartitionLogInfoArray *participants; + * + */ +TEST(PartTransResolver, BasicTest5) +{ + int err = OB_SUCCESS; + + // Commit half trans, whose has even idx. 
+ const int64_t trans_cnt = TRANS_COUNT; + const int64_t commit_trans_cnt = trans_cnt / 2; + const int64_t abort_trans_cnt = trans_cnt - commit_trans_cnt; + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 8888); + + TransLogInfo trans_log_info; + + // redo info + int64_t redo_cnt = 0; + ObLogIdArray redo_log_ids; + // prepare info + int64_t seq = 0; + ObPartitionKey pkey(1000U, 1, 1); + int64_t prepare_timestamp = SP_PREPARE_TIMESTAMP; + ObTransID trans_id(addr); + uint64_t prepare_log_id = 0; + uint64_t CLOUSTER_ID = 1000; + // commit info + int64_t global_trans_version = SP_GLOBAL_TRANS_VERSION; + PartitionLogInfoArray ptl_ids; + + // Log gen. + SpTransLogEntryGeneratorBase log_gen(pkey, trans_id); + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. + MockParser1 parser; + EXPECT_EQ(OB_SUCCESS, parser.init()); + + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. + ObLogIdArray missing; + TransStatInfo tsi; + volatile bool stop_flag = false; + + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + redo_cnt = get_timestamp() % TRANS_REDO_LOG_COUNT + 1; + redo_log_ids.reset(); + for (int64_t cnt = 0; cnt < redo_cnt; ++cnt) { + EXPECT_EQ(OB_SUCCESS, redo_log_ids.push_back(log_gen.get_log_id() + cnt)); + } + prepare_log_id = log_gen.get_log_id() + redo_cnt; + ptl_ids.reset(); + ObPartitionLogInfo ptl_id(pkey, prepare_log_id, PREPARE_TIMESTAMP); + err = ptl_ids.push_back(ptl_id); + EXPECT_EQ(OB_SUCCESS, err); + + trans_log_info.reset(redo_cnt, redo_log_ids, seq, pkey, prepare_timestamp, + trans_id, prepare_log_id, CLOUSTER_ID, global_trans_version, ptl_ids); + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info)); + seq++; + + // Commit trans with even idx. + log_gen.next_trans(redo_cnt, (0 == idx % 2)); + clog::ObLogEntry log_entry; + + while (OB_SUCCESS == log_gen.next_log_entry(log_entry)) { + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + } + + err = pr.flush(stop_flag); + EXPECT_EQ(OB_SUCCESS, err); + + // Verify the correctness of partition task data + bool check_result; + EXPECT_EQ(OB_SUCCESS, parser.get_check_result(check_result)); + EXPECT_TRUE(check_result); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt()); + EXPECT_EQ(abort_trans_cnt, parser.get_abort_trans_cnt()); + + // Destroy. + pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +/* + * Test scenario: + * For N Sp transactions, redo and commit in the same log entry + * Each Sp transaction has a random redo log + * + * log seq:redo, redo, ... redo, redo-commit + * + */ +TEST(PartTransResolver, BasicTest6) +{ + int err = OB_SUCCESS; + + const int64_t trans_cnt = TRANS_COUNT; + const int64_t commit_trans_cnt = trans_cnt; + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 8888); + + TransLogInfo trans_log_info; + + // redo info + int64_t redo_cnt = 0; + ObLogIdArray redo_log_ids; + // prepare info + int64_t seq = 0; + ObPartitionKey pkey(1000U, 1, 1); + int64_t prepare_timestamp = SP_PREPARE_TIMESTAMP; + ObTransID trans_id(addr); + uint64_t prepare_log_id = 0; + uint64_t CLOUSTER_ID = 1000; + // commit info + int64_t global_trans_version = SP_GLOBAL_TRANS_VERSION; + PartitionLogInfoArray ptl_ids; + + // Log gen. + SpTransLogEntryGeneratorBase log_gen(pkey, trans_id); + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. 
+ MockParser1 parser; + //MockParser2 parser; + EXPECT_EQ(OB_SUCCESS, parser.init()); + + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. + ObLogIdArray missing; + TransStatInfo tsi; + volatile bool stop_flag = false; + + for (int64_t idx = 0; idx < trans_cnt; ++idx) { + redo_cnt = get_timestamp() % TRANS_REDO_LOG_COUNT + 1; + // First test, if redo_cnt=1, only one redo-commit, prepare_log_id=0, illegal + if (0 == idx && 1 == redo_cnt) { + redo_cnt++; + } + redo_log_ids.reset(); + for (int64_t cnt = 0; cnt < redo_cnt; ++cnt) { + EXPECT_EQ(OB_SUCCESS, redo_log_ids.push_back(log_gen.get_log_id() + cnt)); + } + // sp transaction does not have prepare log, prepare log id is the same as commit log id + prepare_log_id = log_gen.get_log_id() + redo_cnt - 1; + ptl_ids.reset(); + ObPartitionLogInfo ptl_id(pkey, prepare_log_id, PREPARE_TIMESTAMP); + err = ptl_ids.push_back(ptl_id); + EXPECT_EQ(OB_SUCCESS, err); + + trans_log_info.reset(redo_cnt, redo_log_ids, seq, pkey, prepare_timestamp, + trans_id, prepare_log_id, CLOUSTER_ID, global_trans_version, ptl_ids); + EXPECT_EQ(OB_SUCCESS, parser.push_into_queue(&trans_log_info)); + seq++; + + log_gen.next_trans_with_redo_commit(redo_cnt); + clog::ObLogEntry log_entry; + + while (OB_SUCCESS == log_gen.next_log_entry_with_redo_commit(log_entry)) { + err = pr.read(log_entry, missing, tsi); + EXPECT_EQ(OB_SUCCESS, err); + } + + err = pr.flush(stop_flag); + EXPECT_EQ(OB_SUCCESS, err); + + // Verify the correctness of partition task data + bool check_result; + EXPECT_EQ(OB_SUCCESS, parser.get_check_result(check_result)); + EXPECT_TRUE(check_result); + } + + // Check. + EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt()); + + // Destroy. + pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +/* + * Test scenario: + * For N Sp transactions, redo and commit in the same log entry + * Each Sp transaction has a random redo log + * + * Log sequence: redo, redo, ... redo, redo-commit + * Read to redo-commit and find redo log missing, need to read miss log + * + */ +TEST(PartTransResolver, BasicTest7) +{ + int err = OB_SUCCESS; + + const int64_t trans_cnt = TRANS_COUNT; + //const int64_t trans_cnt = 2; + const int64_t commit_trans_cnt = trans_cnt; + int64_t redo_cnt = 0; + int64_t miss_redo_cnt = 0; + int64_t can_read_redo_cnt = 0; + + // Pkey. + ObPartitionKey pkey(1000U, 1, 1); + ObAddr addr(ObAddr::IPV4, "127.0.0.1", 8888); + ObTransID trans_id(addr); + + // Log gen. + SpTransLogEntryGenerator1 log_gen(pkey, trans_id); + // Task Pool. + ObLogTransTaskPool task_pool; + EXPECT_EQ(OB_SUCCESS, init_task_pool(task_pool)); + // Parser. + MockParser2 parser; + + // Partitioned Transaction Parser + PartTransResolver pr; + err = pr.init(pkey, parser, task_pool); + EXPECT_EQ(OB_SUCCESS, err); + + // Read logs. + ObLogIdArray missing; + TransStatInfo tsi; + volatile bool stop_flag = false; + + // case 1: redo, redo, redo, ... redo, commit + // case 2: redo, redo, redo, ... 
redo, redo-commit
+  bool is_normal_trans = false;
+  bool is_redo_with_commit_trans = false;
+  for (int64_t idx = 0; idx < trans_cnt; ++idx) {
+    if (idx < trans_cnt / 2) {
+      is_normal_trans = true;
+    } else {
+      is_redo_with_commit_trans = true;
+    }
+
+    redo_cnt = get_timestamp() % TRANS_REDO_LOG_COUNT + 1;
+    //redo_cnt = 2;
+    if (is_normal_trans) {
+      miss_redo_cnt = get_timestamp() % redo_cnt + 1;
+      can_read_redo_cnt = redo_cnt - miss_redo_cnt;
+    } else if (is_redo_with_commit_trans) {
+      miss_redo_cnt = get_timestamp() % redo_cnt;
+      can_read_redo_cnt = redo_cnt - miss_redo_cnt - 1;
+    } else {
+    }
+
+    if (is_normal_trans) {
+      log_gen.next_trans_with_miss_redo(redo_cnt, miss_redo_cnt, SP_NORMAL_TRAN);
+    } else if (is_redo_with_commit_trans) {
+      log_gen.next_trans_with_miss_redo(redo_cnt, miss_redo_cnt, SP_REDO_WITH_COMMIT_TRAN);
+    } else {
+    }
+
+    uint64_t start_redo_log_id = log_gen.get_log_id();
+    clog::ObLogEntry log_entry;
+
+    // First read the can_read_redo_cnt readable redo logs
+    for (int64_t log_cnt = 0; log_cnt < can_read_redo_cnt; ++log_cnt) {
+      if (is_normal_trans) {
+        EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(SP_NORMAL_TRAN, log_entry));
+      } else if (is_redo_with_commit_trans) {
+        EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(SP_REDO_WITH_COMMIT_TRAN, log_entry));
+      } else {
+      }
+      err = pr.read(log_entry, missing, tsi);
+      EXPECT_EQ(OB_SUCCESS, err);
+    }
+
+    // Read the commit log and detect that redo logs are missing
+    if (is_normal_trans) {
+      EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(SP_NORMAL_TRAN, log_entry));
+    } else if (is_redo_with_commit_trans) {
+      EXPECT_EQ(OB_SUCCESS, log_gen.next_log_entry_missing_redo(SP_REDO_WITH_COMMIT_TRAN, log_entry));
+    } else {
+    }
+    err = pr.read(log_entry, missing, tsi);
+    EXPECT_EQ(OB_ITEM_NOT_SETTED, err);
+
+    // Verify the missing-log array and read the missing redo logs
+    const int64_t miss_array_cnt = missing.count();
+    EXPECT_EQ(miss_redo_cnt, miss_array_cnt);
+    for (int64_t log_cnt = 0; log_cnt < miss_array_cnt; ++log_cnt) {
+      LOG_DEBUG("miss", K(missing[log_cnt]));
+      EXPECT_EQ(start_redo_log_id, missing[log_cnt]);
+      start_redo_log_id++;
+
+      clog::ObLogEntry miss_log_entry;
+      EXPECT_EQ(OB_SUCCESS, log_gen.next_miss_log_entry(missing[log_cnt], miss_log_entry));
+      err = pr.read_missing_redo(miss_log_entry);
+      EXPECT_EQ(OB_SUCCESS, err);
+    }
+
+    // After reading the missing redo logs, read the commit log again to advance the partition task and release the commit_log_entry memory
+    if (is_normal_trans) {
+      EXPECT_EQ(OB_SUCCESS, log_gen.get_commit_log_entry(SP_NORMAL_TRAN, log_entry));
+    } else if (is_redo_with_commit_trans) {
+      EXPECT_EQ(OB_SUCCESS, log_gen.get_commit_log_entry(SP_REDO_WITH_COMMIT_TRAN, log_entry));
+    } else {
+    }
+    err = pr.read(log_entry, missing, tsi);
+    EXPECT_EQ(OB_SUCCESS, err);
+
+    err = pr.flush(stop_flag);
+    EXPECT_EQ(OB_SUCCESS, err);
+  }
+
+  // Check.
+  EXPECT_EQ(commit_trans_cnt, parser.get_commit_trans_cnt());
+
+  // Destroy.
+ pr.destroy(); + task_pool.destroy(); + fifo_allocator.destroy(); +} + +} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_part_trans_resolver.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_ob_log_part_trans_resolver_new.cpp b/unittest/liboblog/test_ob_log_part_trans_resolver_new.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9cf5ee80b10ddfd34c3e2ad3aa4d8cb3ae119975 --- /dev/null +++ b/unittest/liboblog/test_ob_log_part_trans_resolver_new.cpp @@ -0,0 +1,175 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include "gtest/gtest.h" + +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" +#include "ob_log_fetch_stat_info.h" + +#define private public +#include "liboblog/src/ob_log_part_trans_resolver.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ + +void call_sort_and_unique_missing_log_ids(IObLogPartTransResolver::ObLogMissingInfo &missing_info) +{ + LOG_INFO("MISSING LOG [BEGIN]", K(missing_info)); + EXPECT_EQ(OB_SUCCESS, missing_info.sort_and_unique_missing_log_ids()); + LOG_INFO("MISSING LOG [END]", K(missing_info)); +} + +TEST(ObLogPartTransResolver, Function1) +{ + int ret = OB_SUCCESS; + IObLogPartTransResolver::ObLogMissingInfo missing_info; + ObLogIdArray &missing_log_id = missing_info.missing_log_ids_; + + // 1. one miss log with id 1 + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(1, missing_info.get_missing_log_count()); + + // 2. two miss log with id 1 + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(1, missing_info.get_missing_log_count()); + + // 3. repeatable miss log with id 1 + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(1, missing_info.get_missing_log_count()); + + // 4. 
multi repeatable miss log + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(2, missing_info.get_missing_log_count()); + for (int64_t idx=0; OB_SUCC(ret) && idx < missing_log_id.count(); ++idx) { + EXPECT_EQ(idx+1, missing_log_id.at(idx)); + } + + // 5. multi repeatable miss log + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(2, missing_info.get_missing_log_count()); + for (int64_t idx=0; OB_SUCC(ret) && idx < missing_log_id.count(); ++idx) { + EXPECT_EQ(idx+1, missing_log_id.at(idx)); + } + + // 6. multi repeatable miss log + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(2, missing_info.get_missing_log_count()); + for (int64_t idx=0; OB_SUCC(ret) && idx < missing_log_id.count(); ++idx) { + EXPECT_EQ(idx+1, missing_log_id.at(idx)); + } + + + // 7. multi repeatable miss log + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(4, missing_info.get_missing_log_count()); + for (int64_t idx=0; OB_SUCC(ret) && idx < missing_log_id.count(); ++idx) { + EXPECT_EQ(idx+1, missing_log_id.at(idx)); + } + + // 8. 
multi repeatable miss log + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(4, missing_info.get_missing_log_count()); + for (int64_t idx=0; OB_SUCC(ret) && idx < missing_log_id.count(); ++idx) { + EXPECT_EQ(idx+1, missing_log_id.at(idx)); + } + + // 9. multi repeatable miss log + missing_info.reset(); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(1)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(2)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(3)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + EXPECT_EQ(OB_SUCCESS, missing_info.push_back_missing_log_id(4)); + call_sort_and_unique_missing_log_ids(missing_info); + EXPECT_EQ(4, missing_info.get_missing_log_count()); + for (int64_t idx=0; OB_SUCC(ret) && idx < missing_log_id.count(); ++idx) { + EXPECT_EQ(idx+1, missing_log_id.at(idx)); + } +} + +} +} + +int main(int argc, char **argv) +{ + //ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_part_trans_resolver.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_ob_log_start_log_id_locator.cpp b/unittest/liboblog/test_ob_log_start_log_id_locator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cdf7743f1f8582457b7385f629facb428febf21c --- /dev/null +++ b/unittest/liboblog/test_ob_log_start_log_id_locator.cpp @@ -0,0 +1,366 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#include "lib/atomic/ob_atomic.h" +#include "ob_log_utils.h" +#define private public +#include "test_ob_log_fetcher_common_utils.h" +#include "ob_log_start_log_id_locator.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class TestObLogStartLogIdLocator: public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +public : + static const int64_t WORKER_COUNT = 3; + static const int64_t LOCATE_COUNT = 1; + static const int64_t SINGLE_WORKER_COUNT = 1; +}; + +static const int64_t SERVER_COUNT = 10; +static const int64_t START_LOG_ID_REQUEST_COUNT = 5 * 10000; +// for test break info +static const int64_t BREAK_INFO_START_LOG_ID_REQUEST_COUNT = 256; +static const int64_t TEST_TIME_LIMIT = 10 * _MIN_; + +void generate_req(const int64_t req_cnt, StartLogIdLocateReq *&request_array, + const int64_t start_tstamp) +{ + // Build requests. + const int64_t AllSvrCnt = 10; + ObAddr svrs[AllSvrCnt]; + for (int64_t idx = 0, cnt = AllSvrCnt; idx < cnt; ++idx) { + svrs[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + + request_array = new StartLogIdLocateReq[req_cnt]; + for (int64_t idx = 0, cnt = req_cnt; idx < cnt; ++idx) { + StartLogIdLocateReq &r = request_array[idx]; + r.reset(); + r.pkey_ = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + r.start_tstamp_ = start_tstamp; + // Set server list. + for (int64_t idx2 = 0, cnt2 = AllSvrCnt; idx2 < cnt2; ++idx2) { + StartLogIdLocateReq::SvrItem item; + item.reset(); + item.svr_ = svrs[idx2]; + r.svr_list_.push_back(item); + } + } +} + +void free_req(StartLogIdLocateReq *request_array) +{ + delete[] request_array; +} + +/* + * Worker. + */ +class TestWorker : public liboblog::Runnable +{ +public: + ObLogStartLogIdLocator *locator_; + StartLogIdLocateReq *request_array_; + int64_t request_cnt_; + int64_t all_svr_cnt_; + bool push_req_finish_; + + void reset(ObLogStartLogIdLocator *locator, StartLogIdLocateReq *req_array, + int64_t req_cnt, int64_t all_svr_cnt) + { + locator_ = locator; + request_array_ = req_array; + request_cnt_ = req_cnt; + all_svr_cnt_ = all_svr_cnt; + push_req_finish_ = false; + } + + virtual int routine() + { + // Push requests into locator. + for (int64_t idx = 0, cnt = request_cnt_; idx < cnt; ++idx) { + StartLogIdLocateReq &r = request_array_[idx]; + EXPECT_EQ(OB_SUCCESS, locator_->async_start_log_id_req(&r)); + if (0 == (idx % 1000)) { + usec_sleep(10 * _MSEC_); + } + } + ATOMIC_STORE(&push_req_finish_, true); + + // Wait for requests end. Max test time should set. + int64_t end_request_cnt = 0; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_request_cnt < request_cnt_)) { + for (int64_t idx = 0, cnt = request_cnt_; idx < cnt; ++idx) { + StartLogIdLocateReq &r = request_array_[idx]; + if (StartLogIdLocateReq::DONE == r.get_state()) { + end_request_cnt += 1; + r.set_state(StartLogIdLocateReq::IDLE); + } + } + usec_sleep(100 * _MSEC_); + } + // Assert if test cannot finish. + EXPECT_EQ(request_cnt_, end_request_cnt); + + // Do some statistics. + int64_t svr_consume_distribution[all_svr_cnt_]; // 1, 2, 3, ... 
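+    // svr_consume_distribution[i] counts the requests that finished after consuming
+    // (i + 1) servers from their server list; it is printed below as a percentage distribution.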
+ for (int64_t idx = 0, cnt = all_svr_cnt_; idx < cnt; ++idx) { + svr_consume_distribution[idx] = 0; + } + int64_t succ_cnt = 0; + for (int64_t idx = 0, cnt = request_cnt_; idx < cnt; ++idx) { + StartLogIdLocateReq &r = request_array_[idx]; + EXPECT_GE(r.svr_list_consumed_, 0); + svr_consume_distribution[(r.svr_list_consumed_ - 1)] += 1; + uint64_t start_log_id = 0; + common::ObAddr svr; + if (r.get_result(start_log_id, svr)) { + succ_cnt += 1; + EXPECT_EQ(r.pkey_.table_id_, start_log_id); + } + } + + const int64_t BuffSize = 1024; + char buf[BuffSize]; + int64_t pos = 0; + for (int64_t idx = 0, cnt = all_svr_cnt_; idx < cnt; ++idx) { + pos += snprintf(buf + pos, BuffSize - pos, "svr_cnt:%ld perc:%f ", (1 + idx), + ((double)svr_consume_distribution[idx] / (double)request_cnt_)); + } + fprintf(stderr, "request count: %ld distribution: %s succeed perc: %f \n", + request_cnt_, buf, (double)succ_cnt / (double)request_cnt_); + + + return OB_SUCCESS; + } +}; + +//////////////////////Basic function tests////////////////////////////////////////// +TEST_F(TestObLogStartLogIdLocator, start_log_id_request) +{ + StartLogIdLocateReq req; + req.reset(); + EXPECT_TRUE(req.is_state_idle()); + + req.set_state_req(); + EXPECT_TRUE(req.is_state_req()); + EXPECT_EQ(StartLogIdLocateReq::REQ, req.get_state()); + + req.set_state_done(); + EXPECT_TRUE(req.is_state_done()); + EXPECT_EQ(StartLogIdLocateReq::DONE, req.get_state()); + + req.set_state_idle(); + EXPECT_TRUE(req.is_state_idle()); + EXPECT_EQ(StartLogIdLocateReq::IDLE, req.get_state()); + + /// build svr_list + int ret = OB_SUCCESS; + ObAddr svr_list[SERVER_COUNT]; + for (int64_t idx = 0, cnt = SERVER_COUNT; idx < cnt; ++idx) { + svr_list[idx] = ObAddr(ObAddr::IPV4, "127.0.0.1", (int32_t)(idx + 1000)); + } + + for (int64_t idx = 0, cnt = SERVER_COUNT; (OB_SUCCESS == ret) && idx < cnt; ++idx) { + StartLogIdLocateReq::SvrItem item; + item.reset(svr_list[idx]); + if (OB_FAIL(req.svr_list_.push_back(item))) { + LOG_ERROR("push error", K(ret)); + } + } + EXPECT_EQ(SERVER_COUNT, req.svr_list_.count()); + + // next_svr_item, cur_svr_item + for (int64_t idx = 0, cnt = SERVER_COUNT; (OB_SUCCESS == ret) && idx < cnt; ++idx) { + StartLogIdLocateReq::SvrItem *item; + + EXPECT_EQ(OB_SUCCESS, req.next_svr_item(item)); + EXPECT_EQ(svr_list[idx], item->svr_); + EXPECT_EQ(OB_SUCCESS, req.cur_svr_item(item)); + EXPECT_EQ(svr_list[idx], item->svr_); + } + // is_request_ended, get_result + EXPECT_TRUE(req.is_request_ended(LOCATE_COUNT)); + uint64_t start_log_id = 0; + common::ObAddr svr; + EXPECT_FALSE(req.get_result(start_log_id, svr)); + EXPECT_EQ(OB_INVALID_ID, start_log_id); +} + +//TEST_F(TestObLogStartLogIdLocator, DISABLED_locator) +TEST_F(TestObLogStartLogIdLocator, locator) +{ + const int64_t TestWorkerCnt = 3; + // genereate data + StartLogIdLocateReq *request_arrays[TestWorkerCnt]; + for (int64_t idx = 0; idx < TestWorkerCnt; idx++) { + //StartLogIdLocateReq *request_array = NULL; + generate_req(START_LOG_ID_REQUEST_COUNT, request_arrays[idx], get_timestamp()); + OB_ASSERT(NULL != request_arrays[idx]); + } + + MockFetcherErrHandler1 err_handler1; + MockObLogStartLogIdRpc rpc; + ObLogStartLogIdLocator locator; + + EXPECT_EQ(OB_SUCCESS, locator.init(WORKER_COUNT, LOCATE_COUNT, rpc, err_handler1)); + EXPECT_EQ(OB_SUCCESS, locator.start()); + + TestWorker workers[TestWorkerCnt]; + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.reset(&locator, request_arrays[idx], START_LOG_ID_REQUEST_COUNT, SERVER_COUNT); + 
w.create(); + } + + for (int64_t idx = 0, cnt = TestWorkerCnt; idx < cnt; ++idx) { + TestWorker &w = workers[idx]; + w.join(); + } + + // free + for (int64_t idx = 0; idx < TestWorkerCnt; idx++) { + free_req(request_arrays[idx]); + request_arrays[idx] = NULL; + } + locator.destroy(); +} + +TEST_F(TestObLogStartLogIdLocator, test_out_of_lower_bound) +{ + // Default configuration of observer log retention time + int64_t default_clog_save_time = ObLogStartLogIdLocator::g_observer_clog_save_time; + + MockFetcherErrHandler1 err_handler1; + MockObLogStartLogIdRpc rpc; + ObLogStartLogIdLocator locator; + + EXPECT_EQ(OB_SUCCESS, locator.init(WORKER_COUNT, LOCATE_COUNT, rpc, err_handler1)); + EXPECT_EQ(OB_SUCCESS, locator.start()); + + // Generate data, set start time to current time + StartLogIdLocateReq *req = NULL; + int64_t start_tstamp = get_timestamp(); + generate_req(1, req, start_tstamp); + + // RPC setup, observer returns success, but partition returns log less than lower bound + rpc.set_err(OB_SUCCESS, OB_ERR_OUT_OF_LOWER_BOUND); + + EXPECT_EQ(OB_SUCCESS, locator.async_start_log_id_req(req)); + while (req->get_state() != StartLogIdLocateReq::DONE) { + usec_sleep(100 * _MSEC_); + } + + // Since all servers return less than the lower bound and have a start time stamp of less than 2 hours, + // expect the location to succeed + uint64_t start_log_id = OB_INVALID_ID; + common::ObAddr svr; + EXPECT_EQ(true, req->get_result(start_log_id, svr)); + EXPECT_EQ(req->pkey_.get_table_id(), start_log_id); + + // free + free_req(req); + req = NULL; + + /////////////// Set the start time past the log retention time, in which case the location returns a failure /////////////////// + // Start-up time less than minimum log retention time + start_tstamp = get_timestamp() - default_clog_save_time - 1; + generate_req(1, req, start_tstamp); // Regeneration request + + // RPC setup, observer returns success, but partition returns less than lower bound + rpc.set_err(OB_SUCCESS, OB_ERR_OUT_OF_LOWER_BOUND); + + // Execute location requests + EXPECT_EQ(OB_SUCCESS, locator.async_start_log_id_req(req)); + while (req->get_state() != StartLogIdLocateReq::DONE) { + usec_sleep(100 * _MSEC_); + } + + // Although all servers return less than the lower bound, the start-up timestamp is no longer within the log retention time + // and expects the location to fail + EXPECT_EQ(false, req->get_result(start_log_id, svr)); + + // free + free_req(req); + req = NULL; + + // destroy locator + locator.destroy(); +} + +// When the break_info message is returned, test the correct processing +TEST_F(TestObLogStartLogIdLocator, break_info_test) +{ + // genereate data + StartLogIdLocateReq *request_array; + generate_req(BREAK_INFO_START_LOG_ID_REQUEST_COUNT, request_array, get_timestamp()); + OB_ASSERT(NULL != request_array); + + MockFetcherErrHandler1 err_handler1; + MockObLogRpcDerived2 rpc; + EXPECT_EQ(OB_SUCCESS, rpc.init(BREAK_INFO_START_LOG_ID_REQUEST_COUNT)); + ObLogStartLogIdLocator locator; + + EXPECT_EQ(OB_SUCCESS, locator.init(SINGLE_WORKER_COUNT, LOCATE_COUNT, rpc, err_handler1)); + + // Insert all data first, then open the StartLogIdLocator thread to ensure that all subsequent requests are aggregated on a single server; + TestWorker worker; + worker.reset(&locator, request_array, BREAK_INFO_START_LOG_ID_REQUEST_COUNT, SERVER_COUNT); + worker.create(); + + while (false == ATOMIC_LOAD(&worker.push_req_finish_)) { + } + + EXPECT_EQ(OB_SUCCESS, locator.start()); + + // join + worker.join(); + // free + free_req(request_array); + 
request_array = NULL; + + locator.destroy(); + rpc.destroy(); +} + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_start_log_id_locator.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_log_svr_finder.cpp b/unittest/liboblog/test_ob_log_svr_finder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..73d093f9fb010f27f2a136bc141ecf704f58f59f --- /dev/null +++ b/unittest/liboblog/test_ob_log_svr_finder.cpp @@ -0,0 +1,296 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX OBLOG_FETCHER + +#include +#include "share/ob_define.h" +#define private public +#include "test_ob_log_fetcher_common_utils.h" +#include "ob_log_utils.h" +#include "ob_log_svr_finder.h" +#include "ob_log_all_svr_cache.h" +#include "lib/atomic/ob_atomic.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; + +namespace oceanbase +{ +namespace unittest +{ +class TestObLogSvrFinder: public ::testing::Test +{ +public : + virtual void SetUp() {} + virtual void TearDown() {} +public : + static const int64_t SVR_FINDER_THREAD_NUM = 1; +}; + +static const int64_t TEST_TIME_LIMIT = 10 * _MIN_; + +void generate_part_svr_list(const int64_t count, PartSvrList *&part_svr_list) +{ + part_svr_list = static_cast( + ob_malloc(sizeof(PartSvrList) * count)); + for (int64_t idx = 0; idx < count; idx++) { + new (part_svr_list + idx) PartSvrList(); + } +} + +// Constructing SvrFindReq, two types of requests +// 1. logid request +// 2. 
timestamp request +void generate_svr_finder_requset(const int64_t count, + PartSvrList *part_svr_list, + SvrFindReq *&svr_req_array) +{ + svr_req_array = static_cast( + ob_malloc(sizeof(SvrFindReq) * count)); + for (int64_t idx = 0; idx < count; idx++) { + new (svr_req_array + idx) SvrFindReq(); + ObPartitionKey pkey = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + + const int64_t seed = get_timestamp(); + if ((seed % 100) < 50) { + svr_req_array[idx].reset_for_req_by_log_id(part_svr_list[idx], pkey, idx); + EXPECT_TRUE(svr_req_array[idx].is_state_idle()); + } else { + svr_req_array[idx].reset_for_req_by_tstamp(part_svr_list[idx], pkey, seed); + EXPECT_TRUE(svr_req_array[idx].is_state_idle()); + } + } +} + +// build LeaderFindReq +void generate_leader_finder_request(const int64_t count, LeaderFindReq *&leader_req_array) +{ + leader_req_array = static_cast( + ob_malloc(sizeof(LeaderFindReq) * count)); + + for (int64_t idx = 0; idx < count; idx++) { + new (leader_req_array + idx) LeaderFindReq(); + ObPartitionKey pkey = ObPartitionKey((uint64_t)(1000 + idx), 0, 1); + leader_req_array[idx].reset(pkey); + EXPECT_TRUE(leader_req_array[idx].is_state_idle()); + } +} + +void wait_svr_finer_req_end(SvrFindReq *svr_req_array, + const int64_t count, + int64_t &end_request_cnt) +{ + end_request_cnt = 0; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_request_cnt < count)) { + for (int64_t idx = 0, cnt = count; idx < cnt; ++idx) { + SvrFindReq &r = svr_req_array[idx]; + if (SvrFindReq::DONE == r.get_state()) { + end_request_cnt += 1; + r.set_state_idle(); + } + } + usec_sleep(100 * _MSEC_); + } +} + +void wait_leader_finer_req_end(LeaderFindReq *leader_req_array, + const int64_t count, + int64_t &end_request_cnt) +{ + end_request_cnt = 0; + const int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_request_cnt < count)) { + for (int64_t idx = 0, cnt = count; idx < cnt; ++idx) { + LeaderFindReq &r = leader_req_array[idx]; + if (LeaderFindReq::DONE == r.get_state()) { + end_request_cnt += 1; + r.set_state_idle(); + } + } + usec_sleep(100 * _MSEC_); + } +} + +//////////////////////Basic function tests////////////////////////////////////////// +TEST_F(TestObLogSvrFinder, init) +{ + MockFetcherErrHandler1 err_handler; + MockSysTableHelperDerive1 mock_systable_helper; + + // AllSvrCache init + ObLogAllSvrCache all_svr_cache; + EXPECT_EQ(OB_SUCCESS, all_svr_cache.init(mock_systable_helper, err_handler)); + + // SvrFinder init + ObLogSvrFinder svr_finder; + EXPECT_EQ(OB_SUCCESS, svr_finder.init(SVR_FINDER_THREAD_NUM, err_handler, + all_svr_cache, mock_systable_helper)); + // sever list for partition + PartSvrList *part_svr_list = NULL; + generate_part_svr_list(SVR_FINDER_REQ_NUM, part_svr_list); + + // Constructing SvrFindReq, two types of requests + // 1. logid request + // 2. 
timestamp request + SvrFindReq *svr_req_array = NULL; + generate_svr_finder_requset(SVR_FINDER_REQ_NUM, part_svr_list, svr_req_array); + + // build LeaderFindReq + LeaderFindReq *leader_req_array = NULL; + generate_leader_finder_request(LEADER_FINDER_REQ_NUM, leader_req_array); + + // push request to svr_finder + for (int64_t idx = 0; idx < SVR_FINDER_REQ_NUM; idx++) { + EXPECT_EQ(OB_SUCCESS, svr_finder.async_svr_find_req(svr_req_array + idx)); + } + for (int64_t idx = 0; idx < LEADER_FINDER_REQ_NUM; idx++) { + EXPECT_EQ(OB_SUCCESS, svr_finder.async_leader_find_req(leader_req_array + idx)); + } + + // SvrFinder start + EXPECT_EQ(OB_SUCCESS, svr_finder.start()); + + // Wait for asynchronous SvrFinderReq to finish + int64_t end_svr_finder_req_cnt = 0; + wait_svr_finer_req_end(svr_req_array, SVR_FINDER_REQ_NUM, end_svr_finder_req_cnt); + // Assert + EXPECT_EQ(SVR_FINDER_REQ_NUM, end_svr_finder_req_cnt); + + // Waiting for the end of the asynchronous LeaderFinderReq + int64_t end_leader_finder_req_cnt = 0; + wait_leader_finer_req_end(leader_req_array, LEADER_FINDER_REQ_NUM, end_leader_finder_req_cnt); + // Assert + EXPECT_EQ(LEADER_FINDER_REQ_NUM, end_leader_finder_req_cnt); + + // Validate SvrFinderReq results + for (int64_t idx = 0; idx < SVR_FINDER_REQ_NUM; idx++) { + PartSvrList &svr_list = part_svr_list[idx]; + PartSvrList::SvrItemArray svr_items = svr_list.svr_items_; + int64_t EXPECT_START_LOG_ID = 0; + int64_t EXPECT_END_LOG_ID = 0; + + if (svr_req_array[idx].req_by_next_log_id_) { + EXPECT_START_LOG_ID = svr_req_array[idx].next_log_id_; + EXPECT_END_LOG_ID = EXPECT_START_LOG_ID + 10000; + } else if (svr_req_array[idx].req_by_start_tstamp_) { + EXPECT_START_LOG_ID = 0; + EXPECT_END_LOG_ID = 65536; + } + + int cnt = QUERY_CLOG_HISTORY_VALID_COUNT + QUERY_META_INFO_ADD_COUNT; + EXPECT_EQ(cnt, svr_list.count()); + // Validate log range + for (int64_t svr_idx = 0; svr_idx < cnt; svr_idx++) { + const PartSvrList::LogIdRange &range = svr_items[svr_idx].log_ranges_[0]; + if (svr_idx < QUERY_CLOG_HISTORY_VALID_COUNT) { + // clog history record + EXPECT_EQ(EXPECT_START_LOG_ID, range.start_log_id_); + EXPECT_EQ(EXPECT_END_LOG_ID, range.end_log_id_); + } else { + // Additional records + EXPECT_EQ(0, range.start_log_id_); + EXPECT_EQ(OB_INVALID_ID, range.end_log_id_); + } + } + } + + // Validate LeaderFinderReq results + ObAddr EXPECT_ADDR; + EXPECT_ADDR.set_ip_addr("127.0.0.1", 8888); + for (int64_t idx = 0; idx < LEADER_FINDER_REQ_NUM; idx++) { + LeaderFindReq &req = leader_req_array[idx]; + EXPECT_TRUE(req.has_leader_); + EXPECT_EQ(EXPECT_ADDR, req.leader_); + } + + // destroy + ob_free(part_svr_list); + ob_free(svr_req_array); + svr_finder.destroy(); + all_svr_cache.destroy(); +} + +// Used to test if SvrFinder can filter INACTIVE records +TEST_F(TestObLogSvrFinder, inactive_test) +{ + MockFetcherErrHandler1 err_handler; + MockSysTableHelperDerive2 mock_systable_helper; + + // AllSvrCache init + ObLogAllSvrCache all_svr_cache; + EXPECT_EQ(OB_SUCCESS, all_svr_cache.init(mock_systable_helper, err_handler)); + + // SvrFinder init + ObLogSvrFinder svr_finder; + EXPECT_EQ(OB_SUCCESS, svr_finder.init(SVR_FINDER_THREAD_NUM, err_handler, + all_svr_cache, mock_systable_helper)); + // Declaration of partition sever list + PartSvrList *part_svr_list = NULL; + generate_part_svr_list(SVR_FINDER_REQ_NUM, part_svr_list); + + // Constructing SvrFindReq, two types of requests + // 1. logid request + // 2. 
timestamp request + SvrFindReq *svr_req_array = NULL; + generate_svr_finder_requset(SVR_FINDER_REQ_NUM, part_svr_list, svr_req_array); + + // push request to svr_finder + for (int64_t idx = 0; idx < SVR_FINDER_REQ_NUM; idx++) { + EXPECT_EQ(OB_SUCCESS, svr_finder.async_svr_find_req(svr_req_array + idx)); + } + + // SvrFinder start + EXPECT_EQ(OB_SUCCESS, svr_finder.start()); + + // Wait for asynchronous SvrFinderReq to finish + int64_t end_svr_finder_req_cnt = 0; + wait_svr_finer_req_end(svr_req_array, SVR_FINDER_REQ_NUM, end_svr_finder_req_cnt); + // Assert + EXPECT_EQ(SVR_FINDER_REQ_NUM, end_svr_finder_req_cnt); + + // Validate SvrFinderReq results + int cnt = (QUERY_CLOG_HISTORY_VALID_COUNT + QUERY_META_INFO_ADD_COUNT) / 2; + for (int64_t idx = 0; idx < 1; idx++) { + PartSvrList &svr_list = part_svr_list[idx]; + PartSvrList::SvrItemArray svr_items = svr_list.svr_items_; + + EXPECT_EQ(cnt, svr_list.count()); + } + + ob_free(part_svr_list); + ob_free(svr_req_array); + svr_finder.destroy(); + all_svr_cache.destroy(); +} + + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_svr_finder.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_log_timer.cpp b/unittest/liboblog/test_ob_log_timer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..82a293b4f433dd625c9aea82b69b0ecdb9d5b563 --- /dev/null +++ b/unittest/liboblog/test_ob_log_timer.cpp @@ -0,0 +1,198 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#define USING_LOG_PREFIX OBLOG_FETCHER
+
+#include <gtest/gtest.h>
+#include "share/ob_define.h"
+#include "liboblog/src/ob_map_queue_thread.h"
+#include "ob_log_utils.h"
+#define private public
+#include "test_ob_log_fetcher_common_utils.h"
+#include "liboblog/src/ob_log_timer.h"
+
+using namespace oceanbase;
+using namespace common;
+using namespace liboblog;
+
+namespace oceanbase
+{
+namespace unittest
+{
+static const int MAX_THREAD_NUM = 16;
+typedef common::ObMapQueueThread<MAX_THREAD_NUM> QueueThread;
+
+// Timed task implementation
+class PushMapTimerTask : public ObLogTimerTask
+{
+public:
+  PushMapTimerTask() : host_(NULL), start_time_(0), end_time_(0), process_count_(NULL)
+  {}
+  virtual ~PushMapTimerTask() {}
+
+  void reset()
+  {
+    host_ = NULL;
+    start_time_ = 0;
+    end_time_ = 0;
+    process_count_ = NULL;
+  }
+
+  void reset(int64_t *&process_count, QueueThread *host)
+  {
+    reset();
+    process_count_ = process_count;
+    host_ = host;
+  }
+
+public:
+  virtual void process_timer_task() override
+  {
+    EXPECT_EQ(OB_SUCCESS, host_->push(this, static_cast<uint64_t>(get_timestamp())));
+    end_time_ = get_timestamp();
+    (*process_count_)++;
+  }
+private:
+  QueueThread *host_;
+  int64_t start_time_;
+  int64_t end_time_;
+  // Record the number of successfully executed timed tasks
+  int64_t *process_count_;
+private:
+  DISALLOW_COPY_AND_ASSIGN(PushMapTimerTask);
+};
+typedef PushMapTimerTask Type;
+// Timer task count
+static const int64_t TASK_COUNT = 1000;
+
+class TestObLogTimer: public ::testing::Test
+{
+public :
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+public :
+  static constexpr const int64_t TEST_TIME_LIMIT = 10 * _MIN_;
+  // ObMapQueue mod_id
+  static constexpr const char *MOD_ID = "1";
+  // Number of ObMapQueueThread worker threads
+  static const int THREAD_NUM = 6;
+  // max task count
+  static const int64_t MAX_TASK_COUNT = 10 * 1000;
+public:
+  // Generate timed task data
+  void generate_data(const int64_t count, QueueThread *host, int64_t *&process_count, Type *&datas);
+};
+
+void TestObLogTimer::generate_data(const int64_t count,
+    QueueThread *host,
+    int64_t *&process_count,
+    Type *&datas)
+{
+  datas = (Type *)ob_malloc(sizeof(Type) * count);
+  OB_ASSERT(NULL != datas);
+  for (uint64_t idx = 0; idx < count; idx++) {
+    new (datas + idx) Type();
+    datas[idx].reset(process_count, host);
+  }
+}
+
+//////////////////////Basic function tests//////////////////////////////////////////
+TEST_F(TestObLogTimer, timer)
+{
+  // ObMapQueueThread init
+  QueueThread host;
+  EXPECT_EQ(OB_SUCCESS, host.init(THREAD_NUM, MOD_ID));
+
+  // Number of timer tasks handled
+  int64_t process_timer_task_count = 0;
+  int64_t *ptr = &process_timer_task_count;
+
+  // Generate timed tasks
+  Type *datas = NULL;
+  generate_data(TASK_COUNT, &host, ptr, datas);
+  OB_ASSERT(NULL != datas);
+
+  // ObLogFixedTimer init
+  ObLogFixedTimer timer;
+  MockFetcherErrHandler1 err_handle;
+  EXPECT_EQ(OB_SUCCESS, timer.init(err_handle, MAX_TASK_COUNT));
+
+  // Insert timed tasks
+  int64_t start_push_time = 0;
+  int64_t end_push_time = 0;
+  start_push_time = get_timestamp();
+  for (int64_t idx = 0; idx < TASK_COUNT; idx++) {
+    // Record the start time of each task
+    datas[idx].start_time_ = get_timestamp();
+    EXPECT_EQ(OB_SUCCESS, timer.schedule(&datas[idx]));
+  }
+  end_push_time = get_timestamp();
+  int64_t push_take_time = end_push_time - start_push_time;
+  EXPECT_EQ(TASK_COUNT, timer.task_queue_.get_total());
+  LOG_INFO("timer push", K(push_take_time));
+
+  // ObLogTimer start
+  EXPECT_EQ(OB_SUCCESS, timer.start());
+
+  int64_t start_test_tstamp = get_timestamp();
+  while 
(((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (process_timer_task_count < TASK_COUNT)) { + } + LOG_INFO("process", K(process_timer_task_count)); + + int64_t min_interval = 1 * _SEC_; + int64_t max_interval = 0; + + for (int64_t idx = 0; idx < TASK_COUNT; idx++) { + int64_t inv = datas[idx].end_time_ - datas[idx].start_time_; + if (inv < min_interval) { + min_interval = inv; + } + + if (inv > max_interval) { + max_interval = inv; + } + } + LOG_INFO("interval", K(min_interval), K(max_interval)); + + host.destroy(); + ob_free(datas); + timer.destroy(); +} + +////////////////////////Boundary condition testing////////////////////////////////////////// +// ObLogTimer init fail +TEST_F(TestObLogTimer, init_failed) +{ + ObLogFixedTimer timer; + MockFetcherErrHandler1 err_handle; + EXPECT_EQ(OB_SUCCESS, timer.init(err_handle, MAX_TASK_COUNT)); + EXPECT_EQ(OB_INIT_TWICE, timer.init(err_handle, MAX_TASK_COUNT)); + timer.destroy(); +} + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_log_timer.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_map_queue.cpp b/unittest/liboblog/test_ob_map_queue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dd5103cc91c1e94504e81ba13f1b942dffb5292b --- /dev/null +++ b/unittest/liboblog/test_ob_map_queue.cpp @@ -0,0 +1,425 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#define USING_LOG_PREFIX OBLOG_FETCHER
+
+#include <gtest/gtest.h>
+#include "share/ob_define.h"
+#include "liboblog/src/ob_map_queue.h"
+#include "ob_log_utils.h"
+
+using namespace oceanbase;
+using namespace common;
+using namespace liboblog;
+
+namespace oceanbase
+{
+namespace unittest
+{
+class TestObMapQueue : public ::testing::Test
+{
+public :
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+public :
+  // ObMapQueue label
+  static constexpr const char *LABEL = "TestObMapQueue";
+  // push thread
+  static const int64_t ONE_PUSH_THREAD_NUM = 1;
+  static const int64_t MULTI_PUSH_THREAD_NUM = 3;
+  // pop thread
+  static const int64_t ONE_POP_THREAD_NUM = 1;
+  static const int64_t MULTI_POP_THREAD_NUM = 5;
+
+  static const int64_t TEST_TIME_LIMIT = 10 * _MIN_;
+};
+
+// ObMapQueue type
+typedef int64_t Type;
+// push ObMapQueue value
+static const int64_t START_VALUE = 0;
+static const int64_t END_VALUE = 1 * 1000 * 1000 - 1;
+static const int64_t VALUE_COUNT = END_VALUE - START_VALUE + 1;
+
+class TestPushWorker : public liboblog::Runnable
+{
+public:
+  enum State
+  {
+    IDLE,  // idle
+    REQ,   // pushing
+    DONE   // push DONE
+  };
+  // Identifies the current thread status
+  State state_;
+
+  // thread index
+  int64_t thread_idx_;
+  // thread count
+  int64_t thread_count_;
+  // ObMapQueue
+  ObMapQueue<Type> *map_queue_;
+  // record map_queue push count
+  int64_t push_count_;
+  // value interval
+  int64_t interval_;
+
+  virtual int routine()
+  {
+    int64_t start = thread_idx_ * interval_;
+    int64_t end = (thread_count_ - 1 != thread_idx_) ? start + interval_ - 1 : END_VALUE;
+    LOG_INFO("TestPushWorker", K(start), K(end));
+
+    int64_t val = start;
+    while (val <= end) {
+      EXPECT_EQ(OB_SUCCESS, map_queue_->push(val++));
+      push_count_++;
+    }
+
+    if (end + 1 == val) {
+      state_ = DONE;
+    }
+
+    return OB_SUCCESS;
+  }
+};
+
+class TestPopWorker: public liboblog::Runnable
+{
+public:
+  // thread index
+  int64_t thread_idx_;
+  // ObMapQueue
+  ObMapQueue<Type> *map_queue_;
+  // record thread map_queue pop count
+  int64_t pop_count_ CACHE_ALIGNED;
+  // record popped count across all threads
+  int64_t *end_pop_count_ CACHE_ALIGNED;
+  // Stores the values popped from the queue
+  Type *array_;
+
+  virtual int routine()
+  {
+    int ret = OB_SUCCESS;
+
+    while (OB_SUCC(ret)) {
+      Type val;
+      while (OB_SUCC(map_queue_->pop(val))) {
+        if (val >= START_VALUE && val <= END_VALUE) {
+          if (0 == array_[val]) {
+            array_[val] = val;
+            ATOMIC_INC(&pop_count_);
+          }
+        }
+      }
+
+      if (OB_EAGAIN == ret) {
+        ret = OB_SUCCESS;
+      }
+      if (ATOMIC_LOAD(end_pop_count_) == VALUE_COUNT) {
+        break;
+      }
+    }
+
+    return ret;
+  }
+};
+
+
+////////////////////// Basic function tests //////////////////////////////////////////
+// ObMapQueue init
+TEST_F(TestObMapQueue, init)
+{
+  ObMapQueue<Type> map_queue;
+  EXPECT_EQ(OB_SUCCESS, map_queue.init(LABEL));
+  EXPECT_TRUE(map_queue.is_inited());
+
+  map_queue.destroy();
+  EXPECT_FALSE(map_queue.is_inited());
+}
+
+// Test scenarios.
+// 1. single-threaded push - single-threaded pop
+// 2. single-threaded push - multi-threaded pop
+// 3. multi-threaded push - single-threaded pop
+// 4. 
multi-threaded push - multi-threaded pop +TEST_F(TestObMapQueue, push_pop_test) +{ + ObMapQueue map_queue; + EXPECT_EQ(OB_SUCCESS, map_queue.init(LABEL)); + EXPECT_TRUE(map_queue.is_inited()); + + // malloc array + Type *array = (Type *)ob_malloc(sizeof(Type) * VALUE_COUNT); + OB_ASSERT(NULL != array); + + for (int64_t test_type = 0, test_cnt = 4; test_type < test_cnt; ++test_type) { + memset(array, 0, sizeof(Type) * VALUE_COUNT); + int64_t PUSH_THREAD_NUM = 0; + int64_t POP_THREAD_NUM = 0; + int64_t end_push_count = 0; + int64_t end_pop_count = 0; + + switch (test_type) { + // single-threaded push - single-threaded pop + case 0: + PUSH_THREAD_NUM = ONE_PUSH_THREAD_NUM; + POP_THREAD_NUM = ONE_POP_THREAD_NUM; + break; + // single-threaded push - multi-threaded pop + case 1: + PUSH_THREAD_NUM = ONE_PUSH_THREAD_NUM; + POP_THREAD_NUM = MULTI_POP_THREAD_NUM; + break; + // multi-threaded push - single-threaded pop + case 2: + PUSH_THREAD_NUM = MULTI_PUSH_THREAD_NUM; + POP_THREAD_NUM = ONE_POP_THREAD_NUM; + break; + // multi-threaded push - multi-threaded pop + case 3: + PUSH_THREAD_NUM = MULTI_PUSH_THREAD_NUM; + POP_THREAD_NUM = MULTI_POP_THREAD_NUM; + break; + default: + break; + } + LOG_INFO("push_pop_test", K(test_type), K(PUSH_THREAD_NUM), K(POP_THREAD_NUM)); + + // push thread + TestPushWorker push_workers[PUSH_THREAD_NUM]; + const int64_t INTERVAL = VALUE_COUNT / PUSH_THREAD_NUM; + for (int64_t idx = 0, cnt = PUSH_THREAD_NUM; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + // assign value + w.state_ = TestPushWorker::REQ; + w.thread_idx_ = idx; + w.thread_count_ = PUSH_THREAD_NUM; + w.map_queue_ = &map_queue; + w.push_count_ = 0; + w.interval_ = INTERVAL; + // create threads + w.create(); + LOG_INFO("push_pop_test", "push thread", "create OB_SUCCESS"); + } + + // pop thread + TestPopWorker pop_workers[POP_THREAD_NUM]; + for (int64_t idx = 0, cnt = POP_THREAD_NUM; idx < cnt; ++idx) { + TestPopWorker &w = pop_workers[idx]; + // addign value + w.map_queue_ = &map_queue; + w.array_ = array; + w.pop_count_ = 0; + w.end_pop_count_ = &end_pop_count; + // create threads + w.create(); + LOG_INFO("push_pop_test", "pop thread", "create OB_SUCCESS"); + } + + // Verify the correctness of the push: verify the total number of pushes into the ObMapQueue-Type + int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_push_count < VALUE_COUNT)) { + for (int64_t idx = 0, cnt = PUSH_THREAD_NUM; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + if (TestPushWorker::DONE == w.state_) { + end_push_count += w.push_count_; + w.state_ = TestPushWorker::IDLE; + } + } + } + EXPECT_EQ(VALUE_COUNT, end_push_count); + + // Verify that the pop is correct: + // 1. verify the total number of -Types popped from ObMapQueue + // 2. 
Correctness of the fields + start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_pop_count < VALUE_COUNT)) { + for (int64_t idx = 0, cnt = POP_THREAD_NUM; idx < cnt; ++idx) { + TestPopWorker &w = pop_workers[idx]; + + int64_t pop_cnt = ATOMIC_LOAD(&w.pop_count_); + while (!ATOMIC_BCAS(&w.pop_count_, pop_cnt, 0)) { + pop_cnt = ATOMIC_LOAD(&w.pop_count_); + } + + end_pop_count += pop_cnt; + //LOG_DEBUG("pop verify", K(idx), K(pop_cnt), K(end_pop_count)); + LOG_INFO("pop verify", K(idx), K(pop_cnt), K(end_pop_count)); + } + } + EXPECT_EQ(VALUE_COUNT, end_pop_count); + + int64_t correct_field = 0; + for (int64_t idx = 0, cnt = VALUE_COUNT; idx < cnt; ++idx) { + if (idx == array[idx]) { + correct_field++; + } + } + EXPECT_EQ(VALUE_COUNT, correct_field); + + // push thread join + for (int64_t idx = 0, cnt = PUSH_THREAD_NUM; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + w.join(); + LOG_INFO("push_pop_test", "push thread", "join OB_SUCCESS"); + } + + // pop thread join + for (int64_t idx = 0, cnt = POP_THREAD_NUM; idx < cnt; ++idx) { + TestPopWorker &w = pop_workers[idx]; + w.join(); + LOG_INFO("push_pop_test", "pop thread", "join OB_SUCCESS"); + } + + EXPECT_EQ(OB_SUCCESS, map_queue.reset()); + } + + // free array + ob_free(array); + map_queue.destroy(); + EXPECT_FALSE(map_queue.is_inited()); +} + +// 1. push performance test: push data with 10 threads +// 2. pop performance test: pop data with 10 threads +TEST_F(TestObMapQueue, DISABLED_performance) +{ + int64_t start_test_tstamp = 0; + int64_t end_test_tstamp = 0; + + ObMapQueue map_queue; + EXPECT_EQ(OB_SUCCESS, map_queue.init(LABEL)); + + // push + int64_t PUSH_THREAD_NUM = 10; + const int64_t INTERVAL = VALUE_COUNT / PUSH_THREAD_NUM; + int64_t end_push_count = 0; + TestPushWorker push_workers[PUSH_THREAD_NUM]; + + start_test_tstamp = get_timestamp(); + for (int64_t idx = 0, cnt = PUSH_THREAD_NUM; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + w.state_ = TestPushWorker::REQ; + w.thread_idx_ = idx; + w.thread_count_ = PUSH_THREAD_NUM; + w.map_queue_ = &map_queue; + w.push_count_ = 0; + w.interval_ = INTERVAL; + w.create(); + LOG_INFO("push_performance", "push thread", "create OB_SUCCESS"); + } + // Detect the end of push in all threads + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_push_count < VALUE_COUNT)) { + for (int64_t idx = 0, cnt = PUSH_THREAD_NUM; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + if (TestPushWorker::DONE == w.state_) { + end_push_count += w.push_count_; + w.state_ = TestPushWorker::IDLE; + } + } + } + EXPECT_EQ(VALUE_COUNT, end_push_count); + end_test_tstamp = get_timestamp(); + + double push_time = static_cast(end_test_tstamp - start_test_tstamp) * 1.0 / 1000000; + double push_cnt_per_second = static_cast(VALUE_COUNT) * 1.0 / (push_time); + LOG_INFO("push_performance", K(end_push_count), K(push_time), "push count/s", push_cnt_per_second); + + // pop + int64_t POP_THREAD_NUM = 10; + int64_t end_pop_count = 0; + TestPopWorker pop_workers[POP_THREAD_NUM]; + + // malloc array + Type *array = (Type *)ob_malloc(sizeof(Type) * VALUE_COUNT); + OB_ASSERT(NULL != array); + memset(array, 0, sizeof(Type) * VALUE_COUNT); + + start_test_tstamp = get_timestamp(); + for (int64_t idx = 0, cnt = POP_THREAD_NUM; idx < cnt; ++idx) { + TestPopWorker &w = pop_workers[idx]; + w.map_queue_ = &map_queue; + w.array_ = array; + w.pop_count_ = 0; + w.end_pop_count_ = &end_pop_count; + w.create(); + 
LOG_INFO("pop_performance", "pop thread", "create OB_SUCCESS"); + } + + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_pop_count < VALUE_COUNT)) { + for (int64_t idx = 0, cnt = POP_THREAD_NUM; idx < cnt; ++idx) { + TestPopWorker &w = pop_workers[idx]; + + int64_t pop_cnt = ATOMIC_LOAD(&w.pop_count_); + while (!ATOMIC_BCAS(&w.pop_count_, pop_cnt, 0)) { + pop_cnt = ATOMIC_LOAD(&w.pop_count_); + } + + end_pop_count += pop_cnt; + LOG_DEBUG("pop verify", K(idx), K(pop_cnt), K(end_pop_count)); + } + } + EXPECT_EQ(VALUE_COUNT, end_pop_count); + end_test_tstamp = get_timestamp(); + + double pop_time = static_cast(end_test_tstamp - start_test_tstamp) * 1.0 / 1000000; + double pop_cnt_per_second = static_cast(VALUE_COUNT) * 1.0 / (pop_time); + LOG_INFO("pop_performance", K(end_pop_count), K(pop_time), "pop count/s", pop_cnt_per_second); + + // push thread join + for (int64_t idx = 0, cnt = PUSH_THREAD_NUM; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + w.join(); + LOG_INFO("performance", "push thread", "join OB_SUCCESS"); + } + + // pop thread join + for (int64_t idx = 0, cnt = POP_THREAD_NUM; idx < cnt; ++idx) { + TestPopWorker &w = pop_workers[idx]; + w.join(); + LOG_INFO("performance", "pop thread", "join OB_SUCCESS"); + } + + ob_free(array); + map_queue.destroy(); +} + +//////////////////////// Boundary condition testing ////////////////////////////////////////// +// ObMapQueue init fail +TEST_F(TestObMapQueue, init_failed) +{ + ObMapQueue map_queue; + EXPECT_EQ(OB_SUCCESS, map_queue.init(LABEL)); + EXPECT_EQ(OB_INIT_TWICE, map_queue.init(LABEL)); +} + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_map_queue.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_map_queue_thread.cpp b/unittest/liboblog/test_ob_map_queue_thread.cpp new file mode 100644 index 0000000000000000000000000000000000000000..08d2104662e17a5895471fc5c7987e1e43a2f87c --- /dev/null +++ b/unittest/liboblog/test_ob_map_queue_thread.cpp @@ -0,0 +1,544 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#define USING_LOG_PREFIX OBLOG_FETCHER
+
+#include <gtest/gtest.h>
+#include <vector>
+#include "share/ob_define.h"
+#include "liboblog/src/ob_map_queue_thread.h"
+#include "ob_log_utils.h"
+
+using namespace oceanbase;
+using namespace common;
+using namespace liboblog;
+using namespace std;
+
+namespace oceanbase
+{
+namespace unittest
+{
+// ObMapQueue label
+static const char *label = "test";
+// Thread num of ObMapQueueThread
+static const int THREAD_NUM = 6;
+
+// ObMapQueue element type
+struct MapQueueType
+{
+  int64_t value_;
+  uint64_t hash_val_;
+  void reset(int64_t value, uint64_t hash_val)
+  {
+    value_ = value;
+    hash_val_ = hash_val;
+  }
+  TO_STRING_KV(K(value_), K(hash_val_));
+};
+typedef MapQueueType Type;
+
+// ObMapQueue value range
+static const int64_t START_VALUE = 0;
+static const int64_t END_VALUE = 1 * 100 * 1000 - 1;
+static const int64_t VALUE_COUNT = END_VALUE - START_VALUE + 1;
+
+class TestObMapQueueThread : public ::testing::Test
+{
+public:
+  TestObMapQueueThread() {}
+  virtual ~TestObMapQueueThread() {}
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+public:
+  // push thread
+  static const int64_t ONE_PUSH_THREAD_NUM = 1;
+  static const int64_t MULTI_PUSH_THREAD_NUM = 3;
+  // time limit
+  static const int64_t TEST_TIME_LIMIT = 1 * _MIN_;
+public:
+  // generate data
+  void generate_data(const int64_t count, Type *&datas);
+};
+
+void TestObMapQueueThread::generate_data(const int64_t count, Type *&datas)
+{
+  datas = (Type *)ob_malloc(sizeof(Type) * count);
+  OB_ASSERT(NULL != datas);
+  for (int64_t idx = 0; idx < count; idx++) {
+    datas[idx].reset(idx, idx % THREAD_NUM);
+  }
+  for (int64_t idx = 0; idx < count; idx++) {
+    LOG_DEBUG("data", K(datas[idx]));
+  }
+}
+
+static const int MAX_THREAD_NUM = 16;
+typedef common::ObMapQueueThread<MAX_THREAD_NUM> QueueThread;
+// DerivedQueueThread1
+// Overrides handle() to test the correctness of ObMapQueueThread execution
+class DerivedQueueThread1 : public QueueThread
+{
+public:
+  DerivedQueueThread1(vector<vector<Type> > &handle_result) : end_handle_count_(0),
+                                                              handle_result_(handle_result),
+                                                              inited_(false) {}
+  virtual ~DerivedQueueThread1() { destroy(); }
+public:
+  int init();
+  void destroy();
+  int start();
+public:
+  // Record the number of items that have been processed by the threads
+  int64_t end_handle_count_ CACHE_ALIGNED;
+public:
+  // Implement the ObMapQueueThread dummy function handle() to override the thread handling function
+  virtual int handle(void *data, const int64_t thread_index, volatile bool &stop_flag);
+private:
+  vector<vector<Type> > &handle_result_;
+  bool inited_;
+};
+
+// DerivedQueueThread2
+// Overrides run() to test the correctness of ObMapQueueThread execution
+class DerivedQueueThread2 : public QueueThread
+{
+public:
+  DerivedQueueThread2(vector<vector<Type> > &handle_result) : end_handle_count_(0),
+                                                              handle_result_(handle_result),
+                                                              inited_(false) {}
+  virtual ~DerivedQueueThread2() { destroy(); }
+public:
+  int init();
+  void destroy();
+  int start();
+public:
+  // Record the number of items that have been processed by the threads
+  int64_t end_handle_count_ CACHE_ALIGNED;
+public:
+  // Overrides run() to test the correctness of ObMapQueueThread execution
+  virtual void run(const int64_t thread_index);
+private:
+  static const int64_t IDLE_WAIT_TIME = 10 * 1000;
+private:
+  vector<vector<Type> > &handle_result_;
+  bool inited_;
+};
+
+int DerivedQueueThread1::init()
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY(inited_)) {
+    LOG_ERROR("DerivedQueueThread1 init twice");
+    ret = OB_INIT_TWICE;
+  } else if (OB_FAIL(QueueThread::init(THREAD_NUM, label))) {
+    LOG_ERROR("init 
QueueThread fail", K(ret), K(THREAD_NUM), K(label)); + } else { + EXPECT_EQ(THREAD_NUM, QueueThread::get_thread_num()); + EXPECT_TRUE(QueueThread::is_stoped()); + end_handle_count_ = 0; + inited_ = true; + + LOG_INFO("DerivedQueueThread1 init ok", K(ret)); + } + + return ret; +} + +void DerivedQueueThread1::destroy() +{ + if (inited_) { + QueueThread::destroy(); + EXPECT_TRUE(QueueThread::is_stoped()); + inited_ = false; + + LOG_INFO("DerivedQueueThread1 destory"); + } +} + +int DerivedQueueThread1::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("DerivedQueueThread1 not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(QueueThread::start())) { + LOG_ERROR("DerivedQueueThread1 start error", K(ret)); + } else { + LOG_INFO("DerivedQueueThread1 start ok"); + } + EXPECT_FALSE(QueueThread::is_stoped()); + + return ret; +} + +int DerivedQueueThread1::handle(void *data, const int64_t thread_index, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + stop_flag = stop_flag; + Type *task = NULL; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("DerivedQueueThread1 not init"); + ret = OB_NOT_INIT; + } else if (OB_ISNULL(data) + || OB_UNLIKELY(thread_index < 0) + || OB_UNLIKELY(thread_index >= get_thread_num())) { + LOG_ERROR("invalid argument", K(thread_index), K(get_thread_num())); + ret = OB_ERR_UNEXPECTED; + } else if (OB_ISNULL(task = static_cast(data))) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", K(ret), KP(task), K(thread_index)); + } else { + LOG_DEBUG("DerivedQueueThread1 handle", K(ret), K(*task), K(thread_index)); + + handle_result_[thread_index].push_back(*task); + ATOMIC_INC(&end_handle_count_); + } + + return ret; +} + +int DerivedQueueThread2::init() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(inited_)) { + LOG_ERROR("DerivedQueueThread2 init twice"); + ret = OB_INIT_TWICE; + } else if (OB_FAIL(QueueThread::init(THREAD_NUM, label))) { + LOG_ERROR("init QueueThread fail", K(ret), K(THREAD_NUM), K(label)); + } else { + EXPECT_EQ(THREAD_NUM, QueueThread::get_thread_num()); + EXPECT_TRUE(QueueThread::is_stoped()); + end_handle_count_ = 0; + inited_ = true; + + LOG_INFO("DerivedQueueThread2 init ok", K(ret)); + } + + return ret; +} + +void DerivedQueueThread2::destroy() +{ + if (inited_) { + QueueThread::destroy(); + EXPECT_TRUE(QueueThread::is_stoped()); + inited_ = false; + + LOG_INFO("DerivedQueueThread2 destory"); + } +} + +int DerivedQueueThread2::start() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("DerivedQueueThread2 not init"); + ret = OB_NOT_INIT; + } else if (OB_FAIL(QueueThread::start())) { + LOG_ERROR("DerivedQueueThread2 start error", K(ret)); + } else { + LOG_INFO("DerivedQueueThread2 start ok"); + } + EXPECT_FALSE(QueueThread::is_stoped()); + + return ret; +} + +void DerivedQueueThread2::run(const int64_t thread_index) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!inited_)) { + LOG_ERROR("DerivedQueueThread2 not init"); + ret = OB_NOT_INIT; + } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= get_thread_num())) { + LOG_ERROR("invalid argument", K(thread_index), K(get_thread_num())); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_INFO("DerivedQueueThread2 run start", K(thread_index)); + + while (!stop_flag_ && OB_SUCCESS == ret) { + void *data = NULL; + Type *task = NULL; + + if (OB_FAIL(pop(thread_index, data))) { + if (OB_EAGAIN == ret) { + // empty + ret = OB_SUCCESS; + cond_timedwait(thread_index, IDLE_WAIT_TIME); + LOG_DEBUG("DerivedQueueThread2 pop empty"); + } else { + 
LOG_ERROR("DerivedQueueThread2 pop data error", K(ret)); + } + } else if (OB_ISNULL(data) || OB_ISNULL(task = static_cast(data))) { + LOG_ERROR("invalid argument", KPC(task), K(thread_index)); + ret = OB_ERR_UNEXPECTED; + } else { + LOG_DEBUG("DerivedQueueThread2 handle", K(ret), K(*task), K(thread_index)); + + handle_result_[thread_index].push_back(*task); + ATOMIC_INC(&end_handle_count_); + } + } + } +} + +class TestPushWorker : public liboblog::Runnable +{ +public: + enum State + { + IDLE, // empty + REQ, // pushing + DONE // push done + }; + // Identifies the current thread status + State state_; + // thread index + int64_t thread_idx_; + // thread count + int64_t thread_count_; + // push data + Type *datas_; + // value interval + int64_t interval_; + // ObMapQueueThread + QueueThread *host_; + // record thread map_queue push count + int64_t push_count_; + + void reset(const int64_t thread_idx, + const int64_t push_thread_num, + Type *datas, + QueueThread *host) + { + state_ = TestPushWorker::REQ; + thread_idx_ = thread_idx; + thread_count_ = push_thread_num; + datas_ = datas; + interval_ = VALUE_COUNT / push_thread_num; + host_ = host; + push_count_ = 0; + } + + void start() + { + // create threads + create(); + LOG_INFO("TestPushWorker start", "push worker thread", "create OB_SUCCESS"); + } + + void stop() + { + join(); + LOG_INFO("TestPushWorker join", "push worker thread", "join OB_SUCCESS"); + } + + virtual int routine() + { + int64_t start = thread_idx_ * interval_; + int64_t end = (thread_count_ - 1 != thread_idx_) ? start + interval_ - 1 : END_VALUE; + LOG_INFO("TestPushWorker", K(start), K(end)); + + int64_t idx = 0; + for (idx = start; idx <= end; idx++) { + Type *type = datas_ + idx; + EXPECT_EQ(OB_SUCCESS, host_->push(type, type->hash_val_)); + push_count_++; + LOG_DEBUG("TestPushWorker", K(idx), KPC(type)); + } + + if (end + 1 == idx) { + state_ = DONE; + } + + return OB_SUCCESS; + } +}; + +const int64_t MAX_PUSH_WORKER_NUM = 32; +TestPushWorker push_workers[MAX_PUSH_WORKER_NUM]; +// start push worker +static void start_push_worker(const int64_t push_thread_num, + Type *datas, + QueueThread *host) +{ + //push thread + for (int64_t idx = 0, cnt = push_thread_num; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + w.reset(idx, push_thread_num, datas, host); + w.start(); + } +} + +// stop push worker +static void stop_push_worker(const int64_t push_thread_num) +{ + // push thread join + for (int64_t idx = 0, cnt = push_thread_num; idx < cnt; ++idx) { + TestPushWorker &w = push_workers[idx]; + w.stop(); + } +} + +////////////////////// Basic function tests ////////////////////////////////////////// +// ObMapQueueThread init, destory, start, stop, is_stoped, get_thread_num +// overload ObMapQueueThread-handle +TEST_F(TestObMapQueueThread, DerivedQueueThread1) +{ + // genereate data + Type *datas = NULL; + generate_data(VALUE_COUNT, datas); + OB_ASSERT(NULL != datas); + + // savd result of DerivedQueueThread1 handle + vector > handle_result; + for (int64_t idx = 0; idx < THREAD_NUM; idx++) { + vector res; + res.clear(); + handle_result.push_back(res); + } + + // init and start worker thread + DerivedQueueThread1 derived1(handle_result); + EXPECT_EQ(OB_SUCCESS, derived1.init()); + EXPECT_EQ(OB_SUCCESS, derived1.start()); + + // start push thread + int64_t PUSH_THREAD_NUM = ONE_PUSH_THREAD_NUM; + start_push_worker(PUSH_THREAD_NUM, datas, &derived1); + + // Check handle completion and verify that the result totals and fields are correct + int64_t end_handle_count = 0; + 
int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_handle_count < VALUE_COUNT)) { + end_handle_count = ATOMIC_LOAD(&derived1.end_handle_count_); + usleep(static_cast<__useconds_t>(1000)); + LOG_DEBUG("handle verify", K(end_handle_count)); + } + EXPECT_EQ(VALUE_COUNT, end_handle_count); + + int64_t handle_result_count = 0; + for (int64_t idx = 0; idx < THREAD_NUM; idx++) { + int64_t cnt = handle_result[idx].size(); + LOG_INFO("DerivedQueueThread1 vector count", K(idx), K(cnt)); + handle_result_count += cnt; + for (int64_t i = 0; i < cnt; i++) { + Type t = handle_result[idx][i]; + LOG_DEBUG("type", K(t)); + EXPECT_TRUE(idx == (t.value_ % THREAD_NUM)); + EXPECT_TRUE(idx == t.hash_val_); + } + } + EXPECT_EQ(VALUE_COUNT, handle_result_count); + + stop_push_worker(PUSH_THREAD_NUM); + derived1.destroy(); + ob_free(datas); +} + +// ObMapQueueThread run, pop, cond_timewait +// overload ObMapQueueThread-run +TEST_F(TestObMapQueueThread, DerivedQueueThread2) +{ + // genereate data + Type *datas = NULL; + generate_data(VALUE_COUNT, datas); + OB_ASSERT(NULL != datas); + + // save result of DerivedQueueThread2 handle + vector > handle_result; + for (int64_t idx = 0; idx < THREAD_NUM; idx++) { + vector res; + res.clear(); + handle_result.push_back(res); + } + + // init and start worker thread + DerivedQueueThread2 derived2(handle_result); + EXPECT_EQ(OB_SUCCESS, derived2.init()); + EXPECT_EQ(OB_SUCCESS, derived2.start()); + + // start push thread + int64_t PUSH_THREAD_NUM = ONE_PUSH_THREAD_NUM; + start_push_worker(PUSH_THREAD_NUM, datas, &derived2); + + // Check handle completion and verify that the result totals and fields are correct + int64_t end_handle_count = 0; + int64_t start_test_tstamp = get_timestamp(); + while (((get_timestamp() - start_test_tstamp) < TEST_TIME_LIMIT) + && (end_handle_count < VALUE_COUNT)) { + end_handle_count = ATOMIC_LOAD(&derived2.end_handle_count_); + usleep(static_cast<__useconds_t>(1000)); + LOG_DEBUG("handle verify", K(end_handle_count)); + } + EXPECT_EQ(VALUE_COUNT, end_handle_count); + + int64_t handle_result_count = 0; + for (int64_t idx = 0; idx < THREAD_NUM; idx++) { + int64_t cnt = handle_result[idx].size(); + LOG_INFO("DerivedQueueThread2 vector count", K(idx), K(cnt)); + handle_result_count += cnt; + for (int64_t i = 0; i < cnt; i++) { + Type t = handle_result[idx][i]; + LOG_DEBUG("type", K(t)); + EXPECT_TRUE(idx == (t.value_ % THREAD_NUM)); + EXPECT_TRUE(idx == t.hash_val_); + } + } + EXPECT_EQ(VALUE_COUNT, handle_result_count); + + stop_push_worker(PUSH_THREAD_NUM); + derived2.destroy(); + ob_free(datas); +} + +////////////////////////Boundary condition testing////////////////////////////////////////// +// ObMapQueue init fail +TEST_F(TestObMapQueueThread, init_failed) +{ + QueueThread queue_thread; + EXPECT_EQ(OB_SUCCESS, queue_thread.init(THREAD_NUM, label)); + EXPECT_EQ(OB_INIT_TWICE, queue_thread.init(THREAD_NUM, label)); + queue_thread.destroy(); + + // MAX_THREAD_NUM = 16 + const int64_t INVALID_THREAD_NUM1 = 0; + const int64_t INVALID_THREAD_NUM2 = 17; + EXPECT_EQ(OB_INVALID_ARGUMENT, queue_thread.init(INVALID_THREAD_NUM1, label)); + EXPECT_EQ(OB_INVALID_ARGUMENT, queue_thread.init(INVALID_THREAD_NUM2, label)); + EXPECT_EQ(OB_SUCCESS, queue_thread.init(THREAD_NUM, label)); + queue_thread.destroy(); +} + +}//end of unittest +}//end of oceanbase + +int main(int argc, char **argv) +{ + // ObLogger::get_logger().set_mod_log_levels("ALL.*:DEBUG, TLOG.*:DEBUG"); + // 
testing::InitGoogleTest(&argc,argv); + // testing::FLAGS_gtest_filter = "DO_NOT_RUN"; + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_ob_map_queue_thread.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/liboblog/test_ob_seq_thread.cpp b/unittest/liboblog/test_ob_seq_thread.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bc598930efebecdf4e776108303a8e07eff76506 --- /dev/null +++ b/unittest/liboblog/test_ob_seq_thread.cpp @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include "ob_seq_thread.h" +namespace oceanbase +{ +namespace common +{ +class MyClass {}; + +class CThread : public ObSeqThread<256, MyClass> +{ +public: + CThread() {} + virtual ~CThread() {} + +public: + virtual int handle(void *task, const int64_t task_seq, const int64_t thread_index, volatile bool &stop_flag) + { + if (! stop_flag) { + EXPECT_EQ(task_seq + 1, (int64_t)task); + } + UNUSED(thread_index); + return 0; + } +}; + +class TestSeqThread : public ::testing::Test +{ +public: + TestSeqThread() {} + ~TestSeqThread() {} + + void SetUp() {} + void TearDown() {} +}; + +TEST_F(TestSeqThread, basic) +{ + CThread thread; + // Parameter not legal + EXPECT_EQ(OB_INVALID_ARGUMENT, thread.init(257, 100)); + EXPECT_EQ(OB_INVALID_ARGUMENT, thread.init(0, 0)); + + EXPECT_EQ(OB_SUCCESS, thread.init(256, 10000)); + EXPECT_EQ(OB_SUCCESS, thread.start()); + for (int64_t index = 0; index < 1000; index++) { + EXPECT_EQ(OB_SUCCESS, thread.push((void*)(index + 1), index, 0)); + } + sleep(1); + thread.stop(); + EXPECT_EQ(true, thread.is_stoped()); +} +} +} + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_small_arena.cpp b/unittest/liboblog/test_small_arena.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dc490e5eae695e4dce4e0e6113d4c4612224b53e --- /dev/null +++ b/unittest/liboblog/test_small_arena.cpp @@ -0,0 +1,393 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include "ob_log_utils.h" // current_time +#include "ob_small_arena.h" + +#include "lib/allocator/ob_concurrent_fifo_allocator.h" // ObConcurrentFIFOAllocator + +#define ALLOC_AND_CHECK(size) ALLOC_ALIGN_AND_CHECK(sizeof(void*), size) + +#define ALLOC_ALIGN_AND_CHECK(align_size, size) \ + do { \ + int64_t alloc_size = (size); \ + int64_t align = (align_size); \ + int64_t max_small_size = MAX_SMALL_ALLOC_SIZE(align); \ + void *ptr = sa.alloc_aligned(alloc_size, align); \ +\ + ASSERT_TRUE(NULL != ptr); \ + EXPECT_EQ(0, reinterpret_cast(ptr) & (align - 1)); \ +\ + if (alloc_size > max_small_size) { \ + large_alloc_count++; \ + } else { \ + small_alloc_count++; \ + } \ +\ + EXPECT_EQ(small_alloc_count, sa.get_small_alloc_count()); \ + EXPECT_EQ(large_alloc_count, sa.get_large_alloc_count()); \ +\ + ((char *)ptr)[alloc_size - 1] = 'a'; \ + } while (0) + +#define MAX_SMALL_ALLOC_SIZE(align) (MAX_SMALL_ALLOC_SIZE_WITHOUT_ALIGN - align + 1) + +namespace oceanbase +{ +namespace liboblog +{ + +static const int64_t SMALL_ARENA_PAGE_SIZE = 1024; +static const int64_t MAX_SMALL_ALLOC_SIZE_WITHOUT_ALIGN = SMALL_ARENA_PAGE_SIZE - ObSmallArena::SMALL_PAGE_HEADER_SIZE; +static const int64_t PAGE_SIZE = 1024; + +using namespace common; + +// TODO: add multi thread test +class TestSmallArena : public ::testing::Test +{ +public: + TestSmallArena() {} + ~TestSmallArena() {} + + virtual void SetUp(); + virtual void TearDown(); + +public: + ObConcurrentFIFOAllocator large_allocator_; + static const uint64_t tenant_id_ = 0; +}; + +void TestSmallArena::SetUp() +{ + const static int64_t LARGE_PAGE_SIZE = (1LL << 26); + const static int64_t LARGE_TOTAL_LIMIT = (1LL << 34); + const static int64_t LARGE_HOLD_LIMIT = LARGE_PAGE_SIZE; + ASSERT_EQ(OB_SUCCESS, large_allocator_.init(LARGE_TOTAL_LIMIT, LARGE_HOLD_LIMIT, LARGE_PAGE_SIZE)); + + srandom((unsigned int)get_timestamp()); +} + +void TestSmallArena::TearDown() +{ + large_allocator_.destroy(); +} + +TEST_F(TestSmallArena, smoke_test) +{ + ObSmallArena sa; + int64_t small_alloc_count = 0; + int64_t large_alloc_count = 0; + + sa.set_allocator(PAGE_SIZE, large_allocator_); + + ALLOC_AND_CHECK(8); + ALLOC_AND_CHECK(16); + ALLOC_AND_CHECK(256); + ALLOC_AND_CHECK(512); + ALLOC_AND_CHECK(17 + 8); + ALLOC_AND_CHECK(17 + 16); + ALLOC_AND_CHECK(17 + 256); + ALLOC_AND_CHECK(17 + 512); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 512); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 512); + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; + + ALLOC_AND_CHECK(8); + ALLOC_AND_CHECK(16); + ALLOC_AND_CHECK(256); + ALLOC_AND_CHECK(512); + ALLOC_AND_CHECK(17 + 8); + ALLOC_AND_CHECK(17 + 16); + ALLOC_AND_CHECK(17 + 256); + ALLOC_AND_CHECK(17 + 512); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 8); + 
ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE + 17 + 512); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_AND_CHECK((1<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 512); + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; +} + +TEST_F(TestSmallArena, alloc_small) +{ + static const int64_t TEST_COUNT = 10; + int64_t max_alloc_size = MAX_SMALL_ALLOC_SIZE(8); + int64_t small_alloc_count = 0; + int64_t large_alloc_count = 0; + ObSmallArena sa; + + sa.set_allocator(PAGE_SIZE, large_allocator_); + + for (int i = 0; i < TEST_COUNT; i++) { + ALLOC_AND_CHECK(8); + ALLOC_AND_CHECK(16); + ALLOC_AND_CHECK(256); + ALLOC_AND_CHECK(512); + ALLOC_AND_CHECK(17 + 8); + ALLOC_AND_CHECK(17 + 16); + ALLOC_AND_CHECK(17 + 256); + ALLOC_AND_CHECK(17 + 512); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 15); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 16); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 17); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 64); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 67); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 128); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 129); + + ALLOC_AND_CHECK(random() % (max_alloc_size + 1)); + } + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; + + for (int i = 0; i < TEST_COUNT; i++) { + ALLOC_AND_CHECK(8); + ALLOC_AND_CHECK(16); + ALLOC_AND_CHECK(256); + ALLOC_AND_CHECK(512); + ALLOC_AND_CHECK(17 + 8); + ALLOC_AND_CHECK(17 + 16); + ALLOC_AND_CHECK(17 + 256); + ALLOC_AND_CHECK(17 + 512); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 15); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 16); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 17); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 64); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 67); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 128); + ALLOC_AND_CHECK(SMALL_ARENA_PAGE_SIZE - 129); + + ALLOC_AND_CHECK(random() % (max_alloc_size + 1)); + } + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; +} + +TEST_F(TestSmallArena, alloc_large) +{ + static const int64_t TEST_COUNT = 10; + int64_t max_alloc_size = (1 << 22); + int64_t min_alloc_size = MAX_SMALL_ALLOC_SIZE(8) + 1; + int64_t small_alloc_count = 0; + int64_t large_alloc_count = 0; + ObSmallArena sa; + + sa.set_allocator(PAGE_SIZE, large_allocator_); + + for (int i = 0; i < TEST_COUNT; i++) { + ALLOC_AND_CHECK(min_alloc_size + 0); + ALLOC_AND_CHECK(min_alloc_size + 1); + ALLOC_AND_CHECK(min_alloc_size + 2); + ALLOC_AND_CHECK(min_alloc_size + 4); + ALLOC_AND_CHECK(min_alloc_size + 8); + ALLOC_AND_CHECK(min_alloc_size + 16); + ALLOC_AND_CHECK(min_alloc_size + 256); + ALLOC_AND_CHECK(min_alloc_size + 512); + ALLOC_AND_CHECK(min_alloc_size + 17 + 8); + ALLOC_AND_CHECK(min_alloc_size + 17 + 16); + ALLOC_AND_CHECK(min_alloc_size + 17 + 256); + ALLOC_AND_CHECK(min_alloc_size + 17 + 512); + ALLOC_AND_CHECK(min_alloc_size + 1 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 2 * 
SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 3 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 4 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 5 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 6 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 7 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK((1LL << 10) + 11); + ALLOC_AND_CHECK((1LL << 12) + 13); + ALLOC_AND_CHECK((1LL << 18) + 17); + ALLOC_AND_CHECK((1LL << 19) + 19); + ALLOC_AND_CHECK((1LL << 20) + 7); + ALLOC_AND_CHECK((1LL << 21) + 3); + + ALLOC_AND_CHECK((random() % (max_alloc_size)) + min_alloc_size); + } + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; + + for (int i = 0; i < TEST_COUNT; i++) { + ALLOC_AND_CHECK(min_alloc_size + 0); + ALLOC_AND_CHECK(min_alloc_size + 1); + ALLOC_AND_CHECK(min_alloc_size + 2); + ALLOC_AND_CHECK(min_alloc_size + 4); + ALLOC_AND_CHECK(min_alloc_size + 8); + ALLOC_AND_CHECK(min_alloc_size + 16); + ALLOC_AND_CHECK(min_alloc_size + 256); + ALLOC_AND_CHECK(min_alloc_size + 512); + ALLOC_AND_CHECK(min_alloc_size + 17 + 8); + ALLOC_AND_CHECK(min_alloc_size + 17 + 16); + ALLOC_AND_CHECK(min_alloc_size + 17 + 256); + ALLOC_AND_CHECK(min_alloc_size + 17 + 512); + ALLOC_AND_CHECK(min_alloc_size + 1 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 2 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 3 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 4 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 5 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 6 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK(min_alloc_size + 7 * SMALL_ARENA_PAGE_SIZE); + ALLOC_AND_CHECK((1LL << 10) + 11); + ALLOC_AND_CHECK((1LL << 12) + 13); + ALLOC_AND_CHECK((1LL << 18) + 17); + ALLOC_AND_CHECK((1LL << 19) + 19); + ALLOC_AND_CHECK((1LL << 20) + 7); + ALLOC_AND_CHECK((1LL << 21) + 3); + + ALLOC_AND_CHECK((random() % (max_alloc_size)) + min_alloc_size); + } + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; +} + +TEST_F(TestSmallArena, alloc_align) +{ + int64_t small_alloc_count = 0; + int64_t large_alloc_count = 0; + ObSmallArena sa; + + sa.set_allocator(PAGE_SIZE, large_allocator_); + + ALLOC_ALIGN_AND_CHECK(1, 4); + ALLOC_ALIGN_AND_CHECK(16, 8); + ALLOC_ALIGN_AND_CHECK(32, 16); + ALLOC_ALIGN_AND_CHECK(64, 256); + ALLOC_ALIGN_AND_CHECK(128, 512); + ALLOC_ALIGN_AND_CHECK(16, 17 + 8); + ALLOC_ALIGN_AND_CHECK(32, 17 + 16); + ALLOC_ALIGN_AND_CHECK(64, 17 + 256); + ALLOC_ALIGN_AND_CHECK(128, 17 + 512); + ALLOC_ALIGN_AND_CHECK(16, SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_ALIGN_AND_CHECK(32, SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_ALIGN_AND_CHECK(64, SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_ALIGN_AND_CHECK(128, SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_ALIGN_AND_CHECK(16, SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_ALIGN_AND_CHECK(32, SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_ALIGN_AND_CHECK(64, SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_ALIGN_AND_CHECK(128, SMALL_ARENA_PAGE_SIZE + 17 + 512); + ALLOC_ALIGN_AND_CHECK(16, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_ALIGN_AND_CHECK(32, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_ALIGN_AND_CHECK(64, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_ALIGN_AND_CHECK(128, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_ALIGN_AND_CHECK(16, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_ALIGN_AND_CHECK(32, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_ALIGN_AND_CHECK(64, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_ALIGN_AND_CHECK(128, 
(1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 512); + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; + + ALLOC_ALIGN_AND_CHECK(1, 4); + ALLOC_ALIGN_AND_CHECK(16, 8); + ALLOC_ALIGN_AND_CHECK(32, 16); + ALLOC_ALIGN_AND_CHECK(64, 256); + ALLOC_ALIGN_AND_CHECK(128, 512); + ALLOC_ALIGN_AND_CHECK(16, 17 + 8); + ALLOC_ALIGN_AND_CHECK(32, 17 + 16); + ALLOC_ALIGN_AND_CHECK(64, 17 + 256); + ALLOC_ALIGN_AND_CHECK(128, 17 + 512); + ALLOC_ALIGN_AND_CHECK(16, SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_ALIGN_AND_CHECK(32, SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_ALIGN_AND_CHECK(64, SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_ALIGN_AND_CHECK(128, SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_ALIGN_AND_CHECK(16, SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_ALIGN_AND_CHECK(32, SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_ALIGN_AND_CHECK(64, SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_ALIGN_AND_CHECK(128, SMALL_ARENA_PAGE_SIZE + 17 + 512); + ALLOC_ALIGN_AND_CHECK(16, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 8); + ALLOC_ALIGN_AND_CHECK(32, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 16); + ALLOC_ALIGN_AND_CHECK(64, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 256); + ALLOC_ALIGN_AND_CHECK(128, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 512); + ALLOC_ALIGN_AND_CHECK(16, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 8); + ALLOC_ALIGN_AND_CHECK(32, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 16); + ALLOC_ALIGN_AND_CHECK(64, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 256); + ALLOC_ALIGN_AND_CHECK(128, (1LL<<21) + SMALL_ARENA_PAGE_SIZE + 17 + 512); + + sa.reset(); + small_alloc_count = 0; + large_alloc_count = 0; +} + +TEST_F(TestSmallArena, init_err) +{ + void *ptr = NULL; + ObSmallArena sa; + ptr = sa.alloc(8); EXPECT_TRUE(NULL == ptr); + + sa.set_allocator(-1, large_allocator_); + ptr = sa.alloc(8); EXPECT_TRUE(NULL == ptr); + + sa.reset(); +} + +TEST_F(TestSmallArena, invalid_args) +{ + void *ptr = NULL; + ObSmallArena sa; + sa.set_allocator(PAGE_SIZE, large_allocator_); + ptr = sa.alloc(-1); EXPECT_TRUE(NULL == ptr); + ptr = sa.alloc_aligned(1,3); EXPECT_TRUE(NULL == ptr); + ptr = sa.alloc_aligned(1, 1024); EXPECT_TRUE(NULL == ptr); + sa.reset(); +} + +} // ns liboblog +} // ns oceanbase + +int main(int argc, char **argv) +{ + oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG"); + OB_LOGGER.set_log_level("DEBUG"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/liboblog/test_sp_trans_log_generator.h b/unittest/liboblog/test_sp_trans_log_generator.h new file mode 100644 index 0000000000000000000000000000000000000000..ea66f461adad19bca8bddbba5547a61045629bf0 --- /dev/null +++ b/unittest/liboblog/test_sp_trans_log_generator.h @@ -0,0 +1,474 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include +#include + +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" + +#include "liboblog/src/ob_log_instance.h" +#include "liboblog/src/ob_log_utils.h" // get_timestamp + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ +// prepare log +static const int64_t SP_PREPARE_TIMESTAMP = 10 * 1000 * 1000; +// commit log +static const int64_t SP_GLOBAL_TRANS_VERSION = 100; + +// SP Transaction log parameters +struct TransParam2 +{ + ObPartitionKey pkey_; + ObTransID trans_id_; + ObStartTransParam trans_param_; +}; + +// Sp Transaction Log Generator +class TransLogGenerator2 +{ +public: + TransLogGenerator2() + : param_(), + redo_(), + commit_(), + abort_() + { } + virtual ~TransLogGenerator2() { } +public: + void next_trans(const TransParam2 ¶m) + { + param_ = param; + } + const ObSpTransRedoLog& next_redo(const uint64_t log_id) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + + redo_.reset(); + ObVersion active_memstore_version(1); + err = redo_.init(OB_LOG_SP_TRANS_REDO, param_.pkey_, param_.trans_id_, + tenant_id, log_id, param_.trans_param_, cluster_id, active_memstore_version); + EXPECT_EQ(OB_SUCCESS, err); + ObTransMutator &mutator = redo_.get_mutator(); + if (NULL == mutator.get_mutator_buf()) { + mutator.init(true); + } + const char *data = "fly"; + char *buf = static_cast(mutator.alloc(strlen(data))); + strcpy(buf, data); + + return redo_; + } + const ObSpTransCommitLog& next_commit( + const ObRedoLogIdArray &all_redos, + const uint64_t redo_log_id) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + int64_t checksum = 0; + ObVersion active_memstore_version(1); + ObString trace_id; + + commit_.reset(); + err = commit_.init(OB_LOG_SP_TRANS_COMMIT, param_.pkey_, tenant_id, param_.trans_id_, + SP_GLOBAL_TRANS_VERSION, checksum, cluster_id, all_redos, param_.trans_param_, + active_memstore_version, redo_log_id, trace_id); + EXPECT_EQ(OB_SUCCESS, err); + return commit_; + } + const ObSpTransAbortLog& next_abort() + { + int err = OB_SUCCESS; + const uint64_t cluster_id = 1000; + + abort_.reset(); + err = abort_.init(OB_LOG_SP_TRANS_ABORT, param_.pkey_, param_.trans_id_, cluster_id); + EXPECT_EQ(OB_SUCCESS, err); + + return abort_; + } + const ObSpTransCommitLog& next_redo_with_commit( + const ObRedoLogIdArray &all_redos, + const uint64_t redo_log_id) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + int64_t checksum = 0; + ObVersion active_memstore_version(1); + ObString trace_id; + + commit_.reset(); + err = commit_.init(OB_LOG_SP_TRANS_COMMIT, param_.pkey_, tenant_id, param_.trans_id_, + SP_GLOBAL_TRANS_VERSION, checksum, cluster_id, all_redos, param_.trans_param_, + active_memstore_version, redo_log_id, trace_id); + EXPECT_EQ(OB_SUCCESS, err); + + // write redo log + ObTransMutator &mutator = commit_.get_mutator(); + if (NULL == mutator.get_mutator_buf()) { + mutator.init(true); + } + const char *data = "fly"; + char *buf = static_cast(mutator.alloc(strlen(data))); + strcpy(buf, data); + + return commit_; + } +private: + TransParam2 param_; + ObSpTransRedoLog redo_; + ObSpTransCommitLog commit_; + ObSpTransAbortLog abort_; +}; + +/* + * Responsible for generating Sp transaction logs + */ +class 
SpTransLogEntryGeneratorBase
+{
+  static const ObAddr SCHEDULER;
+public:
+  // Pass in the ObTransID, which can be used to specify different transactions for the same partition
+  SpTransLogEntryGeneratorBase(const ObPartitionKey &pkey, const ObTransID &trans_id)
+    : pkey_(pkey),
+      log_id_(0),
+      remain_log_cnt_(0),
+      is_commit_(false),
+      param_(),
+      trans_log_gen_(),
+      redos_(),
+      redo_cnt_(0),
+      commit_log_id_(-1),
+      data_len_(-1)
+  {
+    param_.pkey_ = pkey_;
+    param_.trans_id_ = trans_id;
+    param_.trans_param_.set_access_mode(ObTransAccessMode::READ_WRITE);
+    param_.trans_param_.set_isolation(ObTransIsolation::READ_COMMITED);
+    param_.trans_param_.set_type(ObTransType::TRANS_NORMAL);
+
+    buf_ = new char[buf_len_];
+    EXPECT_TRUE(NULL != buf_);
+  }
+
+  virtual ~SpTransLogEntryGeneratorBase()
+  {
+    delete[] buf_;
+  }
+
+  // Generate a normal sp transaction: redo, redo, ..., redo, commit/abort
+  // Call next_trans to start a new transaction
+  // Specify the number of redo logs, then call next_log_entry to get the log entries in order
+  void next_trans(const int64_t redo_cnt, bool is_commit)
+  {
+    // Total log count of a normal transaction = redo log count + 1 (commit/abort log)
+    remain_log_cnt_ = redo_cnt + 1;
+    is_commit_ = is_commit;
+    redos_.reset();
+    redo_cnt_ = redo_cnt;
+    commit_log_id_ = -1;
+    trans_log_gen_.next_trans(param_);
+  }
+
+  // Generate a special sp transaction: redo, redo, ..., redo, redo-commit (redo and commit logs in the same log entry)
+  // Call next_trans to start a new transaction
+  // Specify the number of redo logs, then call next_log_entry_with_redo_commit to get the log entries in order
+  void next_trans_with_redo_commit(const int64_t redo_cnt)
+  {
+    next_trans(redo_cnt, true);
+    // redo and commit share the same log entry, so remain_log_cnt_ is reassigned
+    remain_log_cnt_ = redo_cnt;
+  }
+
+  // Get next log entry.
+  // get redo, redo, ..., commit/abort in order
+  int next_log_entry(clog::ObLogEntry &log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    if (1 < remain_log_cnt_) {
+      next_redo_(log_id_, log_entry);
+      // Store redo id.
+      int err = redos_.push_back(log_id_);
+      EXPECT_EQ(OB_SUCCESS, err);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_ && is_commit_) {
+      commit_log_id_ = log_id_;
+      next_commit_(commit_log_id_, log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_ && !is_commit_) {
+      next_abort_(log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else {
+      ret = OB_ITER_END;
+    }
+
+    return ret;
+  }
+
+  // Get next log entry.
+  // get redo, redo, ..., redo-commit in order
+  int next_log_entry_with_redo_commit(clog::ObLogEntry &log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    if (1 < remain_log_cnt_) {
+      next_redo_(log_id_, log_entry);
+      // Store redo id.
+      int err = redos_.push_back(log_id_);
+      EXPECT_EQ(OB_SUCCESS, err);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_) {
+      // redo-commit
+      commit_log_id_ = log_id_;
+      next_redo_with_commit_(commit_log_id_, log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else {
+      ret = OB_ITER_END;
+    }
+
+    return ret;
+  }
+public:
+  uint64_t get_log_id()
+  {
+    return log_id_;
+  }
+protected:
+  // Returns the redo log with the specified log_id
+  void next_redo_(const uint64_t redo_log_id, clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObSpTransRedoLog &redo = trans_log_gen_.next_redo(redo_log_id);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_SP_TRANS_REDO);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = redo.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    header.generate_header(OB_LOG_SUBMIT, pkey_, redo_log_id, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), get_timestamp(), ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  void next_commit_(uint64_t commit_log_id, clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObSpTransCommitLog &commit = trans_log_gen_.next_commit(redos_, 1);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_SP_TRANS_COMMIT);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = commit.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    // Use SP_PREPARE_TIMESTAMP as the log submit timestamp: for sp transactions the partition task stores
+    // the prepare timestamp taken from the commit log, and it is used for correctness verification
+    header.generate_header(OB_LOG_SUBMIT, pkey_, commit_log_id, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), SP_PREPARE_TIMESTAMP, ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  void next_abort_(clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObSpTransAbortLog &abort = trans_log_gen_.next_abort();
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_SP_TRANS_ABORT);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = abort.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    // Use SP_PREPARE_TIMESTAMP as the log submit timestamp: for sp transactions the partition task stores
+    // the prepare timestamp taken from the commit log, and it is used for correctness verification
+    header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), SP_PREPARE_TIMESTAMP, ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  void next_redo_with_commit_(uint64_t commit_log_id, clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObSpTransCommitLog &commit = trans_log_gen_.next_redo_with_commit(redos_, 1);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_SP_TRANS_COMMIT);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = commit.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    // Use SP_PREPARE_TIMESTAMP as the log submit timestamp: for sp transactions the partition task stores
+    // the prepare timestamp taken from the commit log, and it is used for correctness verification
+    header.generate_header(OB_LOG_SUBMIT, pkey_, commit_log_id, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), SP_PREPARE_TIMESTAMP, ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+protected:
+  // Params.
+ ObPartitionKey pkey_; + uint64_t log_id_; + int64_t remain_log_cnt_; + // Indicates whether the current transaction has been committed or not + bool is_commit_; + // Gen. + TransParam2 param_; + TransLogGenerator2 trans_log_gen_; + ObRedoLogIdArray redos_; + int64_t redo_cnt_; + // prepare log id and commit log id are same for sp trans + uint64_t commit_log_id_; + + // Buf. + int64_t data_len_; + static const int64_t buf_len_ = 2 * _M_; + char *buf_; +}; + +/* + * test missing redo log, When the commit log is read, the missing redo can be detected + * + * two case: + * 1. redo, redo, redo...redo, commit + * 2. redo, redo, redo...redo, redo-commit + */ +enum SpCaseType +{ + SP_NORMAL_TRAN, + SP_REDO_WITH_COMMIT_TRAN +}; +class SpTransLogEntryGenerator1 : public SpTransLogEntryGeneratorBase +{ +public: + SpTransLogEntryGenerator1(const ObPartitionKey &pkey, const ObTransID &trans_id) + : SpTransLogEntryGeneratorBase(pkey, trans_id), + is_first(false), + miss_redo_cnt_(0) + {} + ~SpTransLogEntryGenerator1() {} +public: + // Specify the number of redo logs in redo_cnt, and the number of missing redo logs + void next_trans_with_miss_redo(const int64_t redo_cnt, + const int64_t miss_redo_cnt, + SpCaseType type) + { + if (SP_NORMAL_TRAN == type) { + next_trans(redo_cnt, true); + } else if(SP_REDO_WITH_COMMIT_TRAN == type) { + next_trans_with_redo_commit(redo_cnt); + } else { + } + miss_redo_cnt_ = miss_redo_cnt; + is_first = true; + } + + int next_log_entry_missing_redo(SpCaseType type, clog::ObLogEntry &log_entry) + { + int ret = OB_SUCCESS; + + // add redo log to redos list for miss_redo_cnt_ logs before miss + if (is_first) { + for (int64_t idx = 0; idx < miss_redo_cnt_; idx++) { + next_redo_(log_id_, log_entry); + // Store redo id. + int err = redos_.push_back(log_id_); + EXPECT_EQ(OB_SUCCESS, err); + log_id_ += 1; + remain_log_cnt_ -= 1; + } + is_first = false; + } + + if (SP_NORMAL_TRAN == type) { + ret = next_log_entry(log_entry); + } else if(SP_REDO_WITH_COMMIT_TRAN == type) { + ret = next_log_entry_with_redo_commit(log_entry); + } else { + } + + return ret; + } + + int next_miss_log_entry(const uint64_t miss_log_id, clog::ObLogEntry &miss_log_entry) + { + int ret = OB_SUCCESS; + + next_redo_(miss_log_id, miss_log_entry); + + return ret; + } + + int get_commit_log_entry(SpCaseType type, clog::ObLogEntry &log_entry) + { + int ret = OB_SUCCESS; + + if (SP_NORMAL_TRAN == type) { + next_commit_(commit_log_id_, log_entry); + } else if(SP_REDO_WITH_COMMIT_TRAN == type) { + next_redo_with_commit_(commit_log_id_, log_entry); + } else { + } + + return ret; + } + +private: + bool is_first; + int64_t miss_redo_cnt_; +}; + + +} +} diff --git a/unittest/liboblog/test_trans_log_generator.h b/unittest/liboblog/test_trans_log_generator.h new file mode 100644 index 0000000000000000000000000000000000000000..e0b5f41cbc30cbd2be44d340320e3a5bd9b240ec --- /dev/null +++ b/unittest/liboblog/test_trans_log_generator.h @@ -0,0 +1,818 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include +#include + +#include "share/ob_define.h" +#include "storage/ob_storage_log_type.h" +#include "storage/transaction/ob_trans_log.h" + +#include "liboblog/src/ob_log_instance.h" +#include "liboblog/src/ob_log_parser.h" + +#include "ob_log_utils.h" // get_timestamp +#include "liboblog/src/ob_map_queue.h" +#include "lib/oblog/ob_log_module.h" + +using namespace oceanbase; +using namespace common; +using namespace liboblog; +using namespace transaction; +using namespace storage; +using namespace clog; + +namespace oceanbase +{ +namespace unittest +{ +// prepare log +static const int64_t PREPARE_TIMESTAMP = 10 * 1000 * 1000; +// commit log +static const int64_t GLOBAL_TRANS_VERSION = 100; + +static const int64_t FIXED_PART_COUNT = 6; +static const ObPartitionLogInfo FIXED_PART_INFO[FIXED_PART_COUNT] = +{ + ObPartitionLogInfo(ObPartitionKey(1000U, 0, 6), 100, PREPARE_TIMESTAMP), + ObPartitionLogInfo(ObPartitionKey(1000U, 1, 6), 100, PREPARE_TIMESTAMP), + ObPartitionLogInfo(ObPartitionKey(1000U, 2, 6), 100, PREPARE_TIMESTAMP), + ObPartitionLogInfo(ObPartitionKey(1000U, 3, 6), 100, PREPARE_TIMESTAMP), + ObPartitionLogInfo(ObPartitionKey(1000U, 4, 6), 100, PREPARE_TIMESTAMP), + ObPartitionLogInfo(ObPartitionKey(1000U, 5, 6), 100, PREPARE_TIMESTAMP) +}; + +/* + * TransLog Generator 1. + * Generate single partition transaction logs. + * Support get trans logs in CORRECT order. + * Use: + * - Call next_trans(), specify trans params. + * - Get logs in correct order: redo, redo, ..., prepare, commit/abort. + */ +struct TransParam1 +{ + // Params used in trans log. + ObPartitionKey pkey_; + ObTransID trans_id_; + ObAddr scheduler_; + ObPartitionKey coordinator_; + ObPartitionArray participants_; + ObStartTransParam trans_param_; +}; + +class TransLogGenerator1 +{ +public: + TransLogGenerator1() + : param_(), + redo_(), + prepare_(), + commit_(), + abort_() + { } + virtual ~TransLogGenerator1() { } +public: + void next_trans(const TransParam1 ¶m) + { + param_ = param; + } + const ObTransRedoLog& next_redo(const uint64_t log_id) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + redo_.reset(); + ObVersion active_memstore_version(1); + err = redo_.init(OB_LOG_TRANS_REDO, param_.pkey_, param_.trans_id_, + tenant_id, log_id, param_.scheduler_, param_.coordinator_, + param_.participants_, param_.trans_param_, cluster_id, active_memstore_version); + EXPECT_EQ(OB_SUCCESS, err); + ObTransMutator &mutator = redo_.get_mutator(); + if (NULL == mutator.get_mutator_buf()) { + mutator.init(true); + } + const char *data = "fly"; + char *buf = static_cast(mutator.alloc(strlen(data))); + strcpy(buf, data); + return redo_; + } + const ObTransPrepareLog& next_prepare(const ObRedoLogIdArray &all_redos) + { + int err = OB_SUCCESS; + uint64_t tenant_id = 100; + const uint64_t cluster_id = 1000; + ObString trace_id; + prepare_.reset(); + ObVersion active_memstore_version(1); + err = prepare_.init(OB_LOG_TRANS_PREPARE, param_.pkey_, param_.trans_id_, + tenant_id, param_.scheduler_, param_.coordinator_, + param_.participants_, param_.trans_param_, + OB_SUCCESS, all_redos, 0, cluster_id, active_memstore_version, trace_id); + EXPECT_EQ(OB_SUCCESS, err); + return prepare_; + } + const ObTransCommitLog& next_commit(const uint64_t prepare_log_id) + { + int err = OB_SUCCESS; + const uint64_t cluster_id = 1000; + PartitionLogInfoArray ptl_ids; + + ObPartitionLogInfo ptl_id(param_.pkey_, prepare_log_id, PREPARE_TIMESTAMP); + err = ptl_ids.push_back(ptl_id); + 
EXPECT_EQ(OB_SUCCESS, err);
+
+    // Push fixed participant information
+    for (int64_t idx = 0; idx < FIXED_PART_COUNT; ++idx) {
+      err = ptl_ids.push_back(FIXED_PART_INFO[idx]);
+      EXPECT_EQ(OB_SUCCESS, err);
+    }
+
+    commit_.reset();
+    err = commit_.init(OB_LOG_TRANS_COMMIT, param_.pkey_, param_.trans_id_,
+                       ptl_ids, GLOBAL_TRANS_VERSION, 0, cluster_id);
+    EXPECT_EQ(OB_SUCCESS, err);
+    return commit_;
+  }
+  const ObTransAbortLog& next_abort()
+  {
+    int err = OB_SUCCESS;
+    const uint64_t cluster_id = 1000;
+    PartitionLogInfoArray array;
+    abort_.reset();
+    err = abort_.init(OB_LOG_TRANS_ABORT, param_.pkey_, param_.trans_id_, array, cluster_id);
+    EXPECT_EQ(OB_SUCCESS, err);
+    return abort_;
+  }
+private:
+  TransParam1 param_;
+  ObTransRedoLog redo_;
+  ObTransPrepareLog prepare_;
+  ObTransCommitLog commit_;
+  ObTransAbortLog abort_;
+};
+
+/*
+ * Transaction Log Entry Generator base
+ * Generate log entries of transactions.
+ */
+class TransLogEntryGeneratorBase
+{
+  static const ObAddr SCHEDULER;
+public:
+  // Pass in the ObTransID, which can be used to specify different transactions for the same partition
+  TransLogEntryGeneratorBase(const ObPartitionKey &pkey, const ObTransID &trans_id)
+    : pkey_(pkey),
+      log_id_(0),
+      remain_log_cnt_(0),
+      is_commit_(false),
+      param_(),
+      trans_log_gen_(),
+      prepare_id_(0),
+      redos_(),
+      redo_cnt_(0),
+      data_len_(0)
+  {
+    param_.pkey_ = pkey_;
+    param_.trans_id_ = trans_id;
+    param_.scheduler_ = SCHEDULER;
+    param_.coordinator_ = pkey_;
+    int err = param_.participants_.push_back(pkey_);
+    EXPECT_EQ(OB_SUCCESS, err);
+    param_.trans_param_.set_access_mode(ObTransAccessMode::READ_WRITE);
+    param_.trans_param_.set_isolation(ObTransIsolation::READ_COMMITED);
+    param_.trans_param_.set_type(ObTransType::TRANS_NORMAL);
+
+    buf_ = new char[buf_len_];
+    EXPECT_TRUE(NULL != buf_);
+  }
+
+  virtual ~TransLogEntryGeneratorBase()
+  {
+    delete[] buf_;
+  }
+
+  // Generate a normal transaction: redo, redo, ..., prepare, commit/abort
+  // Start a new trans.
+  // Specify the number of redo entries and call next_log_entry to get them in order
+  void next_trans(const int64_t redo_cnt, bool is_commit)
+  {
+    // Total log count of a normal transaction = redo log count + 2 (prepare log + commit/abort log)
+    remain_log_cnt_ = redo_cnt + 2;
+    is_commit_ = is_commit;
+    redos_.reset();
+    redo_cnt_ = redo_cnt;
+    trans_log_gen_.next_trans(param_);
+  }
+
+  // Generate: redo, redo, ..., redo-prepare, commit/abort
+  // Start a new trans.
+  // redo and prepare logs in the same log entry
+  void next_trans_with_redo_prepare(const int64_t redo_cnt, bool is_commit)
+  {
+    next_trans(redo_cnt, is_commit);
+    // redo and prepare share the same log entry, so remain_log_cnt_ is reassigned
+    remain_log_cnt_ = redo_cnt + 1;
+  }
+
+  // Get next log entry.
+  // normal trans: get redo, redo, ..., prepare, commit/abort in order
+  int next_log_entry(clog::ObLogEntry &log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    if (2 < remain_log_cnt_) {
+      next_redo_(log_id_, log_entry);
+      // Store redo id.
+      int err = redos_.push_back(log_id_);
+      EXPECT_EQ(OB_SUCCESS, err);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (2 == remain_log_cnt_) {
+      next_prepare_(log_id_, log_entry);
+      prepare_id_ = log_id_;
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_ && is_commit_) {
+      next_commit_(log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_ && !is_commit_) {
+      next_abort_(log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else {
+      ret = OB_ITER_END;
+    }
+
+    return ret;
+  }
+
+  // Get next log entry.
+  // Trans with redo-prepare: returns redo, redo...redo-prepare, commit/abort in order.
+  int next_log_entry_with_redo_prepare(clog::ObLogEntry &log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    if (2 < remain_log_cnt_) {
+      next_redo_(log_id_, log_entry);
+      // Store redo id.
+      int err = redos_.push_back(log_id_);
+      EXPECT_EQ(OB_SUCCESS, err);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (2 == remain_log_cnt_) {
+      // redo-prepare
+      next_redo_with_prepare_(log_id_, log_entry);
+      prepare_id_ = log_id_;
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_ && is_commit_) {
+      next_commit_(log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else if (1 == remain_log_cnt_ && !is_commit_) {
+      next_abort_(log_entry);
+      log_id_ += 1;
+      remain_log_cnt_ -= 1;
+    } else {
+      ret = OB_ITER_END;
+    }
+
+    return ret;
+  }
+public:
+  uint64_t get_log_id()
+  {
+    return log_id_;
+  }
+protected:
+  // Generate a redo log entry with the specified log_id.
+  void next_redo_(const uint64_t redo_log_id, clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObTransRedoLog &redo = trans_log_gen_.next_redo(redo_log_id);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_REDO);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = redo.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    header.generate_header(OB_LOG_SUBMIT, pkey_, redo_log_id, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), get_timestamp(), ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  // Generate a prepare log entry with the specified log_id.
+  void next_prepare_(const uint64_t prepare_log_id, clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObTransPrepareLog &prepare = trans_log_gen_.next_prepare(redos_);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_PREPARE);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = prepare.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    header.generate_header(OB_LOG_SUBMIT, pkey_, prepare_log_id, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), PREPARE_TIMESTAMP, ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  void next_commit_(clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObTransCommitLog &commit = trans_log_gen_.next_commit(prepare_id_);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_COMMIT);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = commit.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), get_timestamp(), ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  void next_abort_(clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObTransAbortLog &abort = trans_log_gen_.next_abort();
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_, pos, OB_LOG_TRANS_ABORT);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = abort.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    header.generate_header(OB_LOG_SUBMIT, pkey_, log_id_, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), get_timestamp(), ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+  void next_redo_with_prepare_(const uint64_t prepare_log_id, clog::ObLogEntry &log_entry)
+  {
+    int err = OB_SUCCESS;
+    // Gen trans log.
+    const ObTransRedoLog &redo = trans_log_gen_.next_redo(prepare_log_id);
+    const ObTransPrepareLog &prepare = trans_log_gen_.next_prepare(redos_);
+    int64_t pos = 0;
+    err = serialization::encode_i64(buf_, buf_len_,
+                                    pos, OB_LOG_TRANS_REDO_WITH_PREPARE);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = serialization::encode_i64(buf_, buf_len_, pos, 0);
+    EXPECT_EQ(OB_SUCCESS, err);
+
+    err = redo.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+    err = prepare.serialize(buf_, buf_len_, pos);
+    EXPECT_EQ(OB_SUCCESS, err);
+
+    data_len_ = pos;
+    // Gen entry header.
+    ObLogEntryHeader header;
+    header.generate_header(OB_LOG_SUBMIT, pkey_, prepare_log_id, buf_,
+                           data_len_, get_timestamp(), get_timestamp(),
+                           ObProposalID(), PREPARE_TIMESTAMP, ObVersion(0));
+    // Gen log entry.
+    log_entry.generate_entry(header, buf_);
+  }
+protected:
+  // Params.
+  ObPartitionKey pkey_;
+  uint64_t log_id_;
+  int64_t remain_log_cnt_;
+  // Marks whether the current trans commits or aborts.
+  bool is_commit_;
+  // Gen.
+  TransParam1 param_;
+  TransLogGenerator1 trans_log_gen_;
+  uint64_t prepare_id_;
+  ObRedoLogIdArray redos_;
+  int64_t redo_cnt_;
+  // Buf.
+  int64_t data_len_;
+  static const int64_t buf_len_ = 2 * _M_;
+  char *buf_;
+};
+
+const ObAddr TransLogEntryGeneratorBase::SCHEDULER = ObAddr(ObAddr::IPV4, "127.0.0.1", 5566);
+
+/*
+ * Test missing redo logs: when the prepare log is read, the missing redo logs can be detected.
+ *
+ * Two cases:
+ * 1. redo, redo, redo...prepare, commit/abort
+ * 2. redo, redo, redo...redo-prepare, commit/abort
+ */
+enum CaseType
+{
+  NORMAL_TRAN,
+  REDO_WITH_PREPARE_TRAN
+};
+class TransLogEntryGenerator1 : public TransLogEntryGeneratorBase
+{
+public:
+  TransLogEntryGenerator1(const ObPartitionKey &pkey, const ObTransID &trans_id)
+    : TransLogEntryGeneratorBase(pkey, trans_id),
+      is_first(false),
+      miss_redo_cnt_(0)
+  {}
+  ~TransLogEntryGenerator1() {}
+public:
+  // Specify the total number of redo logs in redo_cnt and the number of missing redo logs in miss_redo_cnt.
+  void next_trans_with_miss_redo(const int64_t redo_cnt, const int64_t miss_redo_cnt,
+                                 bool is_commit, CaseType type)
+  {
+    if (NORMAL_TRAN == type) {
+      next_trans(redo_cnt, is_commit);
+    } else if (REDO_WITH_PREPARE_TRAN == type) {
+      next_trans_with_redo_prepare(redo_cnt, is_commit);
+    } else {
+    }
+    miss_redo_cnt_ = miss_redo_cnt;
+    is_first = true;
+  }
+
+  int next_log_entry_missing_redo(CaseType type, clog::ObLogEntry &log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    // The first miss_redo_cnt_ redo logs are treated as missing: they are not returned,
+    // only their log ids are recorded in redos_.
+    if (is_first) {
+      for (int64_t idx = 0; idx < miss_redo_cnt_; idx++) {
+        next_redo_(log_id_, log_entry);
+        // Store redo id.
+        int err = redos_.push_back(log_id_);
+        EXPECT_EQ(OB_SUCCESS, err);
+        log_id_ += 1;
+        remain_log_cnt_ -= 1;
+      }
+      is_first = false;
+    }
+
+    if (NORMAL_TRAN == type) {
+      ret = next_log_entry(log_entry);
+    } else if (REDO_WITH_PREPARE_TRAN == type) {
+      ret = next_log_entry_with_redo_prepare(log_entry);
+    } else {
+    }
+
+    return ret;
+  }
+
+  int next_miss_log_entry(const uint64_t miss_log_id, clog::ObLogEntry &miss_log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    next_redo_(miss_log_id, miss_log_entry);
+
+    return ret;
+  }
+
+  int get_prepare_log_entry(CaseType type, clog::ObLogEntry &log_entry)
+  {
+    int ret = OB_SUCCESS;
+
+    if (NORMAL_TRAN == type) {
+      next_prepare_(prepare_id_, log_entry);
+    } else if (REDO_WITH_PREPARE_TRAN == type) {
+      next_redo_with_prepare_(prepare_id_, log_entry);
+    } else {
+    }
+
+    return ret;
+  }
+private:
+  bool is_first;
+  int64_t miss_redo_cnt_;
+};
+
+struct TransLogInfo
+{
+  // redo info
+  int64_t redo_log_cnt_;
+  ObLogIdArray redo_log_ids_;
+
+  // prepare info
+  int64_t seq_;
+  common::ObPartitionKey partition_;
+  int64_t prepare_timestamp_;
+  ObTransID trans_id_;
+  uint64_t prepare_log_id_;
+  uint64_t cluster_id_;
+
+  // commit info
+  int64_t global_trans_version_;
+  PartitionLogInfoArray participants_;
+
+  void reset()
+  {
+    redo_log_cnt_ = -1;
+    redo_log_ids_.reset();
+    seq_ = -1;
+    partition_.reset();
+    prepare_timestamp_ = -1;
+    trans_id_.reset();
+    prepare_log_id_ = -1;
+    cluster_id_ = -1;
+    global_trans_version_ = -1;
+    participants_.reset();
+  }
+
+  void reset(int64_t redo_cnt, ObLogIdArray &redo_log_ids,
+             int64_t seq, const ObPartitionKey partition, int64_t prepare_timestamp,
+             ObTransID &trans_id, uint64_t prepare_log_id, uint64_t cluster_id,
+             uint64_t global_trans_version, PartitionLogInfoArray &participants)
+  {
+    reset();
+
+    // redo
+    redo_log_cnt_ = redo_cnt;
+    redo_log_ids_ = redo_log_ids;
+    // prepare
+    seq_ = seq;
+    partition_ = partition;
+    prepare_timestamp_ = prepare_timestamp;
+    trans_id_ = trans_id;
+    prepare_log_id_ = prepare_log_id;
+    cluster_id_ = cluster_id;
+
+    // commit
+    global_trans_version_ = global_trans_version;
+    participants_ = participants;
+  }
+};
+
+/*
+ * Mock Parser 1.
+ * Read Task, revert it immediately, and count Task number.
+ */
+class MockParser1 : public IObLogParser
+{
+public:
+  MockParser1() : commit_trans_cnt_(0), abort_trans_cnt_(0), info_queue_(), res_queue_() {}
+  virtual ~MockParser1()
+  {
+    info_queue_.destroy();
+    res_queue_.destroy();
+  }
+
+  int init()
+  {
+    int ret = OB_SUCCESS;
+
+    if (OB_FAIL(info_queue_.init(MOD_ID))) {
+    } else if (OB_FAIL(res_queue_.init(MOD_ID))) {
+    } else {
+    }
+
+    return ret;
+  }
+
+  virtual int start() { return OB_SUCCESS; }
+  virtual void stop() { }
+  virtual void mark_stop_flag() { }
+  virtual int push(PartTransTask *task, const int64_t timeout)
+  {
+    int ret = OB_SUCCESS;
+    UNUSED(timeout);
+
+    if (OB_ISNULL(task)) {
+      ret = OB_INVALID_ARGUMENT;
+    } else {
+      TransLogInfo *trans_log_info = NULL;
+      int tmp_ret = OB_SUCCESS;
+
+      if (OB_SUCCESS != (tmp_ret = info_queue_.pop(trans_log_info))) {
+        // pop error
+      } else if (NULL == trans_log_info) {
+        tmp_ret = OB_ERR_UNEXPECTED;
+      } else {
+        // do nothing
+      }
+
+      bool check_result = false;
+      if (task->is_normal_trans()) {
+        // Verify the data of the partition task is correct.
+        if (OB_SUCCESS == tmp_ret) {
+          check_result = check_normal_tran(*task, *trans_log_info);
+        } else {
+          check_result = false;
+        }
+        task->revert();
+        commit_trans_cnt_ += 1;
+      } else if (task->is_heartbeat()) {
+        // Verify the data of the partition task is correct.
+        if (OB_SUCCESS == tmp_ret) {
+          check_result = check_abort_tran(*task, *trans_log_info);
+        } else {
+          check_result = false;
+        }
+        task->revert();
+        abort_trans_cnt_ += 1;
+      }
+
+      // Save the validation result; a failed push needs no handling since the popped result is validated later.
+      if (OB_SUCCESS != (tmp_ret = res_queue_.push(check_result))) {
+      }
+    }
+
+    return ret;
+  }
+  virtual int get_pending_task_count(int64_t &task_count)
+  {
+    UNUSED(task_count);
+
+    return OB_SUCCESS;
+  }
+  int64_t get_commit_trans_cnt() const { return commit_trans_cnt_; }
+  int64_t get_abort_trans_cnt() const { return abort_trans_cnt_; }
+  int push_into_queue(TransLogInfo *trans_log_info)
+  {
+    int ret = OB_SUCCESS;
+
+    if (OB_FAIL(info_queue_.push(trans_log_info))) {
+    } else {
+    }
+
+    return ret;
+  }
+  int get_check_result(bool &result)
+  {
+    int ret = OB_SUCCESS;
+
+    if (OB_FAIL(res_queue_.pop(result))) {
+    } else {
+    }
+
+    return ret;
+  }
+private:
+  // PartTransTask correctness validation:
+  // - for commit transactions, validate the redo/prepare/commit info;
+  // - for abort transactions, which are converted to heartbeats, only seq_, partition_ and
+  //   prepare_timestamp_ need to be validated.
+  bool check_normal_tran(PartTransTask &task, TransLogInfo &trans_log_info)
+  {
+    bool bool_ret = true;
+
+    // redo info
+    const SortedRedoLogList &redo_list = task.get_sorted_redo_list();
+    if (redo_list.log_num_ != trans_log_info.redo_log_cnt_) {
+      bool_ret = false;
+    } else {
+      RedoLogNode *redo_node = redo_list.head_;
+      for (int64_t idx = 0; true == bool_ret && idx < trans_log_info.redo_log_cnt_; ++idx) {
+        if (redo_node->start_log_id_ == trans_log_info.redo_log_ids_[idx]) {
+          // do nothing
+        } else {
+          bool_ret = false;
+        }
+
+        redo_node = redo_node->next_;
+      }
+    }
+
+    // prepare info
+    if (bool_ret) {
+      if (trans_log_info.seq_ == task.get_seq()
+          && trans_log_info.partition_ == task.get_partition()
+          && trans_log_info.prepare_timestamp_ == task.get_timestamp()
+          && trans_log_info.trans_id_ == task.get_trans_id()
+          && trans_log_info.prepare_log_id_ == task.get_prepare_log_id()
+          && trans_log_info.cluster_id_ == task.get_cluster_id()) {
+      } else {
+        bool_ret = false;
+        OBLOG_LOG(INFO, "compare", K(trans_log_info.seq_), K(task.get_seq()));
+        OBLOG_LOG(INFO, "compare", K(trans_log_info.partition_), K(task.get_partition()));
+        OBLOG_LOG(INFO, "compare", K(trans_log_info.prepare_timestamp_), K(task.get_timestamp()));
+        OBLOG_LOG(INFO, "compare", K(trans_log_info.trans_id_), K(task.get_trans_id()));
+        OBLOG_LOG(INFO, "compare", K(trans_log_info.prepare_log_id_), K(task.get_prepare_log_id()));
+        OBLOG_LOG(INFO, "compare", K(trans_log_info.cluster_id_), K(task.get_cluster_id()));
+      }
+    }
+
+    // commit info
+    if (bool_ret) {
+      if (trans_log_info.global_trans_version_ != task.get_global_trans_version()) {
+        bool_ret = false;
+      } else {
+        const ObPartitionLogInfo *part = task.get_participants();
+        const int64_t part_cnt = task.get_participant_count();
+
+        if (trans_log_info.participants_.count() != part_cnt) {
+          bool_ret = false;
+        } else {
+          const ObPartitionLogInfo *pinfo1 = NULL;
+          const ObPartitionLogInfo *pinfo2 = NULL;
+
+          for (int64_t idx = 0; true == bool_ret && idx < part_cnt; ++idx) {
+            pinfo1 = &trans_log_info.participants_.at(idx);
+            pinfo2 = part + idx;
+
+            if (pinfo1->get_partition() == pinfo2->get_partition()
+                && pinfo1->get_log_id() == pinfo2->get_log_id()
+                && pinfo1->get_log_timestamp() == pinfo2->get_log_timestamp()) {
+              // do nothing
+            } else {
+              bool_ret = false;
+            }
+          }
+        }
+      }
+    }
+
+    return bool_ret;
+  }
+
+  bool check_abort_tran(PartTransTask &task, TransLogInfo &trans_log_info)
+  {
+    bool bool_ret = true;
+
+    if (trans_log_info.seq_ == task.get_seq()
+        && trans_log_info.partition_ == task.get_partition()
+        && trans_log_info.prepare_timestamp_ == task.get_timestamp()) {
+    } else {
+      bool_ret = false;
+    }
+
+    return bool_ret;
+  }
+private:
+  static const int64_t MOD_ID = 1;
+private:
+  int64_t commit_trans_cnt_;
+  int64_t abort_trans_cnt_;
+  // Saves the expected TransLogInfo for each transaction.
+  ObMapQueue<TransLogInfo *> info_queue_;
+  // Saves the verification results.
+  ObMapQueue<bool> res_queue_;
+};
+
+class MockParser2 : public IObLogParser
+{
+public:
+  MockParser2() : commit_trans_cnt_(0) {}
+  virtual ~MockParser2() { }
+
+  virtual int start() { return OB_SUCCESS; }
+  virtual void stop() { }
+  virtual void mark_stop_flag() { }
+  virtual int push(PartTransTask *task, const int64_t timeout)
+  {
+    UNUSED(timeout);
+    if (OB_ISNULL(task)) {
+    } else if (task->is_normal_trans()) {
+      task->revert();
+      commit_trans_cnt_ += 1;
+    }
+
+    return OB_SUCCESS;
+  }
+  virtual int get_pending_task_count(int64_t &task_count)
+  {
+    UNUSED(task_count);
+
+    return OB_SUCCESS;
+  }
+  int64_t get_commit_trans_cnt() const { return commit_trans_cnt_; }
+private:
+  int64_t commit_trans_cnt_;
+};
+
+
+}
+}
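For orientation, the helpers above are meant to be driven from gtest cases defined later in this file (outside this excerpt). The following is a minimal sketch of the intended call pattern, assuming only the classes above plus gtest; the test name, the default-constructed ObTransID, and the omission of any parser wiring are illustrative assumptions, not part of the patch:

    TEST(TransLogEntryGeneratorExample, commit_trans_entry_order)
    {
      // Same table-id / partition-count layout as FIXED_PART_INFO above.
      ObPartitionKey pkey(1000U, 0, 6);
      ObTransID trans_id;  // default-constructed, for illustration only
      TransLogEntryGeneratorBase gen(pkey, trans_id);

      // A committed transaction with 3 redo logs:
      // expected entry order is redo, redo, redo, prepare, commit.
      gen.next_trans(3, true /* is_commit */);

      clog::ObLogEntry entry;
      int64_t entry_cnt = 0;
      int ret = OB_SUCCESS;
      while (OB_SUCCESS == (ret = gen.next_log_entry(entry))) {
        // Each entry would normally be fed to the parser/resolver under test here.
        entry_cnt += 1;
      }
      EXPECT_EQ(OB_ITER_END, ret);
      EXPECT_EQ(5, entry_cnt);  // 3 redo + 1 prepare + 1 commit
    }

The mock parsers follow the same pattern: MockParser1 is handed the expected TransLogInfo through push_into_queue() before the log entries are replayed, and the per-transaction comparison result is read back with get_check_result().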