diff --git a/.github/workflows/buildbase/action.yml b/.github/workflows/buildbase/action.yml index 9e4b408f68cae5573efaaa2d220e0c0e04f9a676..9006f48aab6c798977eb16a7de03c7f1e781ad4d 100644 --- a/.github/workflows/buildbase/action.yml +++ b/.github/workflows/buildbase/action.yml @@ -17,8 +17,10 @@ runs: bash build.sh init echo "$GITHUB_WORKSPACE/deps/3rd/usr/local/oceanbase/devtools/bin" >> $GITHUB_PATH + # on centos7, we cannot use the latest `node`. + # ccache-action@v1.2 uses `node20` which cannot works on centos7 but node16 works fun - name: Setup ccache - uses: hendrikmuhs/ccache-action@v1.2 + uses: hendrikmuhs/ccache-action@v1.2.11 with: max-size: 800M save: ${{inputs.save_cache}} diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index 1f61c73c355ab3b2f79aef1361f456016d55af63..8de9e544addf1af228d8f472eb2dfc29d2373f03 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -7,40 +7,32 @@ on: branches: [ master, develop ] paths-ignore: - 'docs/**' - - '.github/**' + - '.github/ISSUE_TEMPLATE/**' + - '.github/pull_request_template.md' - 'README.md' - 'README_CN.md' - 'CONTRIBUTING.md' jobs: - centos-build: - runs-on: ubuntu-20.04 - container: centos:7 - steps: - - uses: actions/checkout@v3 - - - name: Install centos environment - shell: bash - run: yum install -y git wget rpm* cpio make glibc-devel glibc-headers binutils m4 - - - name: Cache deps - id: cache-deps - uses: actions/cache@v3 - env: - cache-name: cache-deps - with: - key: ${{ runner.os }}-build-${{ env.cache-name }}-el7.x86_64-${{ hashFiles('deps/init/oceanbase.el7.x86_64.deps') }} - path: deps/3rd - - - name: Build - uses: ./.github/workflows/buildbase - with: - save_cache: ${{github.event_name == 'push'}} - os: 'centos7' - ubuntu-build: runs-on: ubuntu-22.04 steps: + - name: Free Disk Space + uses: insightsengineering/disk-space-reclaimer@v1 + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + 
tools-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + docker-images: true + - uses: actions/checkout@v3 - name: Install ubuntu environment diff --git a/.github/workflows/farm.yml b/.github/workflows/farm.yml index 9eb9d2c96c0b19665830dd2abe9e62ff6f4d76a2..3b757baee4addfe37fad9eb31f45e2ae625f992f 100644 --- a/.github/workflows/farm.yml +++ b/.github/workflows/farm.yml @@ -5,7 +5,8 @@ on: branches: [ master,develop ] paths-ignore: - 'docs/**' - - '.github/**' + - '.github/ISSUE_TEMPLATE/**' + - '.github/pull_request_template.md' - 'README.md' - 'README_CN.md' - 'CONTRIBUTING.md' diff --git a/.github/workflows/mkbook.yml b/.github/workflows/mkbook.yml new file mode 100644 index 0000000000000000000000000000000000000000..2380dd7f09484704a74aaea1e0d251b43f9778d1 --- /dev/null +++ b/.github/workflows/mkbook.yml @@ -0,0 +1,66 @@ +# Sample workflow for building and deploying a mdBook site to GitHub Pages +# +# To get started with mdBook see: https://rust-lang.github.io/mdBook/index.html +# +name: Deploy mdBook site to Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: ["develop"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Build job + build: + runs-on: ubuntu-latest + env: + MDBOOK_VERSION: 0.4.36 + steps: + - uses: actions/checkout@v4 + - name: Prepare + run: | + mkdir mdbook + mv docs mdbook/src + + - name: Install mdBook + run: | + curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSf -y | sh + rustup update + cargo install --version ${MDBOOK_VERSION} mdbook + - name: Setup Pages + id: pages + uses: actions/configure-pages@v4 + - name: Build with mdBook + run: mdbook build + working-directory: mdbook + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: ./mdbook/book + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 \ No newline at end of file diff --git a/README.md b/README.md index d7b13a851884a6a0f247a5dd2f9e0b46a3161055..f947dc7e39105a9f9cf742f7427f356a7417f14d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,9 @@ Join Slack + + Ask on Stack Overflow +

English | [中文版](README_CN.md) @@ -135,6 +138,7 @@ OceanBase Database is licensed under the Mulan Public License, Version 2. See th Join the OceanBase community via: * [Slack Workspace](https://join.slack.com/t/oceanbase/shared_invite/zt-1e25oz3ol-lJ6YNqPHaKwY_mhhioyEuw) +* [Ask on Stack Overflow](https://stackoverflow.com/questions/tagged/oceanbase) * [Chinese User Forum](https://ask.oceanbase.com/) * DingTalk Group: 33254054 ([QR code](images/dingtalk.svg)) * WeChat Group (Add the assistant with WeChat ID: OBCE666) diff --git a/README_CN.md b/README_CN.md index b5fb7726101d8ba980cc5a9369859bd5e2ab0c4c..a48cb2e17fad4e4e51a61e356960699accc7a776 100644 --- a/README_CN.md +++ b/README_CN.md @@ -31,6 +31,9 @@ Join Slack + + Ask on Stack Overflow +

[English](README.md) | 中文版 @@ -135,5 +138,6 @@ OceanBase 数据库根据 Mulan 公共许可证版本 2 获得许可。有关详 * [中文论坛](https://ask.oceanbase.com/) * [Slack Workspace](https://join.slack.com/t/oceanbase/shared_invite/zt-1e25oz3ol-lJ6YNqPHaKwY_mhhioyEuw) +* [Ask on Stack Overflow](https://stackoverflow.com/questions/tagged/oceanbase) * 钉钉群: 33254054 ([二维码](images/dingtalk.svg)) * 微信群 (添加微信小助手: OBCE666) diff --git a/docs/README.md b/docs/README.md index a40d550dbe15bcbfbebf6d33521316370bf4cfe5..eaa59a52164af1cc86ec5c136727ba0eb64e6f08 100644 --- a/docs/README.md +++ b/docs/README.md @@ -12,11 +12,19 @@ At present, the guide is composed of the following parts: 1. **Get started**: Setting up the development environment, build and connect to the OceanBase server, the subsections are based on an imagined newbie user journey. 1. [Install toolchain](toolchain.md) 2. [Get the code, build and run](build-and-run.md) - 3. Set up an IDE - 4. [Write and run unit tests](unittest.md) - 5. [Debug](debug.md) - 6. Commit code and submit a pull request -2. **Contribute to OceanBase**: helps you quickly get involved in the OceanBase community, which illustrates what contributions you can make and how to quickly make one. + 3. [Set up an IDE](ide-settings.md) + 4. [Coding Conventions](coding-convension.md) + 5. [Write and run unit tests](unittest.md) + 6. [Running MySQL test](mysqltest.md) + 7. [Debug](debug.md) + 8. Commit code and submit a pull request + + Before you start developing a big feature, you should read the content below; it can help you understand OceanBase better. + 1. [Logging System](logging.md) + 2. [Memory Management](memory.md) + 3. [Containers](container.md) + 4. [Coding Standard](coding_standard.md) +3. **Contribute to OceanBase**: helps you quickly get involved in the OceanBase community, which illustrates what contributions you can make and how to quickly make one.
## User documents diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..e6cb97a9690215163eb71fe4c100c1b462f2dbce --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,14 @@ +# Summary + +[OceanBase Development Guide](README.md) +[Install toolchain](toolchain.md) +[Get the code, build and run](build-and-run.md) +[Set up an IDE](ide-settings.md) +[Coding Conventions](coding-convension.md) +[Write and run unit tests](unittest.md) +[Running MySQL test](mysqltest.md) +[Debug](debug.md) +[Logging System](logging.md) +[Memory Management](memory.md) +[Containers](container.md) +[Coding Standard](coding_standard.md) diff --git a/docs/coding-convension.md b/docs/coding-convension.md new file mode 100644 index 0000000000000000000000000000000000000000..8e7fa5a4c09e8fd9b0b06988138dbf10511c8df5 --- /dev/null +++ b/docs/coding-convension.md @@ -0,0 +1,226 @@ +OceanBase is a giant project that has been developed for more than ten years and contains millions of lines of C++ code. It already has many unique programming habits. Here are some OceanBase programming habits to help people who come into contact with the OceanBase source code for the first time understand and get used to it more easily. For more detailed information, please refer to ["OceanBase C++ Coding Standard"](./coding_standard.md). + +# Naming Convention + +- File naming + +Code file names in OceanBase all start with `ob_`, although a few older files are exceptions. + +- Class naming + +Classes all start with `Ob` and use camelCase/Pascal form, and there are also some exceptions for old classes. + +- Function names, variables, etc. + +Both function names and variables use lowercase naming separated by `_`. Member variables also have `_` added as a suffix.
+ +# Coding Style + +OceanBase uses some relatively simple coding styles to try to make the code readable and clear, such as adding necessary spaces for operator brackets, avoiding overly long code and overly long functions, adding necessary comments, reasonable naming, etc. Since the coding style has many details, new developers can just refer to the coding style in the current code to write code. This is also a suggestion for participating in other projects for the first time. We should try to keep it consistent with the original style. + +There is no need to worry about the styles that you are not sure about. You can discuss it with us, or after submitting the code, someone will offer suggestions or work on the code with you. + +# Functional Coding Habits + +## Prohibiting STL Containers + +Since OceanBase supports multi-tenant resource isolation, in order to facilitate memory control, OceanBase prohibits the use of STL, boost and other containers. At the same time, OceanBase provides its own containers, such as `ObSEArray`, etc. For more information about OceanBase containers, please refer to [OceanBase Container Introduction](./container.md). + +## Be Cautious with the New C++ Standard + +OceanBase does not encourage the use of some syntax of the new C++ standard, such as auto, smart pointers, move semantics, range-based loops, lambda, etc. OceanBase believes that these will bring many negative effects, such as: + +- Improper use of `auto` can cause serious performance problems, but it only brings syntactic convenience; +- Smart pointers cannot solve the problem of object memory usage, and improper use can also cause performance problems; +- The use of move is extremely complex, and it will lead to deeply hidden bugs unless everyone understands it correctly. + +Of course, OceanBase does not exclude all new standards, such as encouraging the use of override, final, constexpr, etc.
If you are not sure whether a certain syntax can be used, you can search and confirm in ["OceanBase C++ Coding Standard"](./coding_standard.md). + +## Single Entrance and Single Exit + +It is mandatory for all functions to return at the end, and it is prohibited to call global jump instructions such as return, goto, and exit midway. This is also the most confusing part for everyone who comes into contact with OceanBase code for the first time. + +In order to achieve this requirement, there will be a lot of `if/else if` in the code, and there are many less intuitive conditional judgments such as `OB_SUCC(ret)` in the `for` loop. At the same time, in order to reduce nesting, the macro `FALSE_IT` will be used to execute certain statements. For example: + +```cpp +int ObMPStmtReset::process() +{ + int ret = OB_SUCCESS; + ... + if (OB_ISNULL(req_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid packet", K(ret), KP(req_)); + } else if (OB_INVALID_STMT_ID == stmt_id_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("stmt_id is invalid", K(ret)); + } else if (OB_FAIL(get_session(session))) { + LOG_WARN("get session failed"); + } else if (OB_ISNULL(session)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL or invalid", K(ret), K(session)); + } else if (OB_FAIL(process_kill_client_session(*session))) { + LOG_WARN("client session has been killed", K(ret)); + } else if (FALSE_IT(session->set_txn_free_route(pkt.txn_free_route()))) { + } else if (OB_FAIL(process_extra_info(*session, pkt, need_response_error))) { + LOG_WARN("fail get process extra info", K(ret)); + } else if (FALSE_IT(session->post_sync_session_info())) { + } else if (FALSE_IT(need_disconnect = false)) { + } else if (OB_FAIL(update_transmission_checksum_flag(*session))) { + LOG_WARN("update transmisson checksum flag failed", K(ret)); + } else { + // ... + } + return ret; +} +``` + +A lot of `if/else if` are used in the code, and the `FALSE_IT` macro is used to minimize the nesting of ifs.
+ +It is worth mentioning that similar functions will write `int ret = OB_SUCCESS;` at the beginning of the function, using ret as the function return value, and many macros will also default to the existence of ret. + +## Function Returns Error Code + +For most functions, the function is required to have an int return value, and the return value can be explained using the error code `ob_errno.h`. +Most of the functions mentioned here include some functions for obtaining values, such as the `at` function of `ObSEArray` + +```cpp +int at(int64_t idx, T &obj); +``` + +**Which functions do not need to return int values?** + +Relatively simple functions that return class attributes, such as ObSEArray's function: + +```cpp +int64_t get_capacity(); +``` + +The value will be returned directly without the int error code. +Or similar simple judgment functions do not need to return int error codes. + +## Need to Determine the Validity of All Return Values and Parameters + +OceanBase requires that as long as the function has a return value, the return value must be tested, and "check if possible." Function parameters, especially pointers, must be checked for validity before use. + +For example: + +```cpp +int ObDDLServerClient::abort_redef_table(const obrpc::ObAbortRedefTableArg &arg, sql::ObSQLSessionInfo *session) +{ + int ret = OB_SUCCESS; + ... + obrpc::ObCommonRpcProxy *common_rpc_proxy = GCTX.rs_rpc_proxy_; + if (OB_UNLIKELY(!arg.is_valid())) { // Check the validity of the parameters passed in + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_ISNULL(common_rpc_proxy)) { // Before using pointers, check it first + ret = OB_ERR_UNEXPECTED; + LOG_WARN("common rpc proxy is null", K(ret)); + } else { + ... + } + return ret; +} +``` + +## Memory Management + +Memory management is a very troublesome issue in C/C++ programs. 
OceanBase has done a lot of work for memory management, including efficient memory allocation, memory problem detection, tenant memory isolation, etc. OceanBase provides a set of memory management mechanisms for this purpose, and also prohibits the direct use of C/C++ native memory allocation interfaces in programs, such as malloc, new, etc. + +The simplest, OceanBase provides the `ob_malloc/ob_free` interface to allocate and release memory: + +```cpp +void *ptr = ob_malloc(100, ObModIds::OB_COMMON_ARRAY); + +// do something + +if (NULL != ptr) { + // release resource + ob_free(ptr, ObModIds::OB_COMMON_ARRAY); + ptr = NULL; // set the pointer to null after free +} +``` + +OceanBase requires that the pointer must be assigned to null immediately after the memory is released. +For more information about memory management, please refer to [OceanBase Memory Management](./memory.md). + +# Some Conventional Interfaces + +## init/destroy + +OceanBase requires that only some very lightweight data initialization work can be implemented in the constructor, such as variables initialized to 0, pointers initialized to nullptr, etc. Because in the constructor, it is not easy to handle some complex exception scenarios, and the return value cannot be given. Most classes in OceanBase have an init function, which is usually executed after the constructor and has an int error code as the return value. Do some more complex initialization work here. Correspondingly, the destroy function is usually provided to do resource destruction. + +## reuse/reset + +Memory caching is a very effective way of improving performance. Many classes in OceanBase will have reuse/reset interfaces to facilitate the subsequent reuse of an object. Reuse usually represents lightweight cleanup work, while reset will do more resource cleanup work. But you need to look at the specific implementation class and cannot generalize. 
+ +## Operator Overloading + +C++ provides operator overloading functions that are very convenient for writing programs, but these overloadings often bring a lot of burden, making the code difficult to read and the functions misused. For example, operator overloading may lead to implicit type conversion without the programmer's knowledge, or a seemingly simple operation may have a relatively high overhead. + +In addition, try to avoid using `operator=` and try to copy objects using `deep_copy` and `shallow_copy`. + +# Commonly Used Macros + +**OB_SUCC** + +Determine whether a statement returns successfully, equivalent to `OB_SUCCESS == (ret = func())` + +```cpp +ret = OB_SUCCESS; +if (OB_SUCC(func())) { + // do something +} +``` + +**OB_FAIL** + +Similar to `OB_SUCC`, it just determines whether a certain statement fails to execute: + +```cpp +ret = OB_SUCCESS; +if (OB_FAIL(func())) { + // do something +} +``` + +**OB_ISNULL** + +Determine whether the pointer is null, equivalent to `nullptr == ptr`, + +```cpp +if (OB_ISNULL(ptr)) { + // do something +} +``` + +**OB_NOT_NULL** + +Determine whether the pointer is non-null, equivalent to `nullptr != ptr`, + +```cpp +if (OB_NOT_NULL(ptr)) { + // do something +} +``` + +**K** + +Usually used in output logs, the usage K(obj), where obj can be a common type variable or a class object (must implement `to_string`), will be expanded into `"obj", obj`, and will eventually be output in the log "obj=123". for example: + +```cpp +LOG_WARN("fail to exec func, ", K(ret)); +``` + +**DISALLOW_COPY_AND_ASSIGN** + +Used in a class declaration to indicate that operations such as copy assignment are prohibited. + +```cpp +class LogReconfirm +{ + ... 
+private: + DISALLOW_COPY_AND_ASSIGN(LogReconfirm); +}; +``` diff --git a/docs/container.md b/docs/container.md new file mode 100644 index 0000000000000000000000000000000000000000..7ddda4b12658b90978b8bf8757114e4aa7ec001e --- /dev/null +++ b/docs/container.md @@ -0,0 +1,694 @@ +# Introduction + +C++ STL provides many convenient containers, such as vector, map, unordered_map, etc. Due to OceanBase programming style and memory control, the use of STL containers is prohibited in OceanBase. OceanBase provides some container implementations, including arrays, linked lists, HashMap, etc. This document will introduce some of these containers. + +> This document assumes that you already have a certain understanding of C++ STL containers. + +> pair is not a container, so it can be used in OceanBase. + +> Due to historical reasons, OceanBase contains some container code that is no longer recommended but has not been deleted. + +# String +The string class provided by OceanBase is ObString. Code reference ob_string.h. + +Before introducing ObString's interface, let's first look at ObString's memory management method, which will make it easier to understand ObString's interface design. + +The two biggest differences from STL string are: +1. ObString does not manage memory, memory is transferred from the outside, and the life cycle of the memory buffer is also controlled externally; +2. ObString does not end with '\0'. + +This is also an important point to pay attention to when using ObString. + +The memory of ObString is passed in from the outside, and three member variables are stored internally: +```cpp + char *ptr_; /// memory pointer + obstr_size_t buffer_size_; /// Memory buffer length + obstr_size_t data_length_; /// Valid data length +``` + +> obstr_size_t is used in ObString to represent the length, and its type is int32_t + +Refer to the current memory maintenance mode of ObString and the commonly used interfaces for strings.
The commonly used interfaces of ObString are as follows: + +```cpp +/** + * Constructor + * + * Construct the buffer data and effective data length of the string + * + * There are also some derived constructors, such as omitting the buffer length + * (the buffer length is consistent with the data length) + */ +ObString(const obstr_size_t size, const obstr_size_t length, char *ptr); + +/** + * an empty string? + */ +bool empty() const; + +/** + * Reassign a new buffer/string + */ +void assign_buffer(char *buffer, const obstr_size_t size); + +/** + * The length of the valid data, or the length of the string + */ +obstr_size_t length() const; + +/** + * The length of the memory buffer + */ +obstr_size_t size() const; + +/** + * Data pointer + */ +const char *ptr() const; + +/** + * Case insensitively comparison + * + * @NOTE: Although ObString does not specify that it ends with '\0', + * strncasecmp is used in the implementation here, so please pay attention + * when using this function. + */ +int case_compare(const ObString &obstr) const; +int case_compare(const char *str) const; + +/** + * Case-sensitive comparison + * + * @NOTE: Compared with case_compare, strncmp is not used here, + * but memcmp is used to compare the buffer length. + */ +int compare(const ObString &obstr) const; +int32_t compare(const char *str) const; +``` + +ObString also has some other interfaces, just browse the ob_string.h code if needed. + +# Array + +OceanBase's array interface design is similar to STL vector, but it is more in line with OceanBase's style. For example, the interface will have an int return value indicating success or failure of execution. OceanBase provides multiple arrays with different implementations, but the interfaces they provide are similar. + +Commonly used array implementation classes all inherit the same interface `ObIArray`. Let's take a look at the interface definition first, and then introduce the differences between different array implementations. 
+ +## ObIArray + +There is no memory allocator specified in the interface class of the array. + +```cpp +/** + * The default constructor + */ +ObIArray(); + +/** + * Accept the specified array + * + * The interface class will not take over data-related memory. + * Memory processing depends on the specific implementation class. + */ +ObIArray(T *data, const int64_t count); + +/** + * Similar to vector::push_back, adds an element at the end + * @return Return OB_SUCCESS when successfully + */ +int push_back(const T &obj); + +/** + * Remove the last element + * @NOTE It is very likely that the destructor will not be called. + * You need to look at the specific implementation class. + */ +void pop_back(); + +/** + * Remove the last element and copy the last element to obj + * @return Return OB_SUCCESS when successfully + */ +int pop_back(T &obj); + +/** + * Remove element at specified position + */ +int remove(int64_t idx); + +/** + * Get the element at the specified position + * @return OB_SUCCESS is returned successfully. + * If the specified location does not exist, a failure will be returned. + */ +int at(int64_t idx, T &obj); + +/** + * Reset the array. Similar to vector::clear + */ +void reset(); + +/** + * Reuse arrays. Depends on the implementation + */ +void reuse(); + +/** + * Destroy this array, which has the same effect as calling the destructor + */ +void destroy(); + +/** + * Reserve a specified amount of memory space. Does not do object initialization + */ +int reserve(int64_t capacity); + +/** + * Reserve a specified size of memory space, usually the implementation + * class will execute the object's constructor + */ +int prepare_allocate(int64_t capacity); + +/** + * Copy and destroy current data from another array + */ +int assign(const ObIArray &other); +``` + +## ObArray +ObArray manages memory by itself. When declaring the ObArray template class, you need to specify an allocator, or use the default allocator `ModulePageAllocator`. 
Since OceanBase requires all actions to determine the return value, it is not recommended to use ObArray's `operator=` and other functions without return values. + +Many behaviors of ObArray are similar to STL vectors. Each time the memory is expanded, the behavior is similar. It will expand twice the current data size, but up to `block_size_` size. The default value of `block_size_` is `OB_MALLOC_NORMAL_BLOCK_SIZE` (think of it as 8K). + +Code reference ob_array.h. + +## ObSEArray +Similar to ObArray, it will be doubled in size when expanded, not exceeding `block_size_`. + +Different from ObArray, ObSEArray has an additional template parameter `LOCAL_ARRAY_SIZE`, which can accommodate a certain amount of elements without additional memory allocation. Therefore ObSEArray may be able to directly use stack memory instead of heap memory: + +```cpp +char local_data_buf_[LOCAL_ARRAY_SIZE * sizeof(T)]; +``` +If the space later becomes insufficient and the array needs to be expanded, `local_data_buf_` will no longer store valid data but will apply for additional memory. Therefore, we must consider it comprehensively and give a reasonable `LOCAL_ARRAY_SIZE` to make ObSEArray more efficient. + +Reference code `ob_se_array.h`. + +## ObFixedArray +As the name suggests, it is a fixed-size array. Once the capacity size is determined, it cannot be changed. Code reference `ob_fixed_array.h`. + +## ObVector +ObVector is not a subclass of ObIArray. Its performance and interface design are very similar to ObIArray, so you can use the subclass of ObIArray. If you are interested, please read the source code `ob_vector.h` and its implementation file `ob_vector.ipp`. + +# List +Unlike arrays, linked lists do not have a unified interface. However, the interface design here is also very similar to that in STL. There are two most commonly used linked lists, one is ObList and the other is ObDList.
+ +## ObList + +ObList is an ordinary circular double linked list, refer to `ob_list.h` for the code. During construction, the memory allocator needs to be passed in. Commonly used interfaces are as follows. + +```cpp +/** + * Class statement + * @param T element type + * @param Allocator memory allocator + */ +template +class ObList; + +/** + * Constructor. You must pass a memory allocator + */ +ObList(Allocator &allocator); + +/** + * Insert the specified element at the end of the linked list + */ +int push_back(const value_type &value); + +/** + * Insert the specified element at the beginning of the linked list + */ +int push_front(const value_type &value); + +/** + * Release the last element + * @note The destructor of the element is not executed + */ +int pop_back(); + +/** + * Both pop_front functions delete the first element. + * The difference is that one will copy the object and the other will not. + */ +int pop_front(value_type &value); +int pop_front(); + +/** + * Inserts the specified element at the specified position + */ +int insert(iterator iter, const value_type &value); + +/** + * Delete the element at the specified position + * @return Returns deletion success or failure + */ +int erase(iterator iter); + +/** + * Delete the first element with the same value as value + * @return Success will be returned even if the element is not found + */ +int erase(const value_type &value); + +/** + * Get the first element + */ +T &get_first(); +const T &get_first() const; + +/** + * Get the last element + */ +T &get_last(); + +/** + * Similar to STL, ObList supports iterator-related interfaces + */ +iterator begin(); +const_iterator begin(); +iterator end(); +const_iterator end() const; + +/** + * Delete all elements + */ +void clear(); + +/** + * Determine whether the linked list is empty + */ +bool empty() const; + +/** + * Number of elements + */ +int64_t size() const; +``` + +## ObDList + +> Code reference `ob_dlist.h`. 
+ +ObDList is also a double linked list. Unlike ObList, its element memory layout and memory management method are different. The ObList object is passed in by the user. ObList internally applies for a memory copy object and constructs the front and rear pointers of the linked list nodes. ObDList is an object containing the previous and next node pointers directly passed in by the user. Due to this feature of ObDList, it will be different from the method of using STL list. + +ObDList does not manage memory and does not need to manage memory at all. Its template parameters do not have a memory allocator, only one `DLinkNode`. `DLinkNode` needs to contain the element objects you need, front and rear node pointers and implement some common operations (with assistance Implement base class), the declaration and some interfaces of ObDList are as follows: + +```cpp +template +class ObDList; + +/// Move all elements on the current linked list to list +int move(ObDList &list); + +/// Get the head node (not the first element) +DLinkNode *get_header(); +const DLinkNode *get_header() const; + +/// Get the last element +DLinkNode *get_last(); + +/// Get the first element +const DLinkNode *get_first() const; +const DLinkNode *get_first_const() const; + +/// Add a node to the tail +bool add_last(DLinkNode *e); + +/// Add a node to the head +bool add_first(DLinkNode *e); + +/// Add node at specified location +bool add_before(const DLinkNode *pos, DLinkNode *e); + +/// Move the specified node to the front +bool move_to_first(DLinkNode *e); +/// Move the specified node to the end +bool move_to_last(DLinkNode *e); + +/// Delete the last node +DLinkNode *remove_last(); +/// Delete the first node +DLinkNode *remove_first(); + +/// Delete specified element +DLinkNode *remove(DLinkNode *e); + +/// Clear linked list +void clear(); + +/// Insert another linked list at the beginning of the linked list +void push_range(ObDList &range); + +/// Delete the specified number of elements from the 
beginning +/// and place the deleted elements in the range +void pop_range(int32_t num, ObDList &range); + +/// Whether the linked list is empty +bool is_empty() const +/// Number of elements +int32_t get_size() const +``` + +OceanBase provides auxiliary `DLinkNode` implementations `ObDLinkNode` and `ObDLinkDerived`, making it easy to use ObDList simply by using either replication class. + +Before introducing these two auxiliary classes, let's take a brief look at a basic auxiliary interface implementation `ObDLinkBase`, which is the base class of the above two auxiliary classes. It contains the front and rear node pointers required by ObDList and some basic node operations. Both auxiliary classes are implemented by inheriting the base class, and only use different methods. + +The first auxiliary class, ObDLinkNode, is declared as follows: + +```cpp +template +struct ObDLinkNode: public ObDLinkBase > +``` + +Just give your own real linked list element type. The disadvantage is that when getting the linked list elements, you need to use `ObDLinkNode::get_data` to get your own object, such as + +```cpp +class MyObj; +ObDList> alist; + +ObDLinkNode *anode = OB_NEW(ObDLinkNode, ...); +alist.add_last(anode); + +ObDLinkNode *nodep = alist.get_first(); +MyObj &myobj = nodep->get_data(); +// do something with myobj +``` + +The second auxiliary class, ObDLinkDerived, is simpler to use than ObDLinkNode. Its declaration is as follows: + +```cpp +template +struct ObDLinkDerived: public ObDLinkBase, T +``` + +Note that it directly inherits the template class T itself, that is, there is no need to obtain the real object through get_data like ObDLinkNode. 
You can call the methods of T directly. Adapting the example above: + +```cpp +class MyObj; +ObDList<ObDLinkDerived<MyObj>> alist; + +ObDLinkDerived<MyObj> *anode = OB_NEW(ObDLinkDerived<MyObj>, ...); +alist.add_last(anode); + +ObDLinkDerived<MyObj> *nodep = alist.get_first(); +// MyObj &myobj = nodep->get_data(); // no need any more +// MyObj *myobj = nodep; // nodep is a pointer to MyObj too +// do something with myobj or directly with nodep +``` + +Since ObDList does not manage the memory of nodes, you need to be particularly careful when using it. Pay attention to managing the life cycle of each element. Before performing cleanup actions, such as `clear` and `reset`, the memory must be released first. The interfaces of ObDList are clearly declared, although their names do not follow the naming convention of the STL list. They are not all listed here; refer directly to the declarations in `ob_dlist.h`. + +# Map +Map is a commonly used data structure, and its insertion and query efficiency are very high. Normally, there are two ways to implement a Map. One is a balanced search tree, typically a red-black tree; common standard library implementations of std::map use this approach. The other is a hash table, which is unordered_map in STL. + +There are many Maps implemented in OceanBase, including the balanced search tree implementation ObRbTree and hash maps suitable for different scenarios, such as ObHashMap, ObLinkHashMap and ObLinearHashMap. + +> OceanBase implements many types of hash maps, but it is recommended to use the few introduced here unless you have a clear understanding of other implementations. + +## ObHashMap +The implementation of ObHashMap is in ob_hashmap.h. To make the implementation of ObHashMap easier to understand, it is introduced here by comparison with STL::unordered_map. 
+ +### ObHashMap Introduction +In STL, unordered_map is declared as follows: +```cpp +template< + class Key, + class T, + class Hash = std::hash<Key>, /// Calculate hash value of Key + class KeyEqual = std::equal_to<Key>, /// Determine whether Key is equal + class Allocator = std::allocator<std::pair<const Key, T>> /// memory allocator +> class unordered_map; +``` + +Key in the template parameters is our key, T is the type of our value, Hash is a class or function that calculates the hash value based on the key, KeyEqual is a method to determine whether two key values are equal, and Allocator is the memory allocator; the objects it allocates are key-value pairs. + +The declaration in OceanBase is similar: + +```cpp +template <class _key_type, + class _value_type, + class _defendmode = LatchReadWriteDefendMode, + class _hashfunc = hash_func<_key_type>, + class _equal = equal_to<_key_type>, + class _allocer = SimpleAllocer<typename HashMapTypes<_key_type, _value_type>::AllocType>, + template <class> class _bucket_array = NormalPointer, + class _bucket_allocer = oceanbase::common::ObMalloc, + int64_t EXTEND_RATIO = 1> +class ObHashMap; +``` + +Among them, `_key_type`, `_value_type`, `_hashfunc`, `_equal` have the same meaning as the corresponding parameters of STL::unordered_map. There are some additional parameters here: + +- `_defendmode`: OceanBase provides a conditionally thread-safe hashmap implementation. You can use the default value and ignore it for now; it is introduced later; +- `_allocer` and `_bucket_allocer`: STL::unordered_map requires only one allocator, but ObHashMap requires two. A hashmap usually has an array that serves as the bucket array. After an element is hashed, the corresponding bucket is found, and the element is then "mounted" on that bucket. `_bucket_allocer` is the allocator of the bucket array, and `_allocer` is the allocator of elements, that is, the allocator of key-value pairs; +- EXTEND_RATIO: If EXTEND_RATIO is 1, no expansion will occur. Otherwise, the hash map is not thread-safe. + +### ObHashMap Interface Introduction +```cpp +/** + * The constructor of ObHashMap does nothing. + * You must call create for actual initialization. 
+ * The parameters of the create function are mainly the number of buckets + * (bucket_num) and the parameters of the memory allocator. + * Providing a reasonable number of buckets can make hashmap run more efficiently + * without wasting too much memory. + * + * As you can see from the following interfaces, two memory allocators can be + * provided, one is the allocator of the bucket array, + * and the other is the allocator of element nodes. + */ +int create(int64_t bucket_num, + const ObMemAttr &bucket_attr, + const ObMemAttr &node_attr); +int create(int64_t bucket_num, const ObMemAttr &bucket_attr); +int create(int64_t bucket_num, + const lib::ObLabel &bucket_label, + const lib::ObLabel &node_label = ObModIds::OB_HASH_NODE, + uint64_t tenant_id = OB_SERVER_TENANT_ID, + uint64_t ctx_id = ObCtxIds::DEFAULT_CTX_ID); +int create(int64_t bucket_num, + _allocer *allocer, + const lib::ObLabel &bucket_label, + const lib::ObLabel &node_label = ObModIds::OB_HASH_NODE); +int create(int64_t bucket_num, + _allocer *allocer, + _bucket_allocer *bucket_allocer); + +/// Destroy the current object directly +int destroy(); + +/// Both functions will delete all elements +int clear(); +int reuse(); + +/** + * Get the value associated with the specified key + * Although the get function is also provided, it is recommended to use the current + * function. + * @param timeout_us: Timeout for getting elements. The implementation principle + * of timeout will be introduced later. + * @return success if the element is found + */ +int get_refactored(const _key_type &key, _value_type &value, const int64_t timeout_us = 0) const; + +/** + * Set the value associated with the specified key + * @param flag: if 0, an existing value will not be overwritten, + * otherwise the original value will be overwritten. + * @param broadcast: whether to wake up the thread waiting to obtain the + * current key + * @param overwrite_key: not used. 
Please refer to flag + * @param callback: After the insertion or update is successful, you can + * use callback to perform some additional operations on the value. + */ +template <typename _callback = void> +int set_refactored(const _key_type &key, + const _value_type &value, + int flag = 0, + int broadcast = 0, + int overwrite_key = 0, + _callback *callback = nullptr); + +/** + * Traverse all elements + * @note + * 1. Do not insert or delete elements during the traversal process. + * Traversal takes some locks, and insertion, deletion and other + * actions also take locks, so lock + * conflicts may occur; + * 2. The callback action should be as lightweight as possible because it runs + * within the lock scope. + */ +template <class _callback> +int foreach_refactored(_callback &callback) const; + +/** + * Delete the element with the specified key. + * If the value pointer is not null, the corresponding element will be returned + * @return If the element does not exist, OB_HASH_NOT_EXIST will be returned + */ +int erase_refactored(const _key_type &key, _value_type *value = NULL); + +/** + * Insert if it does not exist, otherwise call callback to update + */ +template <class _callback> +int set_or_update(const _key_type &key, const _value_type &value, + _callback &callback); + +/** + * Delete the element with the specified key if it meets a specific condition + */ +template <class _pred> +int erase_if(const _key_type &key, _pred &pred, bool &is_erased, _value_type *value = NULL); + +/** + * There is no need to copy elements, directly access the elements with + * specified key values through callback. + * @note callback executed under write lock protection + */ +template <class _callback> +int atomic_refactored(const _key_type &key, _callback &callback); + +/** + * There is no need to copy the element value, just get the element directly + * and access it through callback. 
+ * @note callback executed under read lock protection + */ +template <class _callback> +int read_atomic(const _key_type &key, _callback &callback); +``` + +### Implementation of ObHashMap +Readers who are familiar with the implementation of STL unordered_map can easily guess how ObHashMap is implemented. ObHashMap also uses a linear array as its bucket array and resolves hash collisions by separate chaining (the "zipper" method). Some details are described below to help you understand its implementation and use ObHashMap more efficiently. + +ObHashMap is built on top of ObHashTable. For the code, refer to `ob_hashtable.h`. ObHashMap just adds key-value semantics on top of ObHashTable. + +**Conditional thread safety** + +If the template parameter `_defendmode` selects a valid lock mode, ObHashTable keeps a read-write lock for each bucket and provides conditional thread safety. When elements in a bucket are accessed, the corresponding lock is taken, including in the interfaces that take a `callback`, so the actions in `callback` should be as light as possible and must not access other elements of ObHashTable, to prevent deadlock. + +ObHashMap is not thread-safe when expanding. If the provided template parameter EXTEND_RATIO is not 1, the capacity will be expanded when needed, and this is transparent to the user. + +The default value of ObHashMap `_defendmode` is `LatchReadWriteDefendMode`, which is a valid thread-safe protection mode. + +**_defendmode** + +_defendmode defines different bucket locking methods, and 6 modes are provided in `ob_hashutils.h`: + +1. LatchReadWriteDefendMode +2. ReadWriteDefendMode +3. SpinReadWriteDefendMode +4. SpinMutexDefendMode +5. MultiWriteDefendMode +6. NoPthreadDefendMode + +The first five of them can provide thread safety protection, but they use different lock modes. 
Depending on the business scenario and the level of read and write concurrency, choosing a suitable mode can improve efficiency and stability. The sixth mode, `NoPthreadDefendMode`, does not provide any protection. + +**get timeout waiting** + +If the specified element does not exist when getting an element, you can set a waiting time. ObHashTable will insert a `fake` element into the corresponding bucket and wait. When another thread inserts the corresponding element, the waiting thread will be awakened. However, the thread inserting the element must explicitly request the wake-up, that is, pass a non-zero `broadcast` value to `set_refactored`. + +## ObHashSet +Similar to ObHashMap, ObHashSet is based on ObHashTable and encapsulates an implementation with only keys and no values. Please refer to the code ob_hashset.h for details. + +## ObLinkHashMap +ObLinkHashMap is a lock-free hash map that takes into account both read and write performance and is thread-safe (including expansion). It uses separate chaining (the "zipper" method) to resolve hash conflicts. + +Here are the characteristics of this class: + +- It takes into account both read and write performance; +- It implements thread safety with a lock-free design; +- It uses a retire station, so the release of nodes is delayed; it is therefore recommended that the Key be as small as possible; +- There is a certain amount of memory waste; +- When expanding or shrinking capacity, batch relocation is used; +- When there is a hotspot key, the get performance is poor due to reference counting issues; +- When the bucket array is expanded too much, initializing Array will be slower. + +> Regarding the retire station, please refer to the paper [Reclaiming Memory for Lock-Free Data Structures: There has to be a Better Way](https://www.cs.utoronto.ca/%7Etabrown/debra/fullpaper.pdf). + +Below are some commonly used interfaces and precautions when using them. 
+ +```cpp +/** + * Declaration of ObLinkHashMap + * Template parameters: + * @param Key Key type + * @param Value The type of value, which needs to be inherited from + * LinkHashValue (refer to ob_link_hashmap_deps.h) + * @param AllocHandle Class used to allocate and release values and nodes + * (refer to ob_link_hashmap_deps.h) + * @param RefHandle Reference counting function. Don't modify it if you + * don't deeply understand its principles. + * @param SHRINK_THRESHOLD When there are too many or too few nodes, + * the map expands or shrinks, trying to keep the ratio of + * current nodes within [1/SHRINK_THRESHOLD, 1] (not precisely controlled) + */ +template<typename Key, + typename Value, + typename AllocHandle=AllocHandle<Key, Value>, + typename RefHandle=RefHandle, + int64_t SHRINK_THRESHOLD = 8> +class ObLinkHashMap; + + +/// Number of elements +int64_t size() const; + +/** + * Insert an element + * @note If it returns successfully, you need to execute hash.revert(value) + */ +int insert_and_get(const Key &key, Value* value); + +/// Delete specified element +int del(const Key &key); + +/** + * Get the specified element + * @note If the return is successful, revert needs to be executed + */ +int get(const Key &key, Value*& value); + +/// Releases the reference count of the specified element. 
+/// Can be released across threads +void revert(Value* value); + +/** + * Determine whether the specified element exists + * @return OB_ENTRY_EXIST if the element exists + */ +int contains_key(const Key &key); + +/** + * Traverse all elements + * @param fn: bool fn(Key &key, Value *value); The bool return value + * indicates whether to continue traversing + */ +template <typename Function> int for_each(Function &fn); + +/** + * Delete elements that meet the conditions + * @param fn bool fn(Key &key, Value *value); The bool return value + * indicates whether it needs to be deleted + */ +template <typename Function> int remove_if(Function &fn); +``` + +## ObRbTree +ObRbTree is a red-black tree implementation that supports basic operations such as insertion, deletion, and search, and is not thread-safe. Since ObRbTree is not used in OceanBase, it is not described further here. If you are interested, please read the source code `ob_rbtree.h`. + + +# Others +OceanBase also has many other basic container implementations, such as queues (ObFixedQueue, ObLightyQueue, ObLinkQueue), a bitmap (ObBitmap), a tuple (ObTuple), etc. If the common containers don't meet your needs, you can find more in the `deps/oblib/src/lib` directory. 
diff --git a/src/logservice/libobcdc/src/ob_log_config.h b/src/logservice/libobcdc/src/ob_log_config.h index 72df0f806f6b75520f0af236ea800b59ede2b994..0c6cd3583118d517f806b00b465e2578761d9eac 100644 --- a/src/logservice/libobcdc/src/ob_log_config.h +++ b/src/logservice/libobcdc/src/ob_log_config.h @@ -111,7 +111,7 @@ public: DEF_INT(storager_queue_length, OB_CLUSTER_PARAMETER, "0", "[0,]", "storager queue length"); DEF_INT(reader_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "reader thread number"); DEF_INT(reader_queue_length, OB_CLUSTER_PARAMETER, "0", "[0,]", "reader queue length"); - DEF_INT(br_queue_length, OB_CLUSTER_PARAMETER, "0", "[0, ]", "user_binlog_record queue length"); + DEF_INT(br_queue_length, OB_CLUSTER_PARAMETER, "0", "[0,]", "user_binlog_record queue length"); DEF_INT(cached_schema_version_count, OB_CLUSTER_PARAMETER, "32", "[1,]", "cached schema version count"); DEF_INT(history_schema_version_count, OB_CLUSTER_PARAMETER, "16", "[1,]", "history schema version count"); DEF_INT(resource_collector_thread_num, OB_CLUSTER_PARAMETER, "11", "[1,]", "resource collector thread number"); diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index ce9b9075033f3fbe140b8a3aac902f3de46ee1f8..f41b678ad31e96706713b19db1b42d463ba0fa7f 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -8157,11 +8157,6 @@ int ObStaticEngineCG::get_phy_op_type(ObLogicalOperator &log_op, type = PHY_PX_DIST_TRANSMIT; } else if (op.get_plan()->get_optimizer_context().is_online_ddl() && ObPQDistributeMethod::PARTITION_RANGE == op.get_dist_method()) { type = PHY_PX_REPART_TRANSMIT; - } else if (OB_REPARTITION_NO_REPARTITION != op.get_repartition_type() - && !op.is_slave_mapping()) { - type = PHY_PX_REPART_TRANSMIT; - } else if (ObPQDistributeMethod::LOCAL != op.get_dist_method()) { - type = PHY_PX_DIST_TRANSMIT; } else { // NOTE: 
优化器需要和执行器保持一致,既没有分区、又没有HASH、或其它重分区方式时,就使用All To One type = PHY_PX_REDUCE_TRANSMIT;