未验证 提交 31fa2558 编写于 作者: 羽飞's avatar 羽飞 提交者: GitHub

thread-safe buffer pool and btree supported (#145)

Fix problem:
1. the buffer pool and the B+ tree are not thread-safe;
2. github/workflow/build does not work.

### What is changed and how it works?
1. thread-safe buffer pool
- A mutex is added to the buffer pool, and it is taken in buffer pool operations
such as allocating a frame and disposing of a frame;
- A frame is locked while its content is being updated or read;
- The frame manager takes a lock when allocating/freeing pages.

2. thread-safe b+tree
- The crabbing protocol is used to support concurrent operations on the tree.

3. github/workflow/build
- update the submodules;
- create a build script and run build.sh in build.yaml
上级 5b06a712
......@@ -48,14 +48,14 @@ TabWidth: 4
UseTab: Never
BreakBeforeBraces: Custom
BraceWrapping:
AfterClass: false
AfterClass: true
AfterControlStatement: false
AfterEnum: false
AfterEnum: true
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterStruct: true
AfterUnion: true
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
......
......@@ -20,30 +20,8 @@ jobs:
steps:
- name: Checkout repository and submodules
uses: actions/checkout@v2
with:
submodules: recursive
- name: build deps/googletest
run: cmake -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -B ${{github.workspace}}/deps/googletest/build -S ${{github.workspace}}/deps/googletest && cmake --build ${{github.workspace}}/deps/googletest/build --config ${{env.BUILD_TYPE}} --target install
- name: build deps/jsoncpp
run: cmake -DJSONCPP_WITH_TESTS=OFF -DJSONCPP_WITH_POST_BUILD_UNITTEST=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -B ${{github.workspace}}/deps/jsoncpp/build -S ${{github.workspace}}/deps/jsoncpp && cmake --build ${{github.workspace}}/deps/jsoncpp/build --config ${{env.BUILD_TYPE}} --target install
- name: build deps/libevent
run: cmake -DEVENT__DISABLE_OPENSSL=ON -DEVENT__DISABLE_MBEDTLS=ON -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -B ${{github.workspace}}/deps/libevent/build -S ${{github.workspace}}/deps/libevent && cmake --build ${{github.workspace}}/deps/libevent/build --config ${{env.BUILD_TYPE}} --target install
- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/local -B ${{github.workspace}}/build -DENABLE_ASAN=ON
- name: Build
# Build your program with the given configuration
run: cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/local --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Test
working-directory: ${{github.workspace}}/build
# Execute tests defined by the CMake configuration.
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
run: ctest -C ${{env.BUILD_TYPE}}
shell: bash
run: sudo bash build.sh init && bash build.sh release --make -j4
./deps/3rd
./deps/libevent
./deps/googletest
./deps/jsoncpp
./deps/benchmark
build/*
build_*
cmake-build-*/*
.vscode/*
.DS_Store
......
[submodule "deps/libevent"]
path = deps/libevent
[submodule "deps/3rd/libevent"]
path = deps/3rd/libevent
url = https://github.com/libevent/libevent
[submodule "deps/googletest"]
path = deps/googletest
[submodule "deps/3rd/jsoncpp"]
path = deps/3rd/jsoncpp
url = https://github.com/open-source-parsers/jsoncpp
[submodule "deps/3rd/googletest"]
path = deps/3rd/googletest
url = https://github.com/google/googletest
[submodule "deps/jsoncpp"]
path = deps/jsoncpp
url = https://github.com/open-source-parsers/jsoncpp.git
[submodule "deps/3rd/benchmark"]
path = deps/3rd/benchmark
url = https://github.com/google/benchmark
......@@ -2,7 +2,7 @@
#INCLUDE(file1 [OPTIONAL])
cmake_minimum_required(VERSION 3.10)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 20)
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
project(minidb)
......@@ -16,14 +16,18 @@ MESSAGE(STATUS "This is PROJECT_BINARY_DIR dir " ${PROJECT_BINARY_DIR})
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
#SET(LIBRARY_OUTPUT_PATH <路径>)
OPTION(ENABLE_ASAN "Enable build with address sanitizer" OFF)
OPTION(WITH_UNIT_TESTS "Compile miniob with unit tests" ON)
OPTION(CONCURRENCY "Support concurrency operations" OFF)
MESSAGE(STATUS "HOME dir: $ENV{HOME}")
#SET(ENV{变量名} 值)
IF(WIN32)
MESSAGE(STATUS "This is windows.")
ADD_DEFINITIONS(-DWIN32)
ELSEIF(WIN64)
MESSAGE(STATUS "This is windows.")
ADD_DEFINITIONS(-DWIN64)
MESSAGE(STATUS "This is windows.")
ADD_DEFINITIONS(-DWIN64)
ELSEIF(APPLE)
MESSAGE(STATUS "This is apple")
# normally __MACH__ has already been defined
......@@ -40,7 +44,6 @@ ENDIF(WIN32)
SET(CMAKE_COMMON_FLAGS "${CMAKE_COMMON_FLAGS} -Wall -DCMAKE_EXPORT_COMPILE_COMMANDS=1")
IF(DEBUG)
MESSAGE("DEBUG has been set as TRUE ${DEBUG}")
#"${CMAKE_COMMON_FLAGS} -O0 -g " ${CMAKE_COMMON_FLAGS}最好在""以内,防止被cmake 增加了;
SET(CMAKE_COMMON_FLAGS "${CMAKE_COMMON_FLAGS} -O0 -g -DDEBUG ")
ADD_DEFINITIONS(-DENABLE_DEBUG)
ELSEIF(NOT DEFINED ENV{DEBUG})
......@@ -50,13 +53,17 @@ ELSE()
MESSAGE("Enable debug")
SET(CMAKE_COMMON_FLAGS "${CMAKE_COMMON_FLAGS} -O0 -g -DDEBUG")
ADD_DEFINITIONS(-DENABLE_DEBUG)
ENDIF()
ENDIF(DEBUG)
# When the CONCURRENCY option is ON, define the CONCURRENCY preprocessor macro.
# The macro is passed twice: once through CMAKE_COMMON_FLAGS (which is copied
# into CMAKE_CXX_FLAGS / CMAKE_C_FLAGS right after this block) and once through
# ADD_DEFINITIONS.
IF (CONCURRENCY)
MESSAGE("CONCURRENCY is ON")
SET(CMAKE_COMMON_FLAGS "${CMAKE_COMMON_FLAGS} -DCONCURRENCY")
ADD_DEFINITIONS(-DCONCURRENCY)
ENDIF (CONCURRENCY)
SET(CMAKE_CXX_FLAGS ${CMAKE_COMMON_FLAGS})
SET(CMAKE_C_FLAGS ${CMAKE_COMMON_FLAGS})
MESSAGE("CMAKE_CXX_FLAGS is " ${CMAKE_CXX_FLAGS})
OPTION(ENABLE_ASAN "Enable build with address sanitizer" OFF)
IF (ENABLE_ASAN)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer -fsanitize=address")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer -fsanitize=address")
......@@ -77,11 +84,10 @@ MESSAGE("Install target dir is " ${CMAKE_INSTALL_PREFIX})
ADD_SUBDIRECTORY(deps)
ADD_SUBDIRECTORY(src)
ADD_SUBDIRECTORY(test/perf)
OPTION(WITH_UNIT_TESTS "Compile miniob with unit tests" ON)
ADD_SUBDIRECTORY(benchmark)
IF(WITH_UNIT_TESTS)
ADD_SUBDIRECTORY(unitest)
ADD_SUBDIRECTORY(unittest)
ENDIF()
# install 准备安装的目录是cmakefile 的当前目录, 不是build 后生成的目录
......
# Build rules for the benchmark programs: every *.cpp file found below this
# directory is compiled into its own executable, named after the source file.
PROJECT(benchmark)
MESSAGE("Begin to build " ${PROJECT_NAME})
MESSAGE(STATUS "This is PROJECT_BINARY_DIR dir " ${PROJECT_BINARY_DIR})
MESSAGE(STATUS "This is PROJECT_SOURCE_DIR dir " ${PROJECT_SOURCE_DIR})

# Variables set by the parent CMakeLists.txt are visible here.
MESSAGE("${CMAKE_COMMON_FLAGS}")

# Signature: INCLUDE_DIRECTORIES([AFTER|BEFORE] [SYSTEM] dir1 dir2 ...)
# The SYSTEM keyword must precede the directories it applies to; the original
# call had it as the last argument, where it did nothing useful, so it is dropped.
INCLUDE_DIRECTORIES(. ${PROJECT_SOURCE_DIR}/../deps ${PROJECT_SOURCE_DIR}/../src/observer /usr/local/include)

# NOTE(original author): include_directories / link_directories set by the
# parent CMakeLists.txt are said not to propagate here, so they are repeated.
#INCLUDE_DIRECTORIES(BEFORE ${CMAKE_INSTALL_PREFIX}/include)
LINK_DIRECTORIES(/usr/local/lib /usr/local/lib64 ${PROJECT_BINARY_DIR}/../lib)

# Add every entry of LD_LIBRARY_PATH as a link directory. The environment
# variable is quoted everywhere so that a defined-but-empty value no longer
# makes string(REPLACE) fail with "too few arguments"; an empty value is
# handled like an unset one.
IF (DEFINED ENV{LD_LIBRARY_PATH} AND NOT "$ENV{LD_LIBRARY_PATH}" STREQUAL "")
  SET(LD_LIBRARY_PATH_STR "$ENV{LD_LIBRARY_PATH}")
  # separate_arguments() cannot be used here: it only splits on whitespace.
  string(REPLACE ":" ";" LD_LIBRARY_PATH_LIST "${LD_LIBRARY_PATH_STR}")
  MESSAGE(" Add LD_LIBRARY_PATH to -L flags " ${LD_LIBRARY_PATH_LIST})
  LINK_DIRECTORIES(${LD_LIBRARY_PATH_LIST})
ELSE ()
  LINK_DIRECTORIES(/usr/local/lib)
ENDIF ()

find_package(benchmark CONFIG REQUIRED)

# AUX_SOURCE_DIRECTORY offers similar functionality.
FILE(GLOB_RECURSE ALL_SRC *.cpp)

# One executable per source file; the target name is the file name without
# its directory and extension.
FOREACH (F ${ALL_SRC})
  get_filename_component(prjName ${F} NAME_WE)
  MESSAGE("Build ${prjName} according to ${F}")
  ADD_EXECUTABLE(${prjName} ${F})
  TARGET_LINK_LIBRARIES(${prjName} common pthread dl benchmark observer_static)
ENDFOREACH (F)
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by Wangyunlai on 2023/03/14
//
#include <inttypes.h>
#include <random>
#include <stdexcept>
#include <benchmark/benchmark.h>
#include "storage/index/bplus_tree.h"
#include "storage/default/disk_buffer_pool.h"
#include "rc.h"
#include "common/log/log.h"
using namespace std;
using namespace common;
using namespace benchmark;
/**
 * Produces uniformly distributed integers in the closed range [min, max].
 * Every value is drawn straight from a std::random_device.
 */
class IntegerGenerator
{
public:
  IntegerGenerator(int min, int max) : distrib_(min, max)
  {}

  /// Draw the next random integer from the configured range.
  int next()
  {
    const int value = distrib_(rd_);
    return value;
  }

private:
  std::random_device                 rd_;
  std::uniform_int_distribution<int> distrib_;
};
// Ensures `bpm` is registered as the global BufferPoolManager instance only once
// (used with std::call_once in BenchmarkBase::SetUp).
once_flag init_bpm_flag;
// Buffer pool manager shared by all benchmarks in this file, constructed with the argument 512.
BufferPoolManager bpm{512};
/**
 * Per-thread operation counters collected by a benchmark loop and
 * reported through state.counters at the end of the run.
 */
struct Stat
{
// Insert(): RC::SUCCESS / RC::RECORD_DUPLICATE_KEY / any other return code.
int64_t insert_success_count = 0;
int64_t duplicate_count = 0;
int64_t insert_other_count = 0;
// Delete(): RC::SUCCESS / RC::RECORD_RECORD_NOT_EXIST / any other return code.
int64_t delete_success_count = 0;
int64_t not_exist_count = 0;
int64_t delete_other_count = 0;
// Scan(): scan completed with the expected entry count / scanner.open failed /
// entry count differed from the expected one / any other outcome.
int64_t scan_success_count = 0;
int64_t scan_open_failed_count = 0;
int64_t mismatch_count = 0;
int64_t scan_other_count = 0;
};
class BenchmarkBase : public Fixture
{
public:
BenchmarkBase()
{
}
virtual ~BenchmarkBase()
{
BufferPoolManager::set_instance(nullptr);
}
virtual string Name() const = 0;
virtual void SetUp(const State &state)
{
if (0 != state.thread_index()) {
return;
}
string log_name = this->Name() + ".log";
string btree_filename = this->Name() + ".btree";
LoggerFactory::init_default(log_name.c_str(), LOG_LEVEL_TRACE);
std::call_once(init_bpm_flag, []() { BufferPoolManager::set_instance(&bpm); });
::remove(btree_filename.c_str());
const int internal_max_size = 200;
const int leaf_max_size = 200;
RC rc = handler_.create(btree_filename.c_str(), INTS, sizeof(int32_t)/*attr_len*/,
internal_max_size, leaf_max_size);
if (rc != RC::SUCCESS) {
throw runtime_error("failed to create btree handler");
}
LOG_INFO("test %s setup done. threads=%d, thread index=%d",
this->Name().c_str(), state.threads(), state.thread_index());
}
virtual void TearDown(const State &state)
{
if (0 != state.thread_index()) {
return;
}
handler_.close();
LOG_INFO("test %s teardown done. threads=%d, thread index=%d",
this->Name().c_str(), state.threads(), state.thread_index());
}
void FillUp(uint32_t min, uint32_t max)
{
for (uint32_t value = min; value < max; ++value) {
const char *key = reinterpret_cast<const char *>(&value);
RID rid(value, value);
RC rc = handler_.insert_entry(key, &rid);
ASSERT(rc == RC::SUCCESS, "failed to insert entry into btree. key=%" PRIu32, value);
}
}
uint32_t GetRangeMax(const State &state) const
{
uint32_t max = static_cast<uint32_t>(state.range(0) * 3);
if (max <= 0) {
max = (1 << 31);
}
return max;
}
void Insert(uint32_t value, Stat &stat)
{
const char *key = reinterpret_cast<const char *>(&value);
RID rid(value, value);
RC rc = handler_.insert_entry(key, &rid);
switch (rc) {
case RC::SUCCESS: {
stat.insert_success_count++;
} break;
case RC::RECORD_DUPLICATE_KEY: {
stat.duplicate_count++;
} break;
default: {
stat.insert_other_count++;
} break;
}
}
void Delete(uint32_t value, Stat &stat)
{
const char *key = reinterpret_cast<const char *>(&value);
RID rid(value, value);
RC rc = handler_.delete_entry(key, &rid);
switch (rc) {
case RC::SUCCESS: {
stat.delete_success_count++;
} break;
case RC::RECORD_RECORD_NOT_EXIST: {
stat.not_exist_count++;
} break;
default: {
stat.delete_other_count++;
} break;
}
}
void Scan(uint32_t begin, uint32_t end, Stat &stat)
{
const char *begin_key = reinterpret_cast<const char *>(&begin);
const char *end_key = reinterpret_cast<const char *>(&end);
BplusTreeScanner scanner(handler_);
RC rc = scanner.open(begin_key, sizeof(begin_key), true /*inclusive*/,
end_key, sizeof(end_key), true /*inclusive*/);
if (rc != RC::SUCCESS) {
stat.scan_open_failed_count++;
} else {
RID rid;
uint32_t count = 0;
while (RC::RECORD_EOF != (rc = scanner.next_entry(rid))) {
count++;
}
if (rc != RC::RECORD_EOF) {
stat.scan_other_count++;
} else if (count != (end - begin + 1)) {
stat.mismatch_count++;
} else {
stat.scan_success_count++;
}
scanner.close();
}
}
protected:
BplusTreeHandler handler_;
};
////////////////////////////////////////////////////////////////////////////////
/// Fixture for the insertion benchmark; starts from the empty index created by BenchmarkBase::SetUp.
struct InsertionBenchmark : public BenchmarkBase
{
  string Name() const override { return "insertion"; }
};

/**
 * Each iteration inserts one random key and records the result.
 * Reports the success / duplicate / other counts as rates.
 */
BENCHMARK_DEFINE_F(InsertionBenchmark, Insertion)(State &state)
{
  // IntegerGenerator takes `int` bounds. The former upper bound `1 << 31` overflows
  // a signed int and yields a negative value, i.e. max < min, which
  // std::uniform_int_distribution does not allow. Use the largest positive value instead.
  IntegerGenerator generator(1, INT32_MAX);
  Stat             stat;

  for (auto _ : state) {
    uint32_t value = static_cast<uint32_t>(generator.next());
    Insert(value, stat);
  }

  state.counters["success"]   = Counter(stat.insert_success_count, Counter::kIsRate);
  state.counters["duplicate"] = Counter(stat.duplicate_count, Counter::kIsRate);
  state.counters["other"]     = Counter(stat.insert_other_count, Counter::kIsRate);
}

BENCHMARK_REGISTER_F(InsertionBenchmark, Insertion)->Threads(10);
////////////////////////////////////////////////////////////////////////////////
/**
 * Fixture for the deletion benchmark: on top of the base setup, thread 0
 * pre-populates the index with every key in [0, GetRangeMax(state)).
 */
class DeletionBenchmark : public BenchmarkBase
{
public:
  string Name() const override
  {
    return "deletion";
  }

  void SetUp(const State &state) override
  {
    const bool is_first_thread = (0 == state.thread_index());
    if (!is_first_thread) {
      return;
    }

    BenchmarkBase::SetUp(state);

    uint32_t max = GetRangeMax(state);
    ASSERT(max > 0, "invalid argument count. %ld", state.range(0));
    FillUp(0, max);
  }
};

/**
 * Each iteration deletes one random key drawn from [0, GetRangeMax(state)].
 * Reports the success / not_exist / other counts as rates.
 */
BENCHMARK_DEFINE_F(DeletionBenchmark, Deletion)(State &state)
{
  const uint32_t   upper_bound = GetRangeMax(state);
  IntegerGenerator key_generator(0, upper_bound);
  Stat             result;

  for (auto _ : state) {
    const int raw_key = key_generator.next();
    Delete(static_cast<uint32_t>(raw_key), result);
  }

  auto &counters        = state.counters;
  counters["success"]   = Counter(result.delete_success_count, Counter::kIsRate);
  counters["not_exist"] = Counter(result.not_exist_count, Counter::kIsRate);
  counters["other"]     = Counter(result.delete_other_count, Counter::kIsRate);
}

BENCHMARK_REGISTER_F(DeletionBenchmark, Deletion)->Threads(10)->Arg(4 * 10000);
////////////////////////////////////////////////////////////////////////////////
/**
 * Fixture for the scan benchmark: on top of the base setup, thread 0
 * pre-populates the index with every key in [0, 3 * state.range(0)).
 */
class ScanBenchmark : public BenchmarkBase
{
public:
  string Name() const override { return "scan"; }

  void SetUp(const State &state) override
  {
    if (0 != state.thread_index()) {
      return;
    }

    BenchmarkBase::SetUp(state);

    uint32_t max = static_cast<uint32_t>(state.range(0)) * 3;
    ASSERT(max > 0, "invalid argument count. %ld", state.range(0));
    FillUp(0, max);
  }
};

/**
 * Each iteration scans a random closed range [begin, end] of at most
 * max_range_size + 1 keys and records whether the expected number of
 * entries was returned.
 */
BENCHMARK_DEFINE_F(ScanBenchmark, Scan)(State &state)
{
  int      max_range_size = 100;
  uint32_t max            = GetRangeMax(state);

  // Keys present in the index are [0, max), so the largest valid `end` is max - 1.
  // The former bound `max - max_range_size` allowed end == max, a key that was never
  // inserted, which produced spurious "mismatch" results.
  IntegerGenerator begin_generator(1, max - max_range_size - 1);
  IntegerGenerator range_generator(1, max_range_size);
  Stat             stat;

  for (auto _ : state) {
    uint32_t begin = static_cast<uint32_t>(begin_generator.next());
    uint32_t end   = begin + static_cast<uint32_t>(range_generator.next());
    Scan(begin, end, stat);
  }

  state.counters["success"]               = Counter(stat.scan_success_count, Counter::kIsRate);
  state.counters["open_failed_count"]     = Counter(stat.scan_open_failed_count, Counter::kIsRate);
  state.counters["mismatch_number_count"] = Counter(stat.mismatch_count, Counter::kIsRate);
  state.counters["other"]                 = Counter(stat.scan_other_count, Counter::kIsRate);
}

BENCHMARK_REGISTER_F(ScanBenchmark, Scan)->Threads(10)->Arg(4 * 10000);
////////////////////////////////////////////////////////////////////////////////
/// Fixture for the mixed-workload benchmark; uses the base setup unchanged.
struct MixtureBenchmark : public BenchmarkBase
{
  string Name() const override
  {
    return "mixture";
  }
};

/**
 * Each iteration randomly picks one of insert (0), delete (1) or scan (2)
 * and applies it to a random key; all outcome counters are reported as rates.
 */
BENCHMARK_DEFINE_F(MixtureBenchmark, Mixture)(State &state)
{
  const pair<uint32_t, uint32_t> data_range{0, GetRangeMax(state)};
  const pair<uint32_t, uint32_t> scan_range{1, 100};

  IntegerGenerator data_generator(data_range.first, data_range.second);
  IntegerGenerator scan_range_generator(scan_range.first, scan_range.second);
  IntegerGenerator operation_generator(0, 2);

  Stat stat;

  for (auto _ : state) {
    int64_t operation_type = operation_generator.next();
    switch (operation_type) {
      case 0: {  // insert
        const int raw = data_generator.next();
        Insert(static_cast<uint32_t>(raw), stat);
        break;
      }
      case 1: {  // delete
        const int raw = data_generator.next();
        Delete(static_cast<uint32_t>(raw), stat);
        break;
      }
      case 2: {  // scan
        const uint32_t first = static_cast<uint32_t>(data_generator.next());
        const uint32_t width = static_cast<uint32_t>(scan_range_generator.next());
        Scan(first, first + width, stat);
        break;
      }
      default: {
        ASSERT(false, "should not happen. operation=%ld", operation_type);
      }
    }
  }

  state.counters.insert({
      {"insert_success", Counter(stat.insert_success_count, Counter::kIsRate)},
      {"insert_other", Counter(stat.insert_other_count, Counter::kIsRate)},
      {"insert_duplicate", Counter(stat.duplicate_count, Counter::kIsRate)},
      {"delete_success", Counter(stat.delete_success_count, Counter::kIsRate)},
      {"delete_other", Counter(stat.delete_other_count, Counter::kIsRate)},
      {"delete_not_exist", Counter(stat.not_exist_count, Counter::kIsRate)},
      {"scan_success", Counter(stat.scan_success_count, Counter::kIsRate)},
      {"scan_other", Counter(stat.scan_other_count, Counter::kIsRate)},
      {"scan_mismatch", Counter(stat.mismatch_count, Counter::kIsRate)},
      {"scan_open_failed", Counter(stat.scan_open_failed_count, Counter::kIsRate)},
  });
}

BENCHMARK_REGISTER_F(MixtureBenchmark, Mixture)->Threads(10)->Arg(4 * 10000);
////////////////////////////////////////////////////////////////////////////////
BENCHMARK_MAIN();
#!/bin/bash
# Absolute path of the directory containing this script.
TOPDIR=`readlink -f \`dirname $0\``
BUILD_SH=$TOPDIR/build.sh

# cmake invocation shared by all build types (expanded unquoted on purpose so
# that it splits into the command and its option).
CMAKE_COMMAND="cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=1"

ALL_ARGS=("$@")   # raw command line
BUILD_ARGS=()     # arguments before --make (build type and cmake options)

# Default number of parallel make jobs. CPU_CORES used to be referenced without
# ever being defined, which left a bare "-j" (unlimited jobs) in MAKE_ARGS.
# A value preset in the environment is honoured; otherwise the number of online
# processors is used, falling back to 1.
CPU_CORES=${CPU_CORES:-$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)}
MAKE_ARGS=(-j "$CPU_CORES")   # arguments after --make are appended to these
MAKE=make
ASAN_OPTION=ON

echo "$0 ${ALL_ARGS[@]}"
# Print the command line help text to stdout.
function usage
{
  cat <<'USAGE_TEXT'
Usage:
./build.sh -h
./build.sh init
./build.sh clean
./build.sh [BuildType] [--make [MakeOptions]]

OPTIONS:
BuildType => debug(default), release, debug_asan, release_asan
MakeOptions => Options to make command, default: -j N

Examples:
# Init.
./build.sh init

# Build by debug mode and make with -j24.
./build.sh debug --make -j24
USAGE_TEXT
}
# Split ALL_ARGS at "--make": everything before it is appended to BUILD_ARGS,
# everything after it to MAKE_ARGS. The "--make" token itself is dropped.
function parse_args
{
  make_start=false
  for arg in "${ALL_ARGS[@]}"; do
    case "$arg" in
      --make)
        make_start=true
        ;;
      *)
        if [[ $make_start == true ]]; then
          MAKE_ARGS+=("$arg")
        else
          BUILD_ARGS+=("$arg")
        fi
        ;;
    esac
  done
}
# Run $MAKE with the collected MAKE_ARGS, unless MAKE holds the literal
# string "false". MAKE is initialised to "make" at the top of this script.
function try_make
{
  if [[ $MAKE == false ]]; then
    return 0
  fi

  $MAKE "${MAKE_ARGS[@]}"
}
# Create the directory $TOPDIR/build_<type> if necessary and change into it.
# $1: build type; it is also stored in the global TYPE.
function prepare_build_dir
{
  TYPE=$1
  local build_dir=$TOPDIR/build_$TYPE
  mkdir -p $build_dir && cd $build_dir
}
# Fetch the git submodules and build + install the third-party dependencies
# (libevent, googletest, google benchmark, jsoncpp) from deps/3rd.
#
# Returns non-zero as soon as one dependency fails to build or install.
# Previously failures were swallowed because the function always returned the
# status of the final `cd`. `mkdir -p` is used so that the function can be
# re-run when a build directory already exists.
function do_init
{
  git submodule update --init || return
  current_dir=$PWD

  # build libevent
  cd "${TOPDIR}/deps/3rd/libevent" && \
    git checkout release-2.1.12-stable && \
    mkdir -p build && \
    cd build && \
    cmake .. -DEVENT__DISABLE_OPENSSL=ON && \
    make -j4 && \
    make install
  if [[ $? -ne 0 ]]; then
    echo "failed to build libevent" >&2
    cd "$current_dir"
    return 1
  fi

  # build googletest
  cd "${TOPDIR}/deps/3rd/googletest" && \
    mkdir -p build && \
    cd build && \
    cmake .. && \
    make -j4 && \
    make install
  if [[ $? -ne 0 ]]; then
    echo "failed to build googletest" >&2
    cd "$current_dir"
    return 1
  fi

  # build google benchmark
  cd "${TOPDIR}/deps/3rd/benchmark" && \
    mkdir -p build && \
    cd build && \
    cmake .. -DBENCHMARK_ENABLE_TESTING=OFF -DBENCHMARK_INSTALL_DOCS=OFF -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DBENCHMARK_USE_BUNDLED_GTEST=OFF -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF && \
    make -j4 && \
    make install
  if [[ $? -ne 0 ]]; then
    echo "failed to build google benchmark" >&2
    cd "$current_dir"
    return 1
  fi

  # build jsoncpp
  cd "${TOPDIR}/deps/3rd/jsoncpp" && \
    mkdir -p build && \
    cd build && \
    cmake -DJSONCPP_WITH_TESTS=OFF -DJSONCPP_WITH_POST_BUILD_UNITTEST=OFF .. && \
    make && \
    make install
  if [[ $? -ne 0 ]]; then
    echo "failed to build jsoncpp" >&2
    cd "$current_dir"
    return 1
  fi

  cd "$current_dir"
}
# Create ${TOPDIR}/build_<type> if necessary and change into it.
# $1: build type; it is also stored in the global TYPE.
# NOTE(review): this is a second definition of prepare_build_dir with the same
# body as the one earlier in this script; being defined later, this one is the
# definition in effect when do_build runs.
function prepare_build_dir
{
TYPE=$1
mkdir -p ${TOPDIR}/build_${TYPE} && cd ${TOPDIR}/build_${TYPE}
}
# Configure the project with cmake inside the build directory of the given type.
# $1:   build type (selects the build_<type> directory)
# rest: extra options passed to cmake unchanged
# Returns the status of cmake, or of prepare_build_dir if that fails.
function do_build
{
  TYPE=$1; shift
  prepare_build_dir $TYPE || return
  echo "${CMAKE_COMMAND} ${TOPDIR} $*"
  # CMAKE_COMMAND is deliberately unquoted: it holds the command plus an option.
  # The source directory and the caller's options are quoted so that values
  # containing spaces are passed to cmake as single arguments.
  ${CMAKE_COMMAND} "${TOPDIR}" "$@"
}
# Remove every build_* directory directly under TOPDIR, which is where
# prepare_build_dir creates them. The former implementation searched the
# current working directory instead and piped names through xargs, which
# splits directory names on whitespace.
function do_clean
{
  echo "clean build_* dirs"
  find "${TOPDIR}" -maxdepth 1 -type d -name 'build_*' -exec rm -rf {} +
}
# Translate the build type in BUILD_ARGS[0] into cmake options and run
# do_build with all of BUILD_ARGS followed by those options. When the first
# argument is not a known build type, "debug" is prepended and the lookup is
# repeated.
function build
{
  set -- "${BUILD_ARGS[@]}"

  local -a type_options
  case "$1" in
    release)
      type_options=(-DCMAKE_BUILD_TYPE=RelWithDebInfo -DDEBUG=OFF)
      ;;
    release_asan)
      type_options=(-DCMAKE_BUILD_TYPE=RelWithDebInfo -DDEBUG=OFF -DENABLE_ASAN=$ASAN_OPTION)
      ;;
    debug)
      type_options=(-DCMAKE_BUILD_TYPE=Debug -DDEBUG=ON)
      ;;
    debug_asan)
      type_options=(-DCMAKE_BUILD_TYPE=Debug -DDEBUG=ON -DENABLE_ASAN=$ASAN_OPTION)
      ;;
    *)
      # No (or an unknown) build type given: fall back to debug.
      BUILD_ARGS=(debug "${BUILD_ARGS[@]}")
      build
      return
      ;;
  esac

  do_build "$@" "${type_options[@]}"
}
# Entry point: dispatch on the first command line argument.
#   -h     print usage
#   init   fetch and build the third-party dependencies
#   clean  remove the build directories
#   other  parse the arguments, configure with cmake and then run make
# make is only started when the configure step succeeded; previously it ran
# even after a failed cmake invocation.
function main
{
  case "$1" in
    -h)
      usage
      ;;
    init)
      do_init
      ;;
    clean)
      do_clean
      ;;
    *)
      parse_args
      build && try_make
      ;;
  esac
}
main "$@"
Subproject commit f7547e29ccaed7b64ef4f7495ecfff1c9f6f3d03
Subproject commit 974e18ee6f146a2418f9cea83170c640e7d622d6
文件已移动
Subproject commit 6e1826dd7730330536e1838824bddd0d4d8adb0d
......@@ -258,4 +258,93 @@ void LockTrace::toString(std::string &result)
return;
}
/// Lock the wrapped mutex. The body is compiled in only when DEBUG is defined;
/// otherwise this call does nothing.
void DebugMutex::lock()
{
#ifdef DEBUG
lock_.lock();
#endif
}
/// Unlock the wrapped mutex. The body is compiled in only when DEBUG is defined;
/// otherwise this call does nothing.
void DebugMutex::unlock()
{
#ifdef DEBUG
lock_.unlock();
#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Lock the wrapped mutex; does nothing when CONCURRENCY is not defined.
void Mutex::lock()
{
#ifdef CONCURRENCY
lock_.lock();
#endif
}
/// Try to lock the wrapped mutex and return its result.
/// Always returns true when CONCURRENCY is not defined.
bool Mutex::try_lock()
{
#ifdef CONCURRENCY
return lock_.try_lock();
#else
return true;
#endif
}
/// Unlock the wrapped mutex; does nothing when CONCURRENCY is not defined.
void Mutex::unlock()
{
#ifdef CONCURRENCY
lock_.unlock();
#endif
}
////////////////////////////////////////////////////////////////////////////////
// SharedMutex has two sets of definitions. With CONCURRENCY defined every
// method forwards to the same-named method of the wrapped lock_; without it
// the lock/unlock methods are empty and the try_* methods always return true.
#ifdef CONCURRENCY
/// Acquire the lock exclusively.
void SharedMutex::lock()
{
lock_.lock();
}
/// Try to acquire the lock exclusively; returns the result of lock_.try_lock().
bool SharedMutex::try_lock()
{
return lock_.try_lock();
}
void SharedMutex::unlock() // unlock exclusive
{
lock_.unlock();
}
/// Acquire the lock in shared mode.
void SharedMutex::lock_shared()
{
lock_.lock_shared();
}
/// Try to acquire the lock in shared mode; returns the result of lock_.try_lock_shared().
bool SharedMutex::try_lock_shared()
{
return lock_.try_lock_shared();
}
/// Release a shared acquisition.
void SharedMutex::unlock_shared()
{
lock_.unlock_shared();
}
#else // CONCURRENCY undefined
// No-op variants used when CONCURRENCY is not defined.
void SharedMutex::lock()
{}
bool SharedMutex::try_lock()
{
return true;
}
void SharedMutex::unlock() // unlock exclusive
{}
void SharedMutex::lock_shared()
{}
bool SharedMutex::try_lock_shared()
{
return true;
}
void SharedMutex::unlock_shared()
{}
#endif // CONCURRENCY end
} // namespace common
\ No newline at end of file
......@@ -12,17 +12,18 @@ See the Mulan PSL v2 for more details. */
// Created by Longda on 2010
//
#ifndef __COMMON_LANG_MUTEX_H__
#define __COMMON_LANG_MUTEX_H__
#pragma once
#include <sys/types.h>
#include <errno.h>
#include <map>
#include <pthread.h>
#include <string.h>
#include <map>
#include <set>
#include <sstream>
#include <string.h>
#include <string>
#include <sys/types.h>
#include <mutex>
#include <shared_mutex>
#include "common/log/log.h"
......@@ -239,5 +240,54 @@ protected:
#endif // DEBUG_LOCK
} // namespace common
#endif // __COMMON_LANG_MUTEX_H__
/**
 * Mutex that holds a std::mutex only when DEBUG is defined; without DEBUG
 * the class has no data member.
 * NOTE(review): the class layout depends on DEBUG, so presumably every
 * translation unit including this header must agree on that macro - confirm.
 */
class DebugMutex final
{
public:
DebugMutex() = default;
~DebugMutex() = default;
void lock();
void unlock();
private:
#ifdef DEBUG
std::mutex lock_;
#endif
};
/**
 * Mutex that holds a std::mutex only when CONCURRENCY is defined; without
 * CONCURRENCY the class has no data member.
 */
class Mutex final
{
public:
Mutex() = default;
~Mutex() = default;
void lock();
bool try_lock();
void unlock();
private:
#ifdef CONCURRENCY
std::mutex lock_;
#endif
};
/**
 * Reader/writer lock that holds a std::shared_mutex only when CONCURRENCY is
 * defined; without CONCURRENCY the class has no data member.
 * lock/try_lock/unlock are the exclusive operations, the *_shared methods
 * are the shared ones.
 */
class SharedMutex final
{
public:
SharedMutex() = default;
~SharedMutex() = default;
void lock(); // lock exclusive
bool try_lock();
void unlock(); // unlock exclusive
void lock_shared();
bool try_lock_shared();
void unlock_shared();
private:
#ifdef CONCURRENCY
std::shared_mutex lock_;
#endif
};
} // namespace common
\ No newline at end of file
......@@ -16,6 +16,7 @@ See the Mulan PSL v2 for more details. */
#include <exception>
#include <stdarg.h>
#include <stdio.h>
#include <execinfo.h>
#include "common/lang/string.h"
#include "common/log/log.h"
......@@ -339,11 +340,29 @@ int LoggerFactory::init_default(
const std::string &log_file, LOG_LEVEL log_level, LOG_LEVEL console_level, LOG_ROTATE rotate_type)
{
if (g_log != nullptr) {
LOG_WARN("Default logger has been initialized");
LOG_INFO("Default logger has been initialized");
return 0;
}
return init(log_file, &g_log, log_level, console_level, rotate_type);
}
/**
 * Capture the current call stack and format the return addresses as a
 * space-separated list of hexadecimal numbers ("0x... 0x... ...").
 *
 * At most 100 frames are captured. The text is written into a thread_local
 * buffer of 4096 bytes, so the returned pointer stays valid until the next
 * call of lbt() on the same thread.
 *
 * @return pointer to the NUL-terminated text; an empty string when no frame
 *         could be captured.
 */
const char *lbt()
{
  constexpr int buffer_size = 100;
  void         *buffer[buffer_size];

  constexpr int     bt_buffer_size = 4096;
  thread_local char backtrace_buffer[bt_buffer_size];

  // Start from an empty string so that a call capturing no frames does not
  // return the text left over from a previous call on this thread.
  backtrace_buffer[0] = '\0';

  int size = backtrace(buffer, buffer_size);

  int offset = 0;
  // snprintf returns the length it *would* have written. Stop as soon as the
  // buffer is full so that `bt_buffer_size - offset` can never go negative
  // and turn into a huge size.
  for (int i = 0; i < size && offset < bt_buffer_size - 1; i++) {
    const char *format  = (0 == i) ? "0x%lx" : " 0x%lx";
    int         written = snprintf(backtrace_buffer + offset, bt_buffer_size - offset, format,
        reinterpret_cast<intptr_t>(buffer[i]));
    if (written < 0) {
      break;  // output error: keep what has been formatted so far
    }
    offset += written;
  }
  return backtrace_buffer;
}
} // namespace common
\ No newline at end of file
......@@ -12,14 +12,13 @@ See the Mulan PSL v2 for more details. */
// Created by Longda on 2010
//
#ifndef __COMMON_LOG_LOG_H__
#define __COMMON_LOG_LOG_H__
#pragma once
#include <sys/time.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <string.h>
#include <time.h>
#include <fstream>
#include <iostream>
......@@ -47,7 +46,8 @@ typedef enum {
typedef enum { LOG_ROTATE_BYDAY = 0, LOG_ROTATE_BYSIZE, LOG_ROTATE_LAST } LOG_ROTATE;
class Log {
class Log
{
public:
Log(const std::string &log_name, const LOG_LEVEL log_level = LOG_LEVEL_INFO,
const LOG_LEVEL console_level = LOG_LEVEL_WARN);
......@@ -161,31 +161,34 @@ extern Log *g_log;
#define LOG_HEAD(prefix, level) \
if (common::g_log) { \
time_t now_time; \
time(&now_time); \
struct tm *p = localtime(&now_time); \
struct timeval tv; \
gettimeofday(&tv, NULL); \
struct tm *p = localtime(&tv.tv_sec); \
char sz_head[LOG_HEAD_SIZE] = {0}; \
if (p) { \
int usec = (int)tv.tv_usec; \
snprintf(sz_head, LOG_HEAD_SIZE, \
"%d-%d-%d %d:%d:%u pid:%u tid:%llx ", \
"%04d-%02d-%02d %02d:%02d:%02u.%06d pid:%u tid:%llx ", \
p->tm_year + 1900, \
p->tm_mon + 1, \
p->tm_mday, \
p->tm_hour, \
p->tm_min, \
p->tm_sec, \
usec, \
(u32_t)getpid(), \
gettid()); \
common::g_log->rotate(p->tm_year + 1900, p->tm_mon + 1, p->tm_mday); \
} \
snprintf(prefix, \
sizeof(prefix), \
"[%s %s %s %s %u]>>", \
"[%s %s %s:%u %s]>>", \
sz_head, \
(common::g_log)->prefix_msg(level), \
__FILE_NAME__, \
__FUNCTION__, \
(u32_t)__LINE__); \
(u32_t)__LINE__, \
__FUNCTION__ \
); \
}
#define LOG_OUTPUT(level, fmt, ...) \
......@@ -287,20 +290,30 @@ int Log::out(const LOG_LEVEL console_level, const LOG_LEVEL log_level, T &msg)
}
/**
 * ASSERT(expression, description, ...)
 * Defined here only if no ASSERT macro exists yet.
 * - With DEBUG defined: when `expression` is false, the printf-style
 *   `description` (plus arguments) is logged through LOG_PANIC if
 *   common::g_log is set, and then assert(expression) is evaluated.
 * - Without DEBUG: the macro expands to nothing, so `expression` is not
 *   evaluated at all.
 */
#ifndef ASSERT
#ifdef DEBUG
#define ASSERT(expression, description, ...) \
do { \
if (!(expression)) { \
if (common::g_log) { \
LOG_PANIC(description, ##__VA_ARGS__); \
LOG_PANIC("\n"); \
} \
assert(expression); \
} \
} while (0)
#else // DEBUG
#define ASSERT(expression, description, ...)
#endif // DEBUG
#endif // ASSERT
#define SYS_OUTPUT_FILE_POS ", File:" << __FILE__ << ", line:" << __LINE__ << ",function:" << __FUNCTION__
#define SYS_OUTPUT_ERROR ",error:" << errno << ":" << strerror(errno)
/**
* 获取当前函数调用栈
*/
const char *lbt();
} // namespace common
#endif //__COMMON_LOG_LOG_H__
// __CR__
// Copyright (c) 2021 LongdaFeng All Rights Reserved
//
// This software contains the intellectual property of LongdaFeng
// or is licensed to LongdaFeng from third parties. Use of this
// software and the intellectual property contained therein is
// expressly limited to the terms and conditions of the License Agreement
// under which it is provided by or on behalf of LongdaFeng.
// __CR__
//
// Created by Longda on 2010
//
#include "mm/debug_new.h"
#include <new>
#include <stdio.h>
#include <stdlib.h>
#ifdef _MSC_VER
#pragma warning(disable : 4073)
#pragma init_seg(lib)
#endif
#ifndef DEBUG_NEW_HASHTABLESIZE
#define DEBUG_NEW_HASHTABLESIZE 16384
#endif
// Maps a pointer to a bucket index in [0, DEBUG_NEW_HASHTABLESIZE).
// The pointer is converted to size_t rather than `unsigned`: on 64-bit targets
// `unsigned` is narrower than a pointer, so that cast loses information and is
// rejected by some compilers.
#ifndef DEBUG_NEW_HASH
#define DEBUG_NEW_HASH(p) (((size_t)(p) >> 8) % DEBUG_NEW_HASHTABLESIZE)
#endif
// The default behaviour now is to copy the file name, because we found
// that the exit leakage check cannot access the address of the file
// name sometimes (in our case, a core dump will occur when trying to
// access the file name in a shared library after a SIGINT).
#ifndef DEBUG_NEW_FILENAME_LEN
#define DEBUG_NEW_FILENAME_LEN 20
#endif
#if DEBUG_NEW_FILENAME_LEN == 0 && !defined(DEBUG_NEW_NO_FILENAME_COPY)
#define DEBUG_NEW_NO_FILENAME_COPY
#endif
#ifndef DEBUG_NEW_NO_FILENAME_COPY
#include <string.h>
#endif
// Bookkeeping header stored directly in front of every block handed out by
// the tracking operator new; headers in the same hash bucket form a singly
// linked list.
struct new_ptr_list_t {
new_ptr_list_t *next;  // next header in the same hash bucket
#ifdef DEBUG_NEW_NO_FILENAME_COPY
const char *file;  // file name of the allocation site (pointer is stored, not copied)
#else
char file[DEBUG_NEW_FILENAME_LEN];  // truncated copy of the allocation site's file name
#endif
int line;  // line number of the allocation site
size_t size;  // size requested by the caller, excluding this header
};
// Hash table of live allocations, indexed by DEBUG_NEW_HASH(user pointer).
static new_ptr_list_t *new_ptr_list[DEBUG_NEW_HASHTABLESIZE];
// When true, every tracked allocation and deallocation is printed.
bool new_verbose_flag = false;
// When true, check_leaks() is run by the destructor of the static new_check_t object.
bool new_autocheck_flag = true;
bool check_leaks()
{
bool fLeaked = false;
for (int i = 0; i < DEBUG_NEW_HASHTABLESIZE; ++i) {
new_ptr_list_t *ptr = new_ptr_list[i];
if (ptr == NULL)
continue;
fLeaked = true;
while (ptr) {
printf("Leaked object at %p (size %llu, %s:%d)\n",
(char *)ptr + sizeof(new_ptr_list_t),
(unsigned long long)ptr->size,
ptr->file,
ptr->line);
ptr = ptr->next;
}
}
if (fLeaked)
return true;
else
return false;
}
// Tracking allocator: allocates `size` bytes preceded by a new_ptr_list_t
// header, records the allocation site (file, line) and the size in that
// header, and links the header into the hash bucket of the returned pointer.
// Aborts the process when malloc fails.
void *operator new(size_t size, const char *file, int line)
{
  size_t          s   = size + sizeof(new_ptr_list_t);
  new_ptr_list_t *ptr = (new_ptr_list_t *)malloc(s);
  if (ptr == NULL) {
    // size_t does not match "%u"; print it the same way check_leaks() does.
    fprintf(stderr, "new: out of memory when allocating %llu bytes\n", (unsigned long long)size);
    abort();
  }

  // The caller's memory starts right behind the bookkeeping header.
  void  *pointer    = (char *)ptr + sizeof(new_ptr_list_t);
  size_t hash_index = DEBUG_NEW_HASH(pointer);
  ptr->next         = new_ptr_list[hash_index];
#ifdef DEBUG_NEW_NO_FILENAME_COPY
  ptr->file = file;
#else
  // strncpy does not terminate the copy when `file` is too long, so do it explicitly.
  strncpy(ptr->file, file, DEBUG_NEW_FILENAME_LEN - 1);
  ptr->file[DEBUG_NEW_FILENAME_LEN - 1] = '\0';
#endif
  ptr->line                = line;
  ptr->size                = size;
  new_ptr_list[hash_index] = ptr;

  if (new_verbose_flag)
    printf("new: allocated %p (size %llu, %s:%d)\n", pointer, (unsigned long long)size, file, line);
  return pointer;
}
// Array form with allocation site: forwards to the tracking operator new.
void *operator new[](size_t size, const char *file, int line)
{
return operator new(size, file, line);
}
// Plain operator new: tracked with the placeholder site "<Unknown>":0.
void *operator new(size_t size)
{
return operator new(size, "<Unknown>", 0);
}
// Plain array form: forwards to the plain operator new.
void *operator new[](size_t size)
{
return operator new(size);
}
// nothrow form: forwards to the plain operator new.
void *operator new(size_t size, const std::nothrow_t &) throw()
{
return operator new(size);
}
// nothrow array form: forwards to the plain operator new[].
void *operator new[](size_t size, const std::nothrow_t &) throw()
{
return operator new[](size);
}
// Tracking deallocator: looks up `pointer` in its hash bucket, unlinks the
// bookkeeping header and frees the block. A null pointer is ignored; a
// pointer that is not registered aborts the process.
void operator delete(void *pointer)
{
  if (pointer == NULL)
    return;

  size_t          hash_index = DEBUG_NEW_HASH(pointer);
  new_ptr_list_t *ptr        = new_ptr_list[hash_index];
  new_ptr_list_t *ptr_last   = NULL;
  while (ptr) {
    // The header sits directly in front of the memory given to the caller.
    if ((char *)ptr + sizeof(new_ptr_list_t) == pointer) {
      if (new_verbose_flag)
        // size_t does not match "%u"; print it the same way check_leaks() does.
        printf("delete: freeing %p (size %llu)\n", pointer, (unsigned long long)ptr->size);
      if (ptr_last == NULL)
        new_ptr_list[hash_index] = ptr->next;
      else
        ptr_last->next = ptr->next;
      free(ptr);
      return;
    }
    ptr_last = ptr;
    ptr      = ptr->next;
  }

  fprintf(stderr, "delete: invalid pointer %p\n", pointer);
  abort();
}
// Array form: forwards to the tracking operator delete.
void operator delete[](void *pointer)
{
operator delete(pointer);
}
// Some older compilers like Borland C++ Compiler 5.5.1 and Digital Mars
// Compiler 8.29 do not support placement delete operators.
// NO_PLACEMENT_DELETE needs to be defined when using such compilers.
// Also note that in that case memory leakage will occur if an exception
// is thrown in the initialization (constructor) of a dynamically
// created object.
#ifndef NO_PLACEMENT_DELETE
// Placement delete matching operator new(size, file, line): optionally prints
// a message, then releases the block through the tracking operator delete.
void operator delete(void *pointer, const char *file, int line)
{
if (new_verbose_flag)
printf("info: exception thrown on initializing object at %p (%s:%d)\n", pointer, file, line);
operator delete(pointer);
}
// Array form of the placement delete above.
void operator delete[](void *pointer, const char *file, int line)
{
operator delete(pointer, file, line);
}
// nothrow form: forwards with the placeholder site "<Unknown>":0.
void operator delete(void *pointer, const std::nothrow_t &)
{
operator delete(pointer, "<Unknown>", 0);
}
// nothrow array form: forwards to the nothrow form above.
void operator delete[](void *pointer, const std::nothrow_t &)
{
operator delete(pointer, std::nothrow);
}
#endif // NO_PLACEMENT_DELETE
// Helper whose single static instance triggers the leak check from its
// destructor, provided new_autocheck_flag is set.
class new_check_t {
public:
  new_check_t()
  {}

  ~new_check_t()
  {
    // When leaks are reported, switch on verbose mode so that deletes
    // performed later by destructors of other global/static objects are
    // printed too; this compensates for possible false leak reports.
    if (new_autocheck_flag && check_leaks()) {
      new_verbose_flag = true;
    }
  }
};

static new_check_t new_check_object;
......@@ -132,6 +132,13 @@ void *MemPoolItem::alloc()
return buffer;
}
/**
 * Allocate one item via alloc() and wrap it in a MemPoolItem::unique_ptr
 * whose deleter hands the item back to this pool through free().
 */
MemPoolItem::unique_ptr MemPoolItem::alloc_unique_ptr()
{
  return MemPoolItem::unique_ptr(this->alloc(), [this](void *p) { this->free(p); });
}
void MemPoolItem::free(void *buf)
{
MUTEX_LOCK(&this->mutex);
......
......@@ -12,14 +12,15 @@ See the Mulan PSL v2 for more details. */
// Created by Longda on 2010
//
#ifndef __COMMON_MM_MPOOL_H__
#define __COMMON_MM_MPOOL_H__
#pragma once
#include <queue>
#include <list>
#include <set>
#include <string>
#include <sstream>
#include <functional>
#include <memory>
#include "common/lang/mutex.h"
#include "common/log/log.h"
......@@ -290,12 +291,15 @@ T *MemPoolSimple<T>::alloc()
used.insert(buffer);
MUTEX_UNLOCK(&this->mutex);
new (buffer) T();
return buffer;
}
template <class T>
void MemPoolSimple<T>::free(T *buf)
{
buf->~T();
MUTEX_LOCK(&this->mutex);
size_t num = used.erase(buf);
......@@ -327,6 +331,9 @@ std::string MemPoolSimple<T>::to_string()
}
class MemPoolItem {
public:
using unique_ptr = std::unique_ptr<void, std::function<void(void * const)>>;
public:
MemPoolItem(const char *tag) : name(tag)
{
......@@ -369,6 +376,7 @@ public:
* @return
*/
void *alloc();
unique_ptr alloc_unique_ptr();
/**
* Free one item, the resource will return to memory Pool
......@@ -446,4 +454,3 @@ protected:
};
} // namespace common
#endif /* __COMMON_MM_MPOOL_H__ */
Subproject commit 93f08be653c36ddc6943e9513fc14c7292b4d007
Subproject commit 117ee9a03fc74617e378f755c6b25005ac0e954f
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by Wangyunlai on 2023/03/07.
//
#include "session/thread_data.h"
#include "session/session.h"
// Out-of-class definition of the static thread_local member declared in
// ThreadData. Being thread_local, every thread has its own copy of the pointer.
thread_local ThreadData * ThreadData::thread_data_;
/**
 * Transaction of the attached session.
 *
 * @return nullptr when no session is attached, otherwise whatever
 *         Session::current_trx() returns for the attached session.
 */
Trx *ThreadData::trx() const
{
  if (session_ == nullptr) {
    return nullptr;
  }
  return session_->current_trx();
}
\ No newline at end of file
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by Wangyunlai on 2023/03/07.
//
#pragma once
class Trx;
class Session;

/**
 * Small per-thread context object.
 *
 * A thread registers its instance with setup() and any code running on that
 * thread can fetch it again with current(). The object only stores a
 * non-owning pointer to a Session.
 */
class ThreadData
{
public:
  /// Pointer last registered with setup() on the calling thread.
  static ThreadData *current()
  {
    return thread_data_;
  }

  /// Register `thread` as the calling thread's ThreadData.
  static void setup(ThreadData *thread)
  {
    thread_data_ = thread;
  }

public:
  ThreadData()  = default;
  ~ThreadData() = default;

  /// Session attached with set_session(), or nullptr if none was attached.
  Session *session() const
  {
    return session_;
  }

  /// Transaction of the attached session; defined out of line.
  Trx *trx() const;

  /// Attach (or, with nullptr, detach) a session. The pointer is not owned.
  void set_session(Session *session)
  {
    session_ = session;
  }

private:
  // One pointer per thread; defined in the corresponding source file.
  static thread_local ThreadData *thread_data_;

private:
  Session *session_ = nullptr;
};
\ No newline at end of file
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by lianyu on 2022/10/29.
//
#include "storage/buffer/frame.h"
#include "session/thread_data.h"
// Builds a frame identifier from a file descriptor and a page number; both
// values are stored as given.
FrameId::FrameId(int file_desc, PageNum page_num) : file_desc_(file_desc), page_num_(page_num)
{}
/**
 * Two ids are equal when both the file descriptor and the page number match.
 */
bool FrameId::equal_to(const FrameId &other) const
{
  if (file_desc_ != other.file_desc_) {
    return false;
  }
  return page_num_ == other.page_num_;
}
// Equality operator; delegates to equal_to().
bool FrameId::operator==(const FrameId &other) const
{
return this->equal_to(other);
}
/**
 * Hash value combining the file descriptor (upper 32 bits) and the page
 * number (lower 32 bits).
 *
 * Both fields are first reduced to their unsigned 32-bit pattern. This keeps a
 * negative page number (e.g. an invalid-page marker of -1) from being
 * sign-extended over the file-descriptor bits, and the value is composed in a
 * 64-bit integer so the 32-bit shift is well defined even where size_t is only
 * 32 bits wide (the result is then truncated to size_t).
 */
size_t FrameId::hash() const
{
  const uint64_t fd_bits   = static_cast<uint32_t>(file_desc_);
  const uint64_t page_bits = static_cast<uint32_t>(page_num_);
  return static_cast<size_t>((fd_bits << 32) | page_bits);
}
// Returns the file descriptor part of the id.
int FrameId::file_desc() const
{
return file_desc_;
}
// Returns the page number part of the id.
PageNum FrameId::page_num() const
{
return page_num_;
}
/**
 * Human-readable form of a FrameId: "fd:<file_desc>,page_num:<page_num>".
 */
std::string to_string(const FrameId &frame_id)
{
  std::stringstream out;
  out << "fd:" << frame_id.file_desc();
  out << ",page_num:" << frame_id.page_num();
  return out.str();
}
////////////////////////////////////////////////////////////////////////////////
/**
 * Identifier used by the latch bookkeeping when the caller does not pass one.
 *
 * If the calling thread has a ThreadData registered, the address of that
 * object is used; otherwise the value of pthread_self() is used.
 */
intptr_t get_default_debug_xid()
{
  ThreadData *thd = ThreadData::current();
  if (thd != nullptr) {
    return reinterpret_cast<intptr_t>(thd);
  }

  // pthread_self() returns a pthread_t, whose type differs between Linux and
  // macOS: an integer type on Linux, a pointer on macOS. Converting twice
  // (first to void*, then to intptr_t) compiles on both platforms.
  return reinterpret_cast<intptr_t>(reinterpret_cast<void *>(pthread_self()));
}
// Convenience overload: takes the write latch using the id returned by
// get_default_debug_xid() as the owner.
void Frame::write_latch()
{
write_latch(get_default_debug_xid());
}
// Takes the frame latch for writing on behalf of `xid`.
//
// While holding debug_lock_ it asserts that the frame is pinned, that `xid` is
// not already recorded as the write locker, and that `xid` is not in the set of
// read lockers. It then blocks on lock_ and records `xid` as the write locker.
void Frame::write_latch(intptr_t xid)
{
{
std::scoped_lock debug_lock(debug_lock_);
ASSERT(pin_count_.load() > 0,
"frame lock. write lock failed while pin count is invalid. "
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
ASSERT(write_locker_ != xid,
"frame lock write twice."
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
ASSERT(read_lockers_.find(xid) == read_lockers_.end(),
"frame lock write while holding the read lock."
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
}
// debug_lock_ has been released by the scope above, so it is not held while
// this call may block.
lock_.lock();
// Written while holding lock_.
write_locker_ = xid;
LOG_DEBUG("frame write lock success."
"this=%p, pin=%d, pageNum=%d, write locker=%lx, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, write_locker_, file_desc_, xid, lbt());
// Disabled alternative based on the pthread rwlock member:
// pthread_rwlock_wrlock(&rwlock_);
}
// Convenience overload: releases the write latch using the id returned by
// get_default_debug_xid() as the owner.
void Frame::write_unlatch()
{
write_unlatch(get_default_debug_xid());
}
// Releases the write latch held by `xid`.
//
// Asserts that the frame is pinned and that `xid` is the recorded write
// locker, then clears the write locker and unlocks lock_.
void Frame::write_unlatch(intptr_t xid)
{
// The caller currently holds the write latch and there can be only one write
// latch holder, so the checks below are done without taking debug_lock_.
ASSERT(pin_count_.load() > 0,
"frame lock. write unlock failed while pin count is invalid."
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
ASSERT(write_locker_ == xid,
"frame unlock write while not the owner."
"write_locker=%lx, this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
write_locker_, this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
LOG_DEBUG("frame write unlock success. this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
// Clear the owner before unlocking, i.e. while lock_ is still held.
write_locker_ = 0;
lock_.unlock();
// Disabled alternative based on the pthread rwlock member:
// pthread_rwlock_unlock(&rwlock_);
}
// Convenience overload: takes the read latch using the id returned by
// get_default_debug_xid() as the owner.
void Frame::read_latch()
{
read_latch(get_default_debug_xid());
}
/**
 * Take the frame latch for reading on behalf of `xid`.
 *
 * Under debug_lock_ it asserts that the frame is pinned, that `xid` is not
 * already registered as a read locker and that `xid` is not the write locker.
 * It then blocks on lock_.
 *
 * `xid` is added to read_lockers_ only after lock_ has been acquired, the same
 * order try_read_latch() uses. This way the set never lists a thread that is
 * still waiting for the latch, and the insertion happens while lock_ is held
 * (read_unlatch() likewise erases before unlocking), so the set is not
 * modified by a thread that holds no lock in builds where debug_lock_ does
 * nothing.
 *
 * @param xid identifier of the caller used for the bookkeeping
 */
void Frame::read_latch(intptr_t xid)
{
  {
    std::scoped_lock debug_lock(debug_lock_);
    ASSERT(pin_count_ > 0, "frame lock. read lock failed while pin count is invalid."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
    ASSERT(read_lockers_.find(xid) == read_lockers_.end(),
        "frame lock read double times."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
    ASSERT(xid != write_locker_,
        "frame lock read while holding the write lock."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
  }

  // debug_lock_ is not held while blocking here.
  lock_.lock();

  {
    // Register the reader now that the latch is really held.
    std::scoped_lock debug_lock(debug_lock_);
    read_lockers_.insert(xid);
    LOG_DEBUG("frame read lock success."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
  }
}
/**
 * Try to take the frame latch for reading without blocking.
 *
 * The caller id comes from get_default_debug_xid(). The same sanity checks as
 * in read_latch() are asserted under debug_lock_. If lock_.try_lock()
 * succeeds, the caller is added to read_lockers_.
 *
 * The bookkeeping after a successful try_lock uses a scoped lock, like the
 * rest of this file, so debug_lock_ is released on every exit path (including
 * an exception thrown by the set insertion).
 *
 * @return true if the latch was acquired, false otherwise
 */
bool Frame::try_read_latch()
{
  intptr_t xid = get_default_debug_xid();
  {
    std::scoped_lock debug_lock(debug_lock_);
    ASSERT(pin_count_ > 0, "frame try lock. read lock failed while pin count is invalid."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
    ASSERT(read_lockers_.find(xid) == read_lockers_.end(),
        "frame try to lock read double times."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
    ASSERT(xid != write_locker_,
        "frame try to lock read while holding the write lock."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
  }

  bool ret = lock_.try_lock();
  if (ret) {
    std::scoped_lock debug_lock(debug_lock_);
    read_lockers_.insert(xid);
    LOG_DEBUG("frame read lock success."
        "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
        this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
  }
  return ret;
}
// Convenience overload: releases the read latch using the id returned by
// get_default_debug_xid() as the owner.
void Frame::read_unlatch()
{
read_unlatch(get_default_debug_xid());
}
// Releases the read latch held by `xid`.
//
// Under debug_lock_ it asserts that the frame is pinned and that `xid` is a
// registered read locker, removes `xid` from the set, and finally unlocks
// lock_.
void Frame::read_unlatch(intptr_t xid)
{
{
std::scoped_lock debug_lock(debug_lock_);
ASSERT(pin_count_.load() > 0,
"frame lock. read unlock failed while pin count is invalid."
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
ASSERT(read_lockers_.find(xid) != read_lockers_.end(),
"frame unlock while not holding read lock."
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
// The reader is unregistered before lock_ is released below.
read_lockers_.erase(xid);
}
LOG_DEBUG("frame read unlock success."
"this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());
lock_.unlock();
// Disabled alternative based on the pthread rwlock member:
// pthread_rwlock_unlock(&rwlock_);
}
void Frame::pin()
{
std::scoped_lock debug_lock(debug_lock_);
intptr_t xid = get_default_debug_xid();
int pin_count = ++pin_count_;
LOG_DEBUG("after frame pin. this=%p, write locker=%lx, read locker has xid %d? pin=%d, fd=%d, pageNum=%d, xid=%lx, lbt=%s",
this, write_locker_, read_lockers_.find(xid) != read_lockers_.end(),
pin_count, file_desc_, page_.page_num, xid, lbt());
}
/**
 * Drop one reference to this frame.
 *
 * Asserts that the frame is currently pinned, then, under debug_lock_,
 * decrements the atomic pin counter and logs the resulting state. When the
 * counter reaches zero it additionally asserts that nobody is recorded as
 * holding the write latch or a read latch.
 *
 * The reader count is converted to int before it is passed to the "%d"
 * conversion in the assertion message; std::set::size() returns size_t, which
 * does not match "%d".
 *
 * @return the pin count after the decrement
 */
int Frame::unpin()
{
  intptr_t xid = get_default_debug_xid();

  ASSERT(pin_count_.load() > 0,
      "try to unpin a frame that pin count <= 0."
      "this=%p, pin=%d, pageNum=%d, fd=%d, xid=%lx, lbt=%s",
      this, pin_count_.load(), page_.page_num, file_desc_, xid, lbt());

  std::scoped_lock debug_lock(debug_lock_);

  int pin_count = --pin_count_;
  LOG_DEBUG("after frame unpin. "
      "this=%p, write locker=%lx, read locker has xid? %d, pin=%d, fd=%d, pageNum=%d, xid=%lx, lbt=%s",
      this, write_locker_, read_lockers_.find(xid) != read_lockers_.end(),
      pin_count, file_desc_, page_.page_num, xid, lbt());

  if (0 == pin_count) {
    // An unpinned frame must not be latched by anyone.
    ASSERT(write_locker_ == 0,
        "frame unpin to 0 failed while someone hold the write lock. write locker=%lx, pageNum=%d, fd=%d, xid=%lx",
        write_locker_, page_.page_num, file_desc_, xid);
    ASSERT(read_lockers_.empty(),
        "frame unpin to 0 failed while someone hold the read locks. reader num=%d, pageNum=%d, fd=%d, xid=%lx",
        static_cast<int>(read_lockers_.size()), page_.page_num, file_desc_, xid);
  }
  return pin_count;
}
/**
 * Current reading of CLOCK_MONOTONIC expressed in nanoseconds.
 */
unsigned long current_time()
{
  const unsigned long nanos_per_second = 1000UL * 1000UL * 1000UL;

  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);

  const unsigned long whole_seconds = static_cast<unsigned long>(now.tv_sec);
  const unsigned long nanos         = static_cast<unsigned long>(now.tv_nsec);
  return whole_seconds * nanos_per_second + nanos;
}
// Records the current value of current_time() as this frame's access time.
void Frame::access()
{
acc_time_ = current_time();
}
/**
 * Human-readable summary of a frame: its id, dirty flag, pin count, file
 * descriptor, page number and lsn.
 */
std::string to_string(const Frame &frame)
{
  std::stringstream out;
  out << "frame id:" << to_string(frame.frame_id());
  out << ", dirty=" << frame.dirty();
  out << ", pin=" << frame.pin_count();
  out << ", fd=" << frame.file_desc();
  out << ", page num=" << frame.page_num();
  out << ", lsn=" << frame.lsn();
  return out.str();
}
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by lianyu on 2022/10/29.
//
#pragma once
#include <pthread.h>
#include <string.h>
#include <string>
#include <mutex>
#include <set>
#include <atomic>
#include "storage/buffer/page.h"
#include "common/log/log.h"
#include "common/lang/mutex.h"
// Identifies a frame by the pair (file descriptor, page number).
class FrameId
{
public:
FrameId(int file_desc, PageNum page_num);
// Member-wise comparison of file descriptor and page number.
bool equal_to(const FrameId &other) const;
bool operator==(const FrameId &other) const;
// Hash value derived from both fields.
size_t hash() const;
int file_desc() const;
PageNum page_num() const;
// Textual form of the id, for logging.
friend std::string to_string(const FrameId &frame_id);
private:
int file_desc_;
PageNum page_num_;
};
// An in-memory slot holding one Page together with its bookkeeping: the file
// descriptor it belongs to, a dirty flag, a pin (reference) count, an access
// time and the latch state.
class Frame
{
public:
// Zero-fills the whole page, header fields included.
void clear_page()
{
memset(&page_, 0, sizeof(page_));
}
int file_desc() const { return file_desc_; }
void set_file_desc(int fd) { file_desc_ = fd; }
Page & page() { return page_; }
PageNum page_num() const { return page_.page_num; }
void set_page_num(PageNum page_num) { page_.page_num = page_num; }
// Id built from the current file descriptor and page number.
FrameId frame_id() const { return FrameId(file_desc_, page_.page_num); }
LSN lsn() const { return page_.lsn; }
void set_lsn(LSN lsn) { page_.lsn = lsn; }
/// Refresh the access time. TODO touch is better?
void access();
/**
 * Mark the page as "dirty". Call this after modifying the page content so
 * that the new page data is written to the disk file when the page is evicted
 * from the buffer.
 */
void mark_dirty() { dirty_ = true; }
void clear_dirty() { dirty_ = false; }
bool dirty() const { return dirty_; }
char *data() { return page_.data; }
// True when the pin count is zero.
bool can_purge() { return pin_count_.load() == 0; }
/**
 * Increase the reference count of this frame.
 * pin is usually called while holding the frame manager lock.
 */
void pin();
/**
 * Release one reference of this frame.
 * Counterpart of pin, but usually called without holding the frame manager
 * lock.
 */
int unpin();
int pin_count() const { return pin_count_.load(); }
// Latch operations. The overloads without an argument use a default caller id;
// the overloads taking `xid` use the given id for the bookkeeping.
void write_latch();
void write_latch(intptr_t xid);
void write_unlatch();
void write_unlatch(intptr_t xid);
void read_latch();
void read_latch(intptr_t xid);
bool try_read_latch();
void read_unlatch();
void read_unlatch(intptr_t xid);
friend std::string to_string(const Frame &frame);
private:
friend class BufferPool;
bool dirty_ = false;
std::atomic<int> pin_count_{0};
unsigned long acc_time_ = 0;
int file_desc_ = -1;
Page page_;
// read-write lock
pthread_rwlock_t rwlock_ = PTHREAD_RWLOCK_INITIALIZER;
/// In a non-concurrent build, locking and unlocking do nothing.
common::Mutex lock_;
/// Bookkeeping used for testing, to detect painful deadlock problems early.
/// If the debug option is not enabled at compile time, this code does nothing.
common::DebugMutex debug_lock_;
// Id of the current write latch holder, 0 when there is none.
intptr_t write_locker_ = 0;
// Ids currently registered as read latch holders.
std::set<intptr_t> read_lockers_;
};
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by Wangyunlai on 2023/03/07.
//
#pragma once
#include <stdint.h>
using TrxID = int32_t;
using SpaceID = int32_t;

/// Disk files, both data files and index (B+-Tree) files, are organized in
/// pages. Every page has a number, called PageNum.
using PageNum = int32_t;

/// Inside a data file each page stores some rows (records). Every row/record
/// occupies one slot, and the slots are numbered; that number is the SlotNum.
using SlotNum = int32_t;

/// LSN for log sequence number
using LSN = int32_t;

static constexpr int BP_INVALID_PAGE_NUM = -1;
static constexpr int INVALID_TRX_ID = -1;
static constexpr int INVALID_LSN = -1;
static constexpr PageNum BP_HEADER_PAGE = 0;
static constexpr int LOG_BUFFER_SIZE = 1<<10; // TODO move to log record

/// Size in bytes of one page, header fields included.
static constexpr const int BP_PAGE_SIZE = (1 << 13);
/// Bytes left for payload once the page number and the LSN are subtracted.
static constexpr const int BP_PAGE_DATA_SIZE = (BP_PAGE_SIZE - sizeof(PageNum) - sizeof(LSN));

/// One page: page number, LSN, then the payload bytes.
struct Page
{
  PageNum page_num;
  LSN lsn;
  char data[BP_PAGE_DATA_SIZE];
};

// Code that reads and writes pages uses both sizeof(Page) and BP_PAGE_SIZE for
// lengths and file offsets, so the two must stay equal. Fail the build if a
// change to the struct (or compiler padding) ever breaks that.
static_assert(sizeof(Page) == BP_PAGE_SIZE, "Page must occupy exactly BP_PAGE_SIZE bytes");
......@@ -11,10 +11,10 @@ See the Mulan PSL v2 for more details. */
//
// Created by Meiyi & Longda on 2021/4/13.
//
#include "disk_buffer_pool.h"
#include <errno.h>
#include <string.h>
#include "storage/default/disk_buffer_pool.h"
#include "common/lang/mutex.h"
#include "common/log/log.h"
#include "common/os/os.h"
......@@ -22,15 +22,9 @@ See the Mulan PSL v2 for more details. */
using namespace common;
static const PageNum BP_HEADER_PAGE = 0;
static const int MEM_POOL_ITEM_NUM = 128;
unsigned long current_time()
{
struct timespec tp;
clock_gettime(CLOCK_MONOTONIC, &tp);
return tp.tv_sec * 1000 * 1000 * 1000UL + tp.tv_nsec;
}
////////////////////////////////////////////////////////////////////////////////
// Constructs the frame manager; `name` is forwarded to the allocator_ member.
BPFrameManager::BPFrameManager(const char *name) : allocator_(name)
{}
......@@ -54,44 +48,80 @@ RC BPFrameManager::cleanup()
return RC::SUCCESS;
}
Frame *BPFrameManager::begin_purge()
int BPFrameManager::purge_frames(int count, std::function<RC(Frame *frame)> purger)
{
Frame *frame_can_purge = nullptr;
auto purge_finder = [&frame_can_purge](const BPFrameId &frame_id, Frame *const frame) {
std::lock_guard<std::mutex> lock_guard(lock_);
std::vector<Frame *> frames_can_purge;
if (count <= 0) {
count = 1;
}
frames_can_purge.reserve(count);
auto purge_finder = [&frames_can_purge, count](const FrameId &frame_id, Frame *const frame) {
if (frame->can_purge()) {
frame_can_purge = frame;
return false; // false to break the progress
frame->pin();
frames_can_purge.push_back(frame);
if (frames_can_purge.size() >= static_cast<size_t>(count)) {
return false; // false to break the progress
}
}
return true; // true continue to look up
};
frames_.foreach_reverse(purge_finder);
return frame_can_purge;
LOG_INFO("purge frames find %ld pages total", frames_can_purge.size());
/// 当前还在frameManager的锁内,而 purger 是一个非常耗时的操作
/// 他需要把脏页数据刷新到磁盘上去,所以这里会极大地降低并发度
int freed_count = 0;
for (Frame *frame : frames_can_purge) {
RC rc = purger(frame);
if (RC::SUCCESS == rc) {
free_internal(frame->frame_id(), frame);
freed_count++;
} else {
frame->unpin();
LOG_WARN("failed to purge frame. frame_id=%s, rc=%s",
to_string(frame->frame_id()).c_str(), strrc(rc));
}
}
LOG_INFO("purge frame done. number=%d", freed_count);
return freed_count;
}
Frame *BPFrameManager::get(int file_desc, PageNum page_num)
{
BPFrameId frame_id(file_desc, page_num);
FrameId frame_id(file_desc, page_num);
std::lock_guard<std::mutex> lock_guard(lock_);
return get_internal(frame_id);
}
/**
 * Look up `frame_id` in frames_ and pin the frame if one is found.
 *
 * This function takes no lock itself.
 *
 * @return the pinned frame, or nullptr when the lookup yields no frame
 */
Frame *BPFrameManager::get_internal(const FrameId &frame_id)
{
  Frame *found = nullptr;
  (void)frames_.get(frame_id, found);
  if (found == nullptr) {
    return nullptr;
  }

  found->pin();
  return found;
}
Frame *BPFrameManager::alloc(int file_desc, PageNum page_num)
{
BPFrameId frame_id(file_desc, page_num);
FrameId frame_id(file_desc, page_num);
std::lock_guard<std::mutex> lock_guard(lock_);
Frame *frame = nullptr;
bool found = frames_.get(frame_id, frame);
if (found) {
// assert (frame != nullptr);
return nullptr; // should use get
Frame *frame = get_internal(frame_id);
if (frame != nullptr) {
return frame;
}
frame = allocator_.alloc();
if (frame != nullptr) {
ASSERT(frame->pin_count() == 0, "got an invalid frame that pin count is not 0. frame=%s",
to_string(*frame).c_str());
frame->pin();
frames_.put(frame_id, frame);
}
return frame;
......@@ -99,20 +129,21 @@ Frame *BPFrameManager::alloc(int file_desc, PageNum page_num)
RC BPFrameManager::free(int file_desc, PageNum page_num, Frame *frame)
{
BPFrameId frame_id(file_desc, page_num);
FrameId frame_id(file_desc, page_num);
std::lock_guard<std::mutex> lock_guard(lock_);
return free_internal(frame_id, frame);
}
RC BPFrameManager::free_internal(const FrameId &frame_id, Frame *frame)
{
Frame *frame_source = nullptr;
bool found = frames_.get(frame_id, frame_source);
if (!found || frame != frame_source) {
LOG_WARN("failed to find frame or got frame not match. file_desc=%d, PageNum=%d, frame_source=%p, frame=%p",
file_desc,
page_num,
frame_source,
frame);
return RC::GENERIC_ERROR;
}
ASSERT(found && frame == frame_source && frame->pin_count() == 1,
"failed to free frame. found=%d, frameId=%s, frame_source=%p, frame=%p, pinCount=%d, lbt=%s",
found, to_string(frame_id).c_str(), frame_source, frame, frame->pin_count(), lbt());
frame->unpin();
frames_.remove(frame_id);
allocator_.free(frame);
return RC::SUCCESS;
......@@ -123,8 +154,9 @@ std::list<Frame *> BPFrameManager::find_list(int file_desc)
std::lock_guard<std::mutex> lock_guard(lock_);
std::list<Frame *> frames;
auto fetcher = [&frames, file_desc](const BPFrameId &frame_id, Frame *const frame) -> bool {
auto fetcher = [&frames, file_desc](const FrameId &frame_id, Frame *const frame) -> bool {
if (file_desc == frame_id.file_desc()) {
frame->pin();
frames.push_back(frame);
}
return true;
......@@ -177,17 +209,17 @@ DiskBufferPool::DiskBufferPool(BufferPoolManager &bp_manager, BPFrameManager &fr
DiskBufferPool::~DiskBufferPool()
{
close_file();
LOG_INFO("Exit");
LOG_INFO("disk buffer pool exit");
}
RC DiskBufferPool::open_file(const char *file_name)
{
int fd;
if ((fd = open(file_name, O_RDWR)) < 0) {
int fd = open(file_name, O_RDWR);
if (fd < 0) {
LOG_ERROR("Failed to open file %s, because %s.", file_name, strerror(errno));
return RC::IOERR_ACCESS;
}
LOG_INFO("Successfully open file %s.", file_name);
LOG_INFO("Successfully open buffer pool file %s.", file_name);
file_name_ = file_name;
file_desc_ = fd;
......@@ -201,13 +233,11 @@ RC DiskBufferPool::open_file(const char *file_name)
return rc;
}
hdr_frame_->dirty_ = false;
hdr_frame_->file_desc_ = fd;
hdr_frame_->pin_count_ = 1;
hdr_frame_->acc_time_ = current_time();
hdr_frame_->set_file_desc(fd);
hdr_frame_->access();
if ((rc = load_page(BP_HEADER_PAGE, hdr_frame_)) != RC::SUCCESS) {
LOG_ERROR("Failed to load first page of %s, due to %s.", file_name, strerror(errno));
hdr_frame_->pin_count_ = 0;
purge_frame(BP_HEADER_PAGE, hdr_frame_);
close(fd);
file_desc_ = -1;
......@@ -227,15 +257,16 @@ RC DiskBufferPool::close_file()
return rc;
}
hdr_frame_->pin_count_--;
hdr_frame_->unpin();
// TODO: 理论上是在回放时回滚未提交事务,但目前没有undo log,因此不下刷数据page,只通过redo log回放
if ((rc = purge_page(0)) != RC::SUCCESS) {
hdr_frame_->pin_count_++;
LOG_ERROR("Failed to close %s, due to failed to purge all pages.", file_name_.c_str());
rc = purge_all_pages();
if (rc != RC::SUCCESS) {
LOG_ERROR("failed to close %s, due to failed to purge pages. rc=%s", file_name_.c_str(), strrc(rc));
return rc;
}
disposed_pages.clear();
disposed_pages_.clear();
if (close(file_desc_) < 0) {
LOG_ERROR("Failed to close fileId:%d, fileName:%s, error:%s", file_desc_, file_name_.c_str(), strerror(errno));
......@@ -251,30 +282,31 @@ RC DiskBufferPool::close_file()
RC DiskBufferPool::get_this_page(PageNum page_num, Frame **frame)
{
RC rc = RC::SUCCESS;
*frame = nullptr;
Frame *used_match_frame = frame_manager_.get(file_desc_, page_num);
if (used_match_frame != nullptr) {
used_match_frame->pin_count_++;
used_match_frame->acc_time_ = current_time();
used_match_frame->access();
*frame = used_match_frame;
return RC::SUCCESS;
}
std::scoped_lock lock_guard(lock_); // 直接加了一把大锁,其实可以根据访问的页面来细化提高并行度
// Allocate one page and load the data into this page
Frame *allocated_frame = nullptr;
if ((rc = allocate_frame(page_num, &allocated_frame)) != RC::SUCCESS) {
rc = allocate_frame(page_num, &allocated_frame);
if (rc != RC::SUCCESS) {
LOG_ERROR("Failed to alloc frame %s:%d, due to failed to alloc page.", file_name_.c_str(), page_num);
return rc;
}
allocated_frame->dirty_ = false;
allocated_frame->file_desc_ = file_desc_;
allocated_frame->pin_count_ = 1;
allocated_frame->acc_time_ = current_time();
allocated_frame->set_file_desc(file_desc_);
// allocated_frame->pin(); // pined in manager::get
allocated_frame->access();
if ((rc = load_page(page_num, allocated_frame)) != RC::SUCCESS) {
LOG_ERROR("Failed to load page %s:%d", file_name_.c_str(), page_num);
allocated_frame->pin_count_ = 0;
purge_frame(page_num, allocated_frame);
return rc;
}
......@@ -287,6 +319,8 @@ RC DiskBufferPool::allocate_page(Frame **frame)
{
RC rc = RC::SUCCESS;
lock_.lock();
int byte = 0, bit = 0;
if ((file_header_->allocated_pages) < (file_header_->page_count)) {
// There is one free page
......@@ -298,6 +332,8 @@ RC DiskBufferPool::allocate_page(Frame **frame)
file_header_->bitmap[byte] |= (1 << bit);
// TODO, do we need clean the loaded page's data?
hdr_frame_->mark_dirty();
lock_.unlock();
return get_this_page(i, frame);
}
}
......@@ -305,15 +341,16 @@ RC DiskBufferPool::allocate_page(Frame **frame)
if (file_header_->page_count >= BPFileHeader::MAX_PAGE_NUM) {
LOG_WARN("file buffer pool is full. page count %d, max page count %d",
file_header_->page_count,
BPFileHeader::MAX_PAGE_NUM);
return BUFFERPOOL_NOBUF;
file_header_->page_count, BPFileHeader::MAX_PAGE_NUM);
lock_.unlock();
return RC::BUFFERPOOL_NOBUF;
}
PageNum page_num = file_header_->page_count;
Frame *allocated_frame = nullptr;
if ((rc = allocate_frame(page_num, &allocated_frame)) != RC::SUCCESS) {
LOG_ERROR("Failed to allocate frame %s, due to no free page.", file_name_.c_str());
lock_.unlock();
return rc;
}
......@@ -325,93 +362,73 @@ RC DiskBufferPool::allocate_page(Frame **frame)
file_header_->bitmap[byte] |= (1 << bit);
hdr_frame_->mark_dirty();
allocated_frame->dirty_ = false;
allocated_frame->file_desc_ = file_desc_;
allocated_frame->pin_count_ = 1;
allocated_frame->acc_time_ = current_time();
allocated_frame->set_file_desc(file_desc_);
allocated_frame->access();
allocated_frame->clear_page();
allocated_frame->page_.page_num = file_header_->page_count - 1;
allocated_frame->set_page_num(file_header_->page_count - 1);
// Use flush operation to extension file
if ((rc = flush_page(*allocated_frame)) != RC::SUCCESS) {
if ((rc = flush_page_internal(*allocated_frame)) != RC::SUCCESS) {
LOG_WARN("Failed to alloc page %s , due to failed to extend one page.", file_name_.c_str());
// skip return false, delay flush the extended page
// return tmp;
}
*frame = allocated_frame;
return RC::SUCCESS;
}
RC DiskBufferPool::unpin_page(Frame *frame)
{
ASSERT(frame->pin_count_ >= 1, "Page %d 's pin_count is smaller than 1", frame->page_num());
if (--frame->pin_count_ == 0) {
PageNum page_num = frame->page_num();
auto pages_it = disposed_pages.find(page_num);
if (pages_it != disposed_pages.end()) {
LOG_INFO("Dispose file_desc:%d, page:%d", file_desc_, page_num);
dispose_page(page_num);
disposed_pages.erase(pages_it);
}
}
lock_.unlock();
*frame = allocated_frame;
return RC::SUCCESS;
}
/**
* dispose_page will delete the data of the page of pageNum, free the page both from buffer pool and data file.
* purge_page will purge the page of pageNum, free the page from buffer pool
* @return
*/
RC DiskBufferPool::dispose_page(PageNum page_num)
{
RC rc = purge_page(page_num);
if (rc != RC::SUCCESS) {
LOG_INFO("Dispose page %s:%d later, due to this page is being used", file_name_.c_str(), page_num);
disposed_pages.insert(page_num);
return rc;
std::scoped_lock lock_guard(lock_);
Frame *used_frame = frame_manager_.get(file_desc_, page_num);
if (used_frame != nullptr) {
ASSERT("the page try to dispose is in use. frame:%s", to_string(*used_frame).c_str());
frame_manager_.free(file_desc_, page_num, used_frame);
} else {
LOG_WARN("failed to fetch the page while disposing it. pageNum=%d", page_num);
return RC::NOTFOUND;
}
hdr_frame_->dirty_ = true;
hdr_frame_->mark_dirty();
file_header_->allocated_pages--;
char tmp = 1 << (page_num % 8);
file_header_->bitmap[page_num / 8] &= ~tmp;
return RC::SUCCESS;
}
// Drops one pin on `frame` via Frame::unpin(). The resulting pin count is
// ignored and RC::SUCCESS is always returned.
RC DiskBufferPool::unpin_page(Frame *frame)
{
frame->unpin();
return RC::SUCCESS;
}
RC DiskBufferPool::purge_frame(PageNum page_num, Frame *buf)
{
if (buf->pin_count_ > 0) {
LOG_INFO("Begin to free page %d of %d(file id), but it's pinned, pin_count:%d.",
buf->page_num(),
buf->file_desc_,
buf->pin_count_);
if (buf->pin_count() != 1) {
LOG_INFO("Begin to free page %d of %d(file id), but it's pin count > 1:%d.",
buf->page_num(), buf->file_desc(), buf->pin_count());
return RC::LOCKED_UNLOCK;
}
if (buf->dirty_) {
RC rc = flush_page(*buf);
if (buf->dirty()) {
RC rc = flush_page_internal(*buf);
if (rc != RC::SUCCESS) {
LOG_WARN("Failed to flush page %d of %d(file desc) during purge page.", buf->page_num(), buf->file_desc_);
LOG_WARN("Failed to flush page %d of %d(file desc) during purge page.", buf->page_num(), buf->file_desc());
return rc;
}
}
LOG_DEBUG("Successfully purge frame =%p, page %d of %d(file desc)", buf, buf->page_num(), buf->file_desc_);
LOG_DEBUG("Successfully purge frame =%p, page %d of %d(file desc)", buf, buf->page_num(), buf->file_desc());
frame_manager_.free(file_desc_, page_num, buf);
return RC::SUCCESS;
}
/**
* dispose_page will delete the data of the page of pageNum
* force_page will flush the page of pageNum
* @param pageNum
* @return
*/
RC DiskBufferPool::purge_page(PageNum page_num)
{
std::scoped_lock lock_guard(lock_);
Frame *used_frame = frame_manager_.get(file_desc_, page_num);
if (used_frame != nullptr) {
return purge_frame(page_num, used_frame);
......@@ -423,23 +440,12 @@ RC DiskBufferPool::purge_page(PageNum page_num)
RC DiskBufferPool::purge_all_pages()
{
std::list<Frame *> used = frame_manager_.find_list(file_desc_);
std::scoped_lock lock_guard(lock_);
for (std::list<Frame *>::iterator it = used.begin(); it != used.end(); ++it) {
Frame *frame = *it;
if (frame->pin_count_ > 0) {
LOG_WARN("The page has been pinned, file_desc:%d, pagenum:%d, pin_count=%d",
frame->file_desc_,
frame->page_.page_num,
frame->pin_count_);
continue;
}
if (frame->dirty_) {
RC rc = flush_page(*frame);
if (rc != RC::SUCCESS) {
LOG_ERROR("Failed to flush all pages' of %s.", file_name_.c_str());
return rc;
}
}
frame_manager_.free(file_desc_, frame->page_.page_num, frame);
purge_frame(frame->page_num(), frame);
}
return RC::SUCCESS;
}
......@@ -447,17 +453,16 @@ RC DiskBufferPool::purge_all_pages()
RC DiskBufferPool::check_all_pages_unpinned()
{
std::list<Frame *> frames = frame_manager_.find_list(file_desc_);
for (auto &frame : frames) {
if (frame->page_num() == BP_HEADER_PAGE && frame->pin_count_ > 1) {
LOG_WARN("This page has been pinned. file desc=%d, page num:%d, pin count=%d",
file_desc_,
frame->page_num(),
frame->pin_count_);
} else if (frame->page_num() != BP_HEADER_PAGE && frame->pin_count_ > 0) {
LOG_WARN("This page has been pinned. file desc=%d, page num:%d, pin count=%d",
file_desc_,
frame->page_num(),
frame->pin_count_);
std::scoped_lock lock_guard(lock_);
for (Frame *frame : frames) {
frame->unpin();
if (frame->page_num() == BP_HEADER_PAGE && frame->pin_count() > 1) {
LOG_WARN("This page has been pinned. file desc=%d, pageNum:%d, pin count=%d",
file_desc_, frame->page_num(), frame->pin_count());
} else if (frame->page_num() != BP_HEADER_PAGE && frame->pin_count() > 0) {
LOG_WARN("This page has been pinned. file desc=%d, pageNum:%d, pin count=%d",
file_desc_, frame->page_num(), frame->pin_count());
}
}
LOG_INFO("all pages have been checked of file desc %d", file_desc_);
......@@ -465,11 +470,17 @@ RC DiskBufferPool::check_all_pages_unpinned()
}
// Locking wrapper around flush_page_internal(): holds lock_ for the duration
// of the call and returns the result of the internal flush.
RC DiskBufferPool::flush_page(Frame &frame)
{
std::scoped_lock lock_guard(lock_);
return flush_page_internal(frame);
}
RC DiskBufferPool::flush_page_internal(Frame &frame)
{
// The better way is use mmap the block into memory,
// so it is easier to flush data to file.
Page &page = frame.page_;
Page &page = frame.page();
s64_t offset = ((s64_t)page.page_num) * sizeof(Page);
if (lseek(file_desc_, offset, SEEK_SET) == offset - 1) {
LOG_ERROR("Failed to flush page %lld of %d due to failed to seek %s.", offset, file_desc_, strerror(errno));
......@@ -480,8 +491,8 @@ RC DiskBufferPool::flush_page(Frame &frame)
LOG_ERROR("Failed to flush page %lld of %d due to %s.", offset, file_desc_, strerror(errno));
return RC::IOERR_WRITE;
}
frame.dirty_ = false;
LOG_DEBUG("Flush block. file desc=%d, page num=%d", file_desc_, page.page_num);
frame.clear_dirty();
LOG_DEBUG("Flush block. file desc=%d, pageNum=%d", file_desc_, page.page_num);
return RC::SUCCESS;
}
......@@ -505,6 +516,7 @@ RC DiskBufferPool::recover_page(PageNum page_num)
byte = page_num / 8;
bit = page_num % 8;
std::scoped_lock lock_guard(lock_);
if (!(file_header_->bitmap[byte] & (1 << bit))) {
file_header_->bitmap[byte] |= (1 << bit);
file_header_->allocated_pages++;
......@@ -516,30 +528,34 @@ RC DiskBufferPool::recover_page(PageNum page_num)
RC DiskBufferPool::allocate_frame(PageNum page_num, Frame **buffer)
{
while (true) {
Frame *frame = frame_manager_.alloc(file_desc_, page_num);
if (frame != nullptr) {
*buffer = frame;
auto purger = [this](Frame *frame) {
if (!frame->dirty()) {
return RC::SUCCESS;
}
frame = frame_manager_.begin_purge();
if (frame == nullptr) {
LOG_ERROR("All pages have been used and pinned.");
return RC::NOMEM;
RC rc = RC::SUCCESS;
if (frame->file_desc() == file_desc_) {
rc = this->flush_page_internal(*frame);
} else {
rc = bp_manager_.flush_page(*frame);
}
if (frame->dirty_) {
RC rc = bp_manager_.flush_page(*frame);
if (rc != RC::SUCCESS) {
LOG_ERROR("Failed to aclloc block due to failed to flush old block.");
return rc;
}
if (rc != RC::SUCCESS) {
LOG_ERROR("Failed to aclloc block due to failed to flush old block. rc=%s", strrc(rc));
}
return rc;
};
frame_manager_.free(frame->file_desc(), frame->page_num(), frame);
while (true) {
Frame *frame = frame_manager_.alloc(file_desc_, page_num);
if (frame != nullptr) {
*buffer = frame;
return RC::SUCCESS;
}
(void)frame_manager_.purge_frames(1/*count*/, purger);
}
return RC::INTERNAL;
return RC::BUFFERPOOL_NOBUF;
}
RC DiskBufferPool::check_page_num(PageNum page_num)
......@@ -557,21 +573,18 @@ RC DiskBufferPool::check_page_num(PageNum page_num)
RC DiskBufferPool::load_page(PageNum page_num, Frame *frame)
{
s64_t offset = ((s64_t)page_num) * sizeof(Page);
s64_t offset = ((s64_t)page_num) * BP_PAGE_SIZE;
if (lseek(file_desc_, offset, SEEK_SET) == -1) {
LOG_ERROR("Failed to load page %s:%d, due to failed to lseek:%s.", file_name_.c_str(), page_num, strerror(errno));
return RC::IOERR_SEEK;
}
int ret = readn(file_desc_, &(frame->page_), sizeof(Page));
Page &page = frame->page();
int ret = readn(file_desc_, &page, BP_PAGE_SIZE);
if (ret != 0) {
LOG_ERROR("Failed to load page %s:%d, due to failed to read data:%s, ret=%d, page count=%d",
file_name_.c_str(),
page_num,
strerror(errno),
ret,
file_header_->allocated_pages);
file_name_.c_str(), page_num, strerror(errno), ret, file_header_->allocated_pages);
return RC::IOERR_READ;
}
return RC::SUCCESS;
......@@ -587,9 +600,13 @@ int DiskBufferPool::file_desc() const
return file_desc_;
}
////////////////////////////////////////////////////////////////////////////////
BufferPoolManager::BufferPoolManager()
BufferPoolManager::BufferPoolManager(int page_num /* = 0 */)
{
frame_manager_.init(MEM_POOL_ITEM_NUM);
if (page_num <= 0) {
page_num = MEM_POOL_ITEM_NUM * DEFAULT_ITEM_NUM_PER_POOL;
}
const int pool_num = std::max(page_num / DEFAULT_ITEM_NUM_PER_POOL, 1);
frame_manager_.init(pool_num);
}
BufferPoolManager::~BufferPoolManager()
......@@ -622,7 +639,7 @@ RC BufferPoolManager::create_file(const char *file_name)
}
Page page;
memset(&page, 0, sizeof(Page));
memset(&page, 0, BP_PAGE_SIZE);
BPFileHeader *file_header = (BPFileHeader *)page.data;
file_header->allocated_pages = 1;
......@@ -636,7 +653,7 @@ RC BufferPoolManager::create_file(const char *file_name)
return RC::IOERR_SEEK;
}
if (writen(fd, (char *)&page, sizeof(Page)) != 0) {
if (writen(fd, (char *)&page, BP_PAGE_SIZE) != 0) {
LOG_ERROR("Failed to write header to file %s, due to %s.", file_name, strerror(errno));
close(fd);
return RC::IOERR_WRITE;
......@@ -651,6 +668,7 @@ RC BufferPoolManager::open_file(const char *_file_name, DiskBufferPool *&_bp)
{
std::string file_name(_file_name);
std::scoped_lock lock_guard(lock_);
if (buffer_pools_.find(file_name) != buffer_pools_.end()) {
LOG_WARN("file already opened. file name=%s", _file_name);
return RC::BUFFERPOOL_OPEN;
......@@ -666,6 +684,7 @@ RC BufferPoolManager::open_file(const char *_file_name, DiskBufferPool *&_bp)
buffer_pools_.insert(std::pair<std::string, DiskBufferPool *>(file_name, bp));
fd_buffer_pools_.insert(std::pair<int, DiskBufferPool *>(bp->file_desc(), bp));
LOG_DEBUG("insert buffer pool into fd buffer pools. fd=%d, bp=%p, lbt=%s", bp->file_desc(), bp, lbt());
_bp = bp;
return RC::SUCCESS;
}
......@@ -673,6 +692,9 @@ RC BufferPoolManager::open_file(const char *_file_name, DiskBufferPool *&_bp)
RC BufferPoolManager::close_file(const char *_file_name)
{
std::string file_name(_file_name);
std::scoped_lock lock_guard(lock_);
auto iter = buffer_pools_.find(file_name);
if (iter == buffer_pools_.end()) {
LOG_WARN("file has not opened: %s", _file_name);
......@@ -680,7 +702,17 @@ RC BufferPoolManager::close_file(const char *_file_name)
}
int fd = iter->second->file_desc();
fd_buffer_pools_.erase(fd);
if (0 == fd_buffer_pools_.erase(fd)) {
int count = 0;
for (auto fd_iter = fd_buffer_pools_.begin(); fd_iter != fd_buffer_pools_.end(); ++fd_iter) {
if (fd_iter->second == iter->second) {
fd_buffer_pools_.erase(fd_iter);
count = 1;
break;
}
}
ASSERT(count == 1, "the buffer pool was not erased from fd buffer pools.");
}
DiskBufferPool *bp = iter->second;
buffer_pools_.erase(iter);
......@@ -691,6 +723,8 @@ RC BufferPoolManager::close_file(const char *_file_name)
RC BufferPoolManager::flush_page(Frame &frame)
{
int fd = frame.file_desc();
std::scoped_lock lock_guard(lock_);
auto iter = fd_buffer_pools_.find(fd);
if (iter == fd_buffer_pools_.end()) {
LOG_WARN("unknown buffer pool of fd %d", fd);
......
......@@ -23,28 +23,22 @@ See the Mulan PSL v2 for more details. */
#include <string>
#include <mutex>
#include <unordered_map>
#include <functional>
#include "rc.h"
#include "defs.h"
#include "common/lang/mutex.h"
#include "common/mm/mem_pool.h"
#include "common/lang/lru_cache.h"
#include "common/lang/bitmap.h"
#include "storage/buffer/page.h"
#include "storage/buffer/frame.h"
class BufferPoolManager;
class DiskBufferPool;
//
#define BP_INVALID_PAGE_NUM (-1)
#define BP_PAGE_SIZE (1 << 14)
#define BP_PAGE_DATA_SIZE (BP_PAGE_SIZE - sizeof(PageNum))
#define BP_FILE_SUB_HDR_SIZE (sizeof(BPFileSubHeader))
struct Page {
PageNum page_num;
char data[BP_PAGE_DATA_SIZE];
};
// sizeof(Page) should be equal to BP_PAGE_SIZE
/**
* BufferPool的文件第一个页面,存放一些元数据信息,包括了后面每页的分配信息。
* TODO 1. 当前的做法,只能分配比较少的页面,你可以扩展一下,支持更多的页面或无限多的页面吗?
......@@ -52,7 +46,8 @@ struct Page {
* 2. 当前使用bitmap存放页面分配情况,但是这种方法在页面非常多的时候,查找空闲页面的
* 效率非常低,你有办法优化吗?
*/
struct BPFileHeader {
struct BPFileHeader
{
int32_t page_count; //! 当前文件一共有多少个页面
int32_t allocated_pages; //! 已经分配了多少个页面
char bitmap[0]; //! 页面分配位图, 第0个页面(就是当前页面),总是1
......@@ -63,96 +58,8 @@ struct BPFileHeader {
static const int MAX_PAGE_NUM = (BP_PAGE_DATA_SIZE - sizeof(page_count) - sizeof(allocated_pages)) * 8;
};
class Frame {
public:
void clear_page()
{
memset(&page_, 0, sizeof(page_));
}
PageNum page_num() const
{
return page_.page_num;
}
void set_page_num(PageNum page_num)
{
page_.page_num = page_num;
}
/**
* 标记指定页面为“脏”页。如果修改了页面的内容,则应调用此函数,
* 以便该页面被淘汰出缓冲区时系统将新的页面数据写入磁盘文件
*/
void mark_dirty()
{
dirty_ = true;
}
char *data()
{
return page_.data;
}
int file_desc() const
{
return file_desc_;
}
void set_file_desc(int fd)
{
file_desc_ = fd;
}
bool can_purge()
{
return pin_count_ <= 0;
}
private:
friend class DiskBufferPool;
bool dirty_ = false;
unsigned int pin_count_ = 0;
unsigned long acc_time_ = 0;
int file_desc_ = -1;
Page page_;
};
class BPFrameId {
public:
BPFrameId(int file_desc, PageNum page_num) : file_desc_(file_desc), page_num_(page_num)
{}
bool equal_to(const BPFrameId &other) const
{
return file_desc_ == other.file_desc_ && page_num_ == other.page_num_;
}
bool operator==(const BPFrameId &other) const
{
return this->equal_to(other);
}
size_t hash() const
{
return static_cast<size_t>(file_desc_) << 32L | page_num_;
}
int file_desc() const
{
return file_desc_;
}
PageNum page_num() const
{
return page_num_;
}
private:
int file_desc_;
PageNum page_num_;
};
class BPFrameManager {
class BPFrameManager
{
public:
BPFrameManager(const char *tag);
......@@ -173,9 +80,12 @@ public:
/**
* 如果不能从空闲链表中分配新的页面,就使用这个接口,
* 尝试从pin count=0的页面中淘汰一个
* 尝试从pin count=0的页面中淘汰一些
* @param count 想要purge多少个页面
* @param purger 需要在释放frame之前,对页面做些什么操作。当前是刷新脏数据到磁盘
* @return 返回本次清理了多少个页面
*/
Frame *begin_purge();
int purge_frames(int count, std::function<RC(Frame *frame)> purger);
size_t frame_num() const
{
......@@ -190,23 +100,29 @@ public:
return allocator_.get_size();
}
private:
Frame *get_internal(const FrameId &frame_id);
RC free_internal(const FrameId &frame_id, Frame *frame);
private:
class BPFrameIdHasher {
public:
size_t operator()(const BPFrameId &frame_id) const
size_t operator()(const FrameId &frame_id) const
{
return frame_id.hash();
}
};
using FrameLruCache = common::LruCache<BPFrameId, Frame *, BPFrameIdHasher>;
using FrameLruCache = common::LruCache<FrameId, Frame *, BPFrameIdHasher>;
using FrameAllocator = common::MemPoolSimple<Frame>;
std::mutex lock_;
FrameLruCache frames_;
FrameLruCache frames_;
FrameAllocator allocator_;
};
class BufferPoolIterator {
class BufferPoolIterator
{
public:
BufferPoolIterator();
~BufferPoolIterator();
......@@ -221,7 +137,8 @@ private:
PageNum current_page_num_ = -1;
};
class DiskBufferPool {
class DiskBufferPool
{
public:
DiskBufferPool(BufferPoolManager &bp_manager, BPFrameManager &frame_manager);
~DiskBufferPool();
......@@ -253,9 +170,6 @@ public:
*/
RC allocate_page(Frame **frame);
/**
* 比purge_page多一个动作, 在磁盘上将对应的页数据删掉。
*/
RC dispose_page(PageNum page_num);
/**
......@@ -300,7 +214,6 @@ public:
*/
RC recover_page(PageNum page_num);
protected:
protected:
RC allocate_frame(PageNum page_num, Frame **buf);
......@@ -315,22 +228,30 @@ protected:
*/
RC load_page(PageNum page_num, Frame *frame);
/**
* 如果页面是脏的,就将数据刷新到磁盘
*/
RC flush_page_internal(Frame &frame);
private:
BufferPoolManager &bp_manager_;
BPFrameManager &frame_manager_;
std::string file_name_;
int file_desc_ = -1;
Frame *hdr_frame_ = nullptr;
BPFileHeader *file_header_ = nullptr;
std::set<PageNum> disposed_pages;
BufferPoolManager & bp_manager_;
BPFrameManager & frame_manager_;
std::string file_name_;
int file_desc_ = -1;
Frame * hdr_frame_ = nullptr;
BPFileHeader * file_header_ = nullptr;
std::set<PageNum> disposed_pages_;
common::Mutex lock_;
private:
friend class BufferPoolIterator;
};
class BufferPoolManager {
class BufferPoolManager
{
public:
BufferPoolManager();
BufferPoolManager(int page_num = 0);
~BufferPoolManager();
RC create_file(const char *file_name);
......@@ -340,11 +261,13 @@ public:
RC flush_page(Frame &frame);
public:
static void set_instance(BufferPoolManager *bpm);
static void set_instance(BufferPoolManager *bpm); // TODO 优化全局变量的表示方法
static BufferPoolManager &instance();
private:
BPFrameManager frame_manager_{"BufPool"};
common::Mutex lock_;
std::unordered_map<std::string, DiskBufferPool *> buffer_pools_;
std::unordered_map<int, DiskBufferPool *> fd_buffer_pools_;
};
......@@ -19,6 +19,9 @@ See the Mulan PSL v2 for more details. */
#include "sql/parser/parse_defs.h"
#include "common/lang/lower_bound.h"
using namespace std;
using namespace common;
#define FIRST_INDEX_PAGE 1
int calc_internal_page_capacity(int attr_length)
......@@ -77,6 +80,17 @@ int IndexNodeHandler::size() const
return node_->key_num;
}
/**
 * Capacity of this node, taken from the index file header:
 * leaf_max_size for a leaf node, internal_max_size otherwise.
 */
int IndexNodeHandler::max_size() const
{
  if (is_leaf()) {
    return header_.leaf_max_size;
  }
  return header_.internal_max_size;
}
int IndexNodeHandler::min_size() const
{
const int max = this->max_size();
return max - max/2;
}
void IndexNodeHandler::increase_size(int n)
{
node_->key_num += n;
......@@ -91,6 +105,40 @@ void IndexNodeHandler::set_parent_page_num(PageNum page_num)
{
this->node_->parent = page_num;
}
/**
 * Checks whether this node is "safe" for the given operation, i.e. whether the
 * operation can be applied to it without forcing the node to split or merge.
 * @param op           the operation about to be performed
 * @param is_root_node whether this node is the root (only consulted for DELETE)
 * @return true  the node does NOT need to split or merge after the operation;
 *         false the node may need to split or merge
 */
bool IndexNodeHandler::is_safe(BplusTreeOperationType op, bool is_root_node)
{
  switch (op) {
    case BplusTreeOperationType::READ: {
      return true;
    }
    case BplusTreeOperationType::INSERT: {
      // there is still room for one more entry
      return size() < max_size();
    }
    case BplusTreeOperationType::DELETE: {
      if (is_root_node) {  // see adjust_root
        if (node_->is_leaf) {
          // if the root leaf becomes empty, the whole tree has to be deleted
          return size() > 1;
        }
        // Not a leaf: the root still has children, but if removing one child leaves
        // only a single child, the root deletes itself and that child becomes the root.
        return size() > 2;
      }
      return size() > min_size();
    }
    default: {
      // unknown operation type: handled by the assertion below
    } break;
  }

  ASSERT(false, "invalid operation type: %d", static_cast<int>(op));
  return false;
}
std::string to_string(const IndexNodeHandler &handler)
{
std::stringstream ss;
......@@ -127,7 +175,6 @@ LeafIndexNodeHandler::LeafIndexNodeHandler(const IndexFileHeader &header, Frame
void LeafIndexNodeHandler::init_empty()
{
IndexNodeHandler::init_empty(true);
leaf_node_->prev_brother = BP_INVALID_PAGE_NUM;
leaf_node_->next_brother = BP_INVALID_PAGE_NUM;
}
......@@ -136,18 +183,10 @@ void LeafIndexNodeHandler::set_next_page(PageNum page_num)
leaf_node_->next_brother = page_num;
}
void LeafIndexNodeHandler::set_prev_page(PageNum page_num)
{
leaf_node_->prev_brother = page_num;
}
PageNum LeafIndexNodeHandler::next_page() const
{
return leaf_node_->next_brother;
}
PageNum LeafIndexNodeHandler::prev_page() const
{
return leaf_node_->prev_brother;
}
char *LeafIndexNodeHandler::key_at(int index)
{
......@@ -161,16 +200,6 @@ char *LeafIndexNodeHandler::value_at(int index)
return __value_at(index);
}
int LeafIndexNodeHandler::max_size() const
{
return header_.leaf_max_size;
}
int LeafIndexNodeHandler::min_size() const
{
return header_.leaf_max_size - header_.leaf_max_size / 2;
}
int LeafIndexNodeHandler::lookup(const KeyComparator &comparator, const char *key, bool *found /* = nullptr */) const
{
const int size = this->size();
......@@ -247,21 +276,6 @@ RC LeafIndexNodeHandler::move_to(LeafIndexNodeHandler &other, DiskBufferPool *bp
this->increase_size(-this->size());
other.set_next_page(this->next_page());
PageNum next_right_page_num = this->next_page();
if (next_right_page_num != BP_INVALID_PAGE_NUM) {
Frame *next_right_frame;
RC rc = bp->get_this_page(next_right_page_num, &next_right_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch next right page. page number:%d. rc=%d:%s", next_right_page_num, rc, strrc(rc));
return rc;
}
LeafIndexNodeHandler next_right_node(header_, next_right_frame);
next_right_node.set_prev_page(other.page_num());
next_right_frame->mark_dirty();
bp->unpin_page(next_right_frame);
}
return RC::SUCCESS;
}
......@@ -296,7 +310,7 @@ char *LeafIndexNodeHandler::__value_at(int index) const
std::string to_string(const LeafIndexNodeHandler &handler, const KeyPrinter &printer)
{
std::stringstream ss;
ss << to_string((const IndexNodeHandler &)handler) << ",prev page:" << handler.prev_page()
ss << to_string((const IndexNodeHandler &)handler)
<< ",next page:" << handler.next_page();
ss << ",values=[" << printer(handler.__key_at(0));
for (int i = 1; i < handler.size(); i++) {
......@@ -317,10 +331,7 @@ bool LeafIndexNodeHandler::validate(const KeyComparator &comparator, DiskBufferP
for (int i = 1; i < node_size; i++) {
if (comparator(__key_at(i - 1), __key_at(i)) >= 0) {
LOG_WARN("page number = %d, invalid key order. id1=%d,id2=%d, this=%s",
page_num(),
i - 1,
i,
to_string(*this).c_str());
page_num(), i - 1, i, to_string(*this).c_str());
return false;
}
}
......@@ -341,8 +352,7 @@ bool LeafIndexNodeHandler::validate(const KeyComparator &comparator, DiskBufferP
int index_in_parent = parent_node.value_index(this->page_num());
if (index_in_parent < 0) {
LOG_WARN("invalid leaf node. cannot find index in parent. this page num=%d, parent page num=%d",
this->page_num(),
parent_page_num);
this->page_num(), parent_page_num);
bp->unpin_page(parent_frame);
return false;
}
......@@ -352,9 +362,7 @@ bool LeafIndexNodeHandler::validate(const KeyComparator &comparator, DiskBufferP
if (cmp_result < 0) {
LOG_WARN("invalid leaf node. first item should be greate than or equal to parent item. "
"this page num=%d, parent page num=%d, index in parent=%d",
this->page_num(),
parent_node.page_num(),
index_in_parent);
this->page_num(), parent_node.page_num(), index_in_parent);
bp->unpin_page(parent_frame);
return false;
}
......@@ -365,9 +373,7 @@ bool LeafIndexNodeHandler::validate(const KeyComparator &comparator, DiskBufferP
if (cmp_result >= 0) {
LOG_WARN("invalid leaf node. last item should be less than the item at the first after item in parent."
"this page num=%d, parent page num=%d, parent item to compare=%d",
this->page_num(),
parent_node.page_num(),
index_in_parent + 1);
this->page_num(), parent_node.page_num(), index_in_parent + 1);
bp->unpin_page(parent_frame);
return false;
}
......@@ -440,16 +446,6 @@ RC InternalIndexNodeHandler::move_half_to(InternalIndexNodeHandler &other, DiskB
return rc;
}
int InternalIndexNodeHandler::max_size() const
{
return header_.internal_max_size;
}
int InternalIndexNodeHandler::min_size() const
{
return header_.internal_max_size - header_.internal_max_size / 2;
}
/**
* lookup the first item which key <= item
* @return unlike the leafNode, the return value is not the insert position,
......@@ -573,10 +569,7 @@ RC InternalIndexNodeHandler::copy_from(const char *items, int num, DiskBufferPoo
rc = disk_buffer_pool->get_this_page(page_num, &frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to set child's page num. child page num:%d, this page num=%d, rc=%d:%s",
page_num,
this_page_num,
rc,
strrc(rc));
page_num, this_page_num, rc, strrc(rc));
return rc;
}
IndexNodeHandler child_node(header_, frame);
......@@ -654,10 +647,7 @@ bool InternalIndexNodeHandler::validate(const KeyComparator &comparator, DiskBuf
for (int i = 2; i < node_size; i++) {
if (comparator(__key_at(i - 1), __key_at(i)) >= 0) {
LOG_WARN("page number = %d, invalid key order. id1=%d,id2=%d, this=%s",
page_num(),
i - 1,
i,
to_string(*this).c_str());
page_num(), i - 1, i, to_string(*this).c_str());
return false;
}
}
......@@ -670,15 +660,13 @@ bool InternalIndexNodeHandler::validate(const KeyComparator &comparator, DiskBuf
Frame *child_frame;
RC rc = bp->get_this_page(page_num, &child_frame);
if (rc != RC::SUCCESS) {
LOG_WARN(
"failed to fetch child page while validate internal page. page num=%d, rc=%d:%s", page_num, rc, strrc(rc));
LOG_WARN("failed to fetch child page while validate internal page. page num=%d, rc=%d:%s",
page_num, rc, strrc(rc));
} else {
IndexNodeHandler child_node(header_, child_frame);
if (child_node.parent_page_num() != this->page_num()) {
LOG_WARN("child's parent page num is invalid. child page num=%d, parent page num=%d, this page num=%d",
child_node.page_num(),
child_node.parent_page_num(),
this->page_num());
child_node.page_num(), child_node.parent_page_num(), this->page_num());
result = false;
}
bp->unpin_page(child_frame);
......@@ -706,8 +694,7 @@ bool InternalIndexNodeHandler::validate(const KeyComparator &comparator, DiskBuf
int index_in_parent = parent_node.value_index(this->page_num());
if (index_in_parent < 0) {
LOG_WARN("invalid internal node. cannot find index in parent. this page num=%d, parent page num=%d",
this->page_num(),
parent_page_num);
this->page_num(), parent_page_num);
bp->unpin_page(parent_frame);
return false;
}
......@@ -717,9 +704,7 @@ bool InternalIndexNodeHandler::validate(const KeyComparator &comparator, DiskBuf
if (cmp_result < 0) {
LOG_WARN("invalid internal node. the second item should be greate than or equal to parent item. "
"this page num=%d, parent page num=%d, index in parent=%d",
this->page_num(),
parent_node.page_num(),
index_in_parent);
this->page_num(), parent_node.page_num(), index_in_parent);
bp->unpin_page(parent_frame);
return false;
}
......@@ -730,9 +715,7 @@ bool InternalIndexNodeHandler::validate(const KeyComparator &comparator, DiskBuf
if (cmp_result >= 0) {
LOG_WARN("invalid internal node. last item should be less than the item at the first after item in parent."
"this page num=%d, parent page num=%d, parent item to compare=%d",
this->page_num(),
parent_node.page_num(),
index_in_parent + 1);
this->page_num(), parent_node.page_num(), index_in_parent + 1);
bp->unpin_page(parent_frame);
return false;
}
......@@ -778,9 +761,7 @@ RC BplusTreeHandler::create(const char *file_name, AttrType attr_type, int attr_
if (header_frame->page_num() != FIRST_INDEX_PAGE) {
LOG_WARN("header page num should be %d but got %d. is it a new file : %s",
FIRST_INDEX_PAGE,
header_frame->page_num(),
file_name);
FIRST_INDEX_PAGE, header_frame->page_num(), file_name);
bpm.close_file(file_name);
return RC::INTERNAL;
}
......@@ -809,7 +790,7 @@ RC BplusTreeHandler::create(const char *file_name, AttrType attr_type, int attr_
header_dirty_ = false;
bp->unpin_page(header_frame);
mem_pool_item_ = new common::MemPoolItem(file_name);
mem_pool_item_ = make_unique<common::MemPoolItem>(file_name);
if (mem_pool_item_->init(file_header->key_length) < 0) {
LOG_WARN("Failed to init memory pool for index %s", file_name);
close();
......@@ -850,7 +831,7 @@ RC BplusTreeHandler::open(const char *file_name)
header_dirty_ = false;
disk_buffer_pool_ = disk_buffer_pool;
mem_pool_item_ = new common::MemPoolItem(file_name);
mem_pool_item_ = make_unique<common::MemPoolItem>(file_name);
if (mem_pool_item_->init(file_header_.key_length) < 0) {
LOG_WARN("Failed to init memory pool for index %s", file_name);
close();
......@@ -869,11 +850,7 @@ RC BplusTreeHandler::open(const char *file_name)
RC BplusTreeHandler::close()
{
if (disk_buffer_pool_ != nullptr) {
disk_buffer_pool_->close_file(); // TODO
delete mem_pool_item_;
mem_pool_item_ = nullptr;
disk_buffer_pool_->close_file();
}
disk_buffer_pool_ = nullptr;
......@@ -958,9 +935,10 @@ RC BplusTreeHandler::print_leafs()
return RC::SUCCESS;
}
Frame *frame;
LatchMemo latch_memo(disk_buffer_pool_);
Frame *frame = nullptr;
RC rc = left_most_page(frame);
RC rc = left_most_page(latch_memo, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to get left most page. rc=%d:%s", rc, strrc(rc));
return rc;
......@@ -971,13 +949,13 @@ RC BplusTreeHandler::print_leafs()
LOG_INFO("leaf info: %s", to_string(leaf_node, key_printer_).c_str());
PageNum next_page_num = leaf_node.next_page();
disk_buffer_pool_->unpin_page(frame);
latch_memo.release();
if (next_page_num == BP_INVALID_PAGE_NUM) {
break;
}
rc = disk_buffer_pool_->get_this_page(next_page_num, &frame);
rc = latch_memo.get_page(next_page_num, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to get next page. page id=%d, rc=%d:%s", next_page_num, rc, strrc(rc));
return rc;
......@@ -986,7 +964,7 @@ RC BplusTreeHandler::print_leafs()
return rc;
}
bool BplusTreeHandler::validate_node_recursive(Frame *frame)
bool BplusTreeHandler::validate_node_recursive(LatchMemo &latch_memo, Frame *frame)
{
bool result = true;
IndexNodeHandler node(file_header_, frame);
......@@ -999,80 +977,57 @@ bool BplusTreeHandler::validate_node_recursive(Frame *frame)
for (int i = 0; result && i < internal_node.size(); i++) {
PageNum page_num = internal_node.value_at(i);
Frame *child_frame;
RC rc = disk_buffer_pool_->get_this_page(page_num, &child_frame);
RC rc = latch_memo.get_page(page_num, child_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch child page.page id=%d, rc=%d:%s", page_num, rc, strrc(rc));
result = false;
break;
}
result = validate_node_recursive(child_frame);
result = validate_node_recursive(latch_memo, child_frame);
}
}
disk_buffer_pool_->unpin_page(frame);
return result;
}
bool BplusTreeHandler::validate_leaf_link()
bool BplusTreeHandler::validate_leaf_link(LatchMemo &latch_memo)
{
if (is_empty()) {
return true;
}
Frame *frame;
RC rc = left_most_page(frame);
Frame *frame = nullptr;
RC rc = left_most_page(latch_memo, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch left most page. rc=%d:%s", rc, strrc(rc));
return false;
}
PageNum prev_page_num = BP_INVALID_PAGE_NUM;
LeafIndexNodeHandler leaf_node(file_header_, frame);
if (leaf_node.prev_page() != prev_page_num) {
LOG_WARN("invalid page. current_page_num=%d, prev page num should be %d but got %d",
frame->page_num(),
prev_page_num,
leaf_node.prev_page());
return false;
}
PageNum next_page_num = leaf_node.next_page();
prev_page_num = frame->page_num();
char *prev_key = (char *)mem_pool_item_->alloc();
memcpy(prev_key, leaf_node.key_at(leaf_node.size() - 1), file_header_.key_length);
disk_buffer_pool_->unpin_page(frame);
MemPoolItem::unique_ptr prev_key = mem_pool_item_->alloc_unique_ptr();
memcpy(prev_key.get(), leaf_node.key_at(leaf_node.size() - 1), file_header_.key_length);
bool result = true;
while (result && next_page_num != BP_INVALID_PAGE_NUM) {
rc = disk_buffer_pool_->get_this_page(next_page_num, &frame);
rc = latch_memo.get_page(next_page_num, frame);
if (rc != RC::SUCCESS) {
free_key(prev_key);
LOG_WARN("failed to fetch next page. page num=%d, rc=%d:%s", next_page_num, rc, strrc(rc));
LOG_WARN("failed to fetch next page. page num=%d, rc=%s", next_page_num, strrc(rc));
return false;
}
LeafIndexNodeHandler leaf_node(file_header_, frame);
if (leaf_node.prev_page() != prev_page_num) {
LOG_WARN("invalid page. current_page_num=%d, prev page num should be %d but got %d",
frame->page_num(),
prev_page_num,
leaf_node.prev_page());
result = false;
}
if (key_comparator_(prev_key, leaf_node.key_at(0)) >= 0) {
if (key_comparator_((char *)prev_key.get(), leaf_node.key_at(0)) >= 0) {
LOG_WARN("invalid page. current first key is not bigger than last");
result = false;
}
next_page_num = leaf_node.next_page();
memcpy(prev_key, leaf_node.key_at(leaf_node.size() - 1), file_header_.key_length);
prev_page_num = frame->page_num();
disk_buffer_pool_->unpin_page(frame);
memcpy(prev_key.get(), leaf_node.key_at(leaf_node.size() - 1), file_header_.key_length);
}
free_key(prev_key);
// can do more things
return result;
}
......@@ -1083,14 +1038,15 @@ bool BplusTreeHandler::validate_tree()
return true;
}
Frame *frame;
RC rc = disk_buffer_pool_->get_this_page(file_header_.root_page, &frame);
LatchMemo latch_memo(disk_buffer_pool_);
Frame *frame = nullptr;
RC rc = latch_memo.get_page(file_header_.root_page, frame); // 这里仅仅调试使用,不加root锁
if (rc != RC::SUCCESS) {
  LOG_WARN("failed to fetch root page. page id=%d, rc=%d:%s", file_header_.root_page, rc, strrc(rc));
  // validate_tree() returns bool. Returning `rc` here converted the non-zero error
  // code to `true`, reporting the tree as valid although the root page could not be read.
  return false;
}
if (!validate_node_recursive(frame) || !validate_leaf_link()) {
if (!validate_node_recursive(latch_memo, frame) || !validate_leaf_link(latch_memo)) {
LOG_WARN("Current B+ Tree is invalid");
print_tree();
return false;
......@@ -1105,60 +1061,85 @@ bool BplusTreeHandler::is_empty() const
return file_header_.root_page == BP_INVALID_PAGE_NUM;
}
RC BplusTreeHandler::find_leaf(const char *key, Frame *&frame)
RC BplusTreeHandler::find_leaf(LatchMemo &latch_memo, BplusTreeOperationType op, const char *key, Frame *&frame)
{
return find_leaf_internal(
[&](InternalIndexNodeHandler &internal_node) {
auto child_page_getter = [this, key](InternalIndexNodeHandler &internal_node) {
return internal_node.value_at(internal_node.lookup(key_comparator_, key));
},
frame);
};
return find_leaf_internal(latch_memo, op, child_page_getter, frame);
}
RC BplusTreeHandler::left_most_page(Frame *&frame)
RC BplusTreeHandler::left_most_page(LatchMemo &latch_memo, Frame *&frame)
{
return find_leaf_internal([&](InternalIndexNodeHandler &internal_node) { return internal_node.value_at(0); }, frame);
}
RC BplusTreeHandler::right_most_page(Frame *&frame)
{
return find_leaf_internal(
[&](InternalIndexNodeHandler &internal_node) { return internal_node.value_at(internal_node.size() - 1); }, frame);
auto child_page_getter = [](InternalIndexNodeHandler &internal_node) { return internal_node.value_at(0); };
return find_leaf_internal(latch_memo, BplusTreeOperationType::READ, child_page_getter, frame);
}
RC BplusTreeHandler::find_leaf_internal(
const std::function<PageNum(InternalIndexNodeHandler &)> &child_page_getter, Frame *&frame)
LatchMemo &latch_memo, BplusTreeOperationType op,
const std::function<PageNum(InternalIndexNodeHandler &)> &child_page_getter,
Frame *&frame)
{
// root locked
if (op != BplusTreeOperationType::READ) {
latch_memo.xlatch(&root_lock_);
} else {
latch_memo.slatch(&root_lock_);
}
if (is_empty()) {
return RC::EMPTY;
}
RC rc = disk_buffer_pool_->get_this_page(file_header_.root_page, &frame);
RC rc = crabing_protocal_fetch_page(latch_memo, op, file_header_.root_page, true/* is_root_node */, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch root page. page id=%d, rc=%d:%s", file_header_.root_page, rc, strrc(rc));
return rc;
}
IndexNode *node = (IndexNode *)frame->data();
while (false == node->is_leaf) {
PageNum next_page_id;
for (; !node->is_leaf; ) {
InternalIndexNodeHandler internal_node(file_header_, frame);
PageNum page_num = child_page_getter(internal_node);
disk_buffer_pool_->unpin_page(frame);
rc = disk_buffer_pool_->get_this_page(page_num, &frame);
next_page_id = child_page_getter(internal_node);
rc = crabing_protocal_fetch_page(latch_memo, op, next_page_id, false /* is_root_node */, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("Failed to load page page_num:%d", page_num);
LOG_WARN("Failed to load page page_num:%d. rc=%s", next_page_id, strrc(rc));
return rc;
}
node = (IndexNode *)frame->data();
}
return RC::SUCCESS;
}
RC BplusTreeHandler::insert_entry_into_leaf_node(Frame *frame, const char *key, const RID *rid)
/**
 * Fetches one page while descending the tree and latches it.
 * The frame is obtained through latch_memo, latched SHARED for READ operations and
 * EXCLUSIVE for every other operation. If the node is safe for the operation
 * (see IndexNodeHandler::is_safe), the memo is released back to the point recorded
 * before this page was fetched.
 * @param latch_memo   memo through which the page is fetched and latched
 * @param op           operation being performed on the tree
 * @param page_num     page to fetch
 * @param is_root_node whether page_num is the root page (forwarded to is_safe)
 * @param frame        output: the fetched frame
 * @return the result of latch_memo.get_page(); on failure the frame is not latched
 */
RC BplusTreeHandler::crabing_protocal_fetch_page(LatchMemo &latch_memo,
BplusTreeOperationType op,
PageNum page_num,
bool is_root_node,
Frame *&frame)
{
bool readonly = (op == BplusTreeOperationType::READ);
// remember where the memo stood before this page was added to it
const int memo_point = latch_memo.memo_point();
RC rc = latch_memo.get_page(page_num, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to get frame. pageNum=%d, rc=%s", page_num, strrc(rc));
return rc;
}
LatchMemoType latch_type = readonly ? LatchMemoType::SHARED : LatchMemoType::EXCLUSIVE;
latch_memo.latch(frame, latch_type);
IndexNodeHandler index_node(file_header_, frame);
if (index_node.is_safe(op, is_root_node)) {
latch_memo.release_to(memo_point); // this node will not split or merge, so the latches taken before it can all be released
}
return rc;
}
RC BplusTreeHandler::insert_entry_into_leaf_node(LatchMemo &latch_memo, Frame *frame, const char *key, const RID *rid)
{
LeafIndexNodeHandler leaf_node(file_header_, frame);
bool exists = false;
bool exists = false; // 该数据是否已经存在指定的叶子节点中了
int insert_position = leaf_node.lookup(key_comparator_, key, &exists);
if (exists) {
LOG_TRACE("entry exists");
......@@ -1168,47 +1149,32 @@ RC BplusTreeHandler::insert_entry_into_leaf_node(Frame *frame, const char *key,
if (leaf_node.size() < leaf_node.max_size()) {
leaf_node.insert(insert_position, key, (const char *)rid);
frame->mark_dirty();
disk_buffer_pool_->unpin_page(frame);
// disk_buffer_pool_->unpin_page(frame); // unpin pages 由latch memo 来操作
return RC::SUCCESS;
}
Frame *new_frame = nullptr;
RC rc = split<LeafIndexNodeHandler>(frame, new_frame);
RC rc = split<LeafIndexNodeHandler>(latch_memo, frame, new_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to split leaf node. rc=%d:%s", rc, strrc(rc));
return rc;
}
LeafIndexNodeHandler new_index_node(file_header_, new_frame);
new_index_node.set_prev_page(frame->page_num());
new_index_node.set_next_page(leaf_node.next_page());
new_index_node.set_parent_page_num(leaf_node.parent_page_num());
leaf_node.set_next_page(new_frame->page_num());
PageNum next_page_num = new_index_node.next_page();
if (next_page_num != BP_INVALID_PAGE_NUM) {
Frame *next_frame;
rc = disk_buffer_pool_->get_this_page(next_page_num, &next_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch next page. page num=%d, rc=%d:%s", next_page_num, rc, strrc(rc));
return rc;
}
LeafIndexNodeHandler next_node(file_header_, next_frame);
next_node.set_prev_page(new_frame->page_num());
disk_buffer_pool_->unpin_page(next_frame);
}
if (insert_position < leaf_node.size()) {
leaf_node.insert(insert_position, key, (const char *)rid);
} else {
new_index_node.insert(insert_position - leaf_node.size(), key, (const char *)rid);
}
return insert_entry_into_parent(frame, new_frame, new_index_node.key_at(0));
return insert_entry_into_parent(latch_memo, frame, new_frame, new_index_node.key_at(0));
}
RC BplusTreeHandler::insert_entry_into_parent(Frame *frame, Frame *new_frame, const char *key)
RC BplusTreeHandler::insert_entry_into_parent(LatchMemo &latch_memo, Frame *frame, Frame *new_frame, const char *key)
{
RC rc = RC::SUCCESS;
......@@ -1234,50 +1200,52 @@ RC BplusTreeHandler::insert_entry_into_parent(Frame *frame, Frame *new_frame, co
frame->mark_dirty();
new_frame->mark_dirty();
disk_buffer_pool_->unpin_page(frame);
disk_buffer_pool_->unpin_page(new_frame);
// disk_buffer_pool_->unpin_page(frame);
// disk_buffer_pool_->unpin_page(new_frame);
file_header_.root_page = root_frame->page_num();
update_root_page_num(); // TODO
root_frame->write_latch(); // 在root页面更新之后,别人就可以访问到了,这时候就要加上锁
update_root_page_num_locked(root_frame->page_num());
root_frame->mark_dirty();
root_frame->write_unlatch();
disk_buffer_pool_->unpin_page(root_frame);
return RC::SUCCESS;
} else {
Frame *parent_frame;
rc = disk_buffer_pool_->get_this_page(parent_page_num, &parent_frame);
Frame *parent_frame = nullptr;
rc = latch_memo.get_page(parent_page_num, parent_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to insert entry into leaf. rc=%d:%s", rc, strrc(rc));
// should do more things to recover
// we should do some things to recover
return rc;
}
InternalIndexNodeHandler node(file_header_, parent_frame);
// 在第一次遍历这个页面时,我们已经拿到parent frame的write latch,所以这里不再去加锁
InternalIndexNodeHandler parent_node(file_header_, parent_frame);
/// current node is not in full mode, insert the entry and return
if (node.size() < node.max_size()) {
node.insert(key, new_frame->page_num(), key_comparator_);
/// 当前这个父节点还没有满,直接将新节点数据插进入就行了
if (parent_node.size() < parent_node.max_size()) {
parent_node.insert(key, new_frame->page_num(), key_comparator_);
new_node_handler.set_parent_page_num(parent_page_num);
frame->mark_dirty();
new_frame->mark_dirty();
parent_frame->mark_dirty();
disk_buffer_pool_->unpin_page(frame);
disk_buffer_pool_->unpin_page(new_frame);
disk_buffer_pool_->unpin_page(parent_frame);
// disk_buffer_pool_->unpin_page(frame);
// disk_buffer_pool_->unpin_page(new_frame);
// disk_buffer_pool_->unpin_page(parent_frame);
} else {
// we should split the node and insert the entry and then insert new entry to current node's parent
Frame *new_parent_frame;
rc = split<InternalIndexNodeHandler>(parent_frame, new_parent_frame);
// 当前父节点即将装满了,那只能再将父节点执行分裂操作
Frame *new_parent_frame = nullptr;
rc = split<InternalIndexNodeHandler>(latch_memo, parent_frame, new_parent_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to split internal node. rc=%d:%s", rc, strrc(rc));
disk_buffer_pool_->unpin_page(frame);
disk_buffer_pool_->unpin_page(new_frame);
disk_buffer_pool_->unpin_page(parent_frame);
// disk_buffer_pool_->unpin_page(frame);
// disk_buffer_pool_->unpin_page(new_frame);
// disk_buffer_pool_->unpin_page(parent_frame);
} else {
// insert into left or right ? decide by key compare result
InternalIndexNodeHandler new_node(file_header_, new_parent_frame);
......@@ -1285,14 +1253,17 @@ RC BplusTreeHandler::insert_entry_into_parent(Frame *frame, Frame *new_frame, co
new_node.insert(key, new_frame->page_num(), key_comparator_);
new_node_handler.set_parent_page_num(new_node.page_num());
} else {
node.insert(key, new_frame->page_num(), key_comparator_);
new_node_handler.set_parent_page_num(node.page_num());
parent_node.insert(key, new_frame->page_num(), key_comparator_);
new_node_handler.set_parent_page_num(parent_node.page_num());
}
disk_buffer_pool_->unpin_page(frame);
disk_buffer_pool_->unpin_page(new_frame);
// disk_buffer_pool_->unpin_page(frame);
// disk_buffer_pool_->unpin_page(new_frame);
rc = insert_entry_into_parent(parent_frame, new_parent_frame, new_node.key_at(0));
// 虽然这里是递归调用,但是通常B+ Tree 的层高比较低(3层已经可以容纳很多数据),所以没有栈溢出风险。
// Q: 在查找叶子节点时,我们都会尝试将没必要的锁提前释放掉,在这里插入数据时,是在向上遍历节点,
// 理论上来说,我们可以释放更低层级节点的锁,但是并没有这么做,为什么?
rc = insert_entry_into_parent(latch_memo, parent_frame, new_parent_frame, new_node.key_at(0));
}
}
}
......@@ -1301,47 +1272,37 @@ RC BplusTreeHandler::insert_entry_into_parent(Frame *frame, Frame *new_frame, co
/**
* split one full node into two
* @param page_handle[inout] the node to split
* @param new_page_handle[out] the new node after split
* @param insert_position the insert position of new key
*/
template <typename IndexNodeHandlerType>
RC BplusTreeHandler::split(Frame *frame, Frame *&new_frame)
RC BplusTreeHandler::split(LatchMemo &latch_memo, Frame *frame, Frame *&new_frame)
{
IndexNodeHandlerType old_node(file_header_, frame);
// add a new node
RC rc = disk_buffer_pool_->allocate_page(&new_frame);
RC rc = latch_memo.allocate_page(new_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("Failed to split index page due to failed to allocate page, rc=%d:%s", rc, strrc(rc));
return rc;
}
latch_memo.xlatch(new_frame);
IndexNodeHandlerType new_node(file_header_, new_frame);
new_node.init_empty();
new_node.set_parent_page_num(old_node.parent_page_num());
old_node.move_half_to(new_node, disk_buffer_pool_);
old_node.move_half_to(new_node, disk_buffer_pool_); // TODO remove disk buffer pool
frame->mark_dirty();
new_frame->mark_dirty();
return RC::SUCCESS;
}
RC BplusTreeHandler::update_root_page_num()
void BplusTreeHandler::update_root_page_num_locked(PageNum root_page_num)
{
Frame *header_frame;
RC rc = disk_buffer_pool_->get_this_page(FIRST_INDEX_PAGE, &header_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch header page. rc=%d:%s", rc, strrc(rc));
return rc;
}
IndexFileHeader *header = (IndexFileHeader *)header_frame->data();
header->root_page = file_header_.root_page;
header_frame->mark_dirty();
disk_buffer_pool_->unpin_page(header_frame);
return rc;
file_header_.root_page = root_page_num;
header_dirty_ = true;
LOG_DEBUG("set root page to %d", root_page_num);
}
RC BplusTreeHandler::create_new_tree(const char *key, const RID *rid)
......@@ -1353,7 +1314,7 @@ RC BplusTreeHandler::create_new_tree(const char *key, const RID *rid)
return rc;
}
Frame *frame;
Frame *frame = nullptr;
rc = disk_buffer_pool_->allocate_page(&frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to allocate root page. rc=%d:%s", rc, strrc(rc));
......@@ -1363,32 +1324,26 @@ RC BplusTreeHandler::create_new_tree(const char *key, const RID *rid)
LeafIndexNodeHandler leaf_node(file_header_, frame);
leaf_node.init_empty();
leaf_node.insert(0, key, (const char *)rid);
file_header_.root_page = frame->page_num();
update_root_page_num_locked(frame->page_num());
frame->mark_dirty();
disk_buffer_pool_->unpin_page(frame);
rc = update_root_page_num();
// disk_buffer_pool_->check_all_pages_unpinned(file_id_);
return rc;
}
char *BplusTreeHandler::make_key(const char *user_key, const RID &rid)
MemPoolItem::unique_ptr BplusTreeHandler::make_key(const char *user_key, const RID &rid)
{
char *key = (char *)mem_pool_item_->alloc();
MemPoolItem::unique_ptr key = mem_pool_item_->alloc_unique_ptr();
if (key == nullptr) {
LOG_WARN("Failed to alloc memory for key.");
return nullptr;
}
memcpy(key, user_key, file_header_.attr_length);
memcpy(key + file_header_.attr_length, &rid, sizeof(rid));
memcpy(static_cast<char *>(key.get()), user_key, file_header_.attr_length);
memcpy(static_cast<char *>(key.get()) + file_header_.attr_length, &rid, sizeof(rid));
return key;
}
void BplusTreeHandler::free_key(char *key)
{
mem_pool_item_->free(key);
}
RC BplusTreeHandler::insert_entry(const char *user_key, const RID *rid)
{
if (user_key == nullptr || rid == nullptr) {
......@@ -1396,38 +1351,40 @@ RC BplusTreeHandler::insert_entry(const char *user_key, const RID *rid)
return RC::INVALID_ARGUMENT;
}
char *key = make_key(user_key, *rid);
if (key == nullptr) {
MemPoolItem::unique_ptr pkey = make_key(user_key, *rid);
if (pkey == nullptr) {
LOG_WARN("Failed to alloc memory for key.");
return RC::NOMEM;
}
char *key = static_cast<char *>(pkey.get());
if (is_empty()) {
RC rc = create_new_tree(key, rid);
mem_pool_item_->free(key);
return rc;
root_lock_.lock();
if (is_empty()) {
RC rc = create_new_tree(key, rid);
root_lock_.unlock();
return rc;
}
root_lock_.unlock();
}
Frame *frame;
RC rc = find_leaf(key, frame);
LatchMemo latch_memo(disk_buffer_pool_);
Frame *frame = nullptr;
RC rc = find_leaf(latch_memo, BplusTreeOperationType::INSERT, key, frame);
if (rc != RC::SUCCESS) {
LOG_WARN("Failed to find leaf %s. rc=%d:%s", rid->to_string().c_str(), rc, strrc(rc));
mem_pool_item_->free(key);
return rc;
}
rc = insert_entry_into_leaf_node(frame, key, rid);
rc = insert_entry_into_leaf_node(latch_memo, frame, key, rid);
if (rc != RC::SUCCESS) {
LOG_TRACE("Failed to insert into leaf of index, rid:%s", rid->to_string().c_str());
disk_buffer_pool_->unpin_page(frame);
mem_pool_item_->free(key);
// disk_buffer_pool_->check_all_pages_unpinned(file_id_);
return rc;
}
mem_pool_item_->free(key);
LOG_TRACE("insert entry success");
// disk_buffer_pool_->check_all_pages_unpinned(file_id_);
return RC::SUCCESS;
}
......@@ -1436,43 +1393,44 @@ RC BplusTreeHandler::get_entry(const char *user_key, int key_len, std::list<RID>
BplusTreeScanner scanner(*this);
RC rc = scanner.open(user_key, key_len, true /*left_inclusive*/, user_key, key_len, true /*right_inclusive*/);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to open scanner. rc=%d:%s", rc, strrc(rc));
LOG_WARN("failed to open scanner. rc=%s", strrc(rc));
return rc;
}
RID rid;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
rids.push_back(rid);
}
scanner.close();
if (rc != RC::RECORD_EOF) {
LOG_WARN("scanner return error. rc=%d:%s", rc, strrc(rc));
LOG_WARN("scanner return error. rc=%s", strrc(rc));
} else {
rc = RC::SUCCESS;
}
return rc;
}
RC BplusTreeHandler::adjust_root(Frame *root_frame)
RC BplusTreeHandler::adjust_root(LatchMemo &latch_memo, Frame *root_frame)
{
IndexNodeHandler root_node(file_header_, root_frame);
if (root_node.is_leaf() && root_node.size() > 0) {
root_frame->mark_dirty();
disk_buffer_pool_->unpin_page(root_frame);
return RC::SUCCESS;
}
PageNum new_root_page_num = BP_INVALID_PAGE_NUM;
if (root_node.is_leaf()) {
// this is a leaf and an empty node
file_header_.root_page = BP_INVALID_PAGE_NUM;
ASSERT(root_node.size() == 0, "");
// file_header_.root_page = BP_INVALID_PAGE_NUM;
new_root_page_num = BP_INVALID_PAGE_NUM;
} else {
// this is an internal node and has only one child node
// 根节点只有一个子节点了,需要把自己删掉,把子节点提升为根节点
InternalIndexNodeHandler internal_node(file_header_, root_frame);
const PageNum child_page_num = internal_node.value_at(0);
Frame *child_frame;
RC rc = disk_buffer_pool_->get_this_page(child_page_num, &child_frame);
Frame *child_frame = nullptr;
RC rc = latch_memo.get_page(child_page_num, child_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch child page. page num=%d, rc=%d:%s", child_page_num, rc, strrc(rc));
return rc;
......@@ -1480,24 +1438,23 @@ RC BplusTreeHandler::adjust_root(Frame *root_frame)
IndexNodeHandler child_node(file_header_, child_frame);
child_node.set_parent_page_num(BP_INVALID_PAGE_NUM);
disk_buffer_pool_->unpin_page(child_frame);
file_header_.root_page = child_page_num;
// file_header_.root_page = child_page_num;
new_root_page_num = child_page_num;
}
update_root_page_num();
update_root_page_num_locked(new_root_page_num);
PageNum old_root_page_num = root_frame->page_num();
disk_buffer_pool_->unpin_page(root_frame);
disk_buffer_pool_->dispose_page(old_root_page_num);
latch_memo.dispose_page(old_root_page_num);
return RC::SUCCESS;
}
template <typename IndexNodeHandlerType>
RC BplusTreeHandler::coalesce_or_redistribute(Frame *frame)
RC BplusTreeHandler::coalesce_or_redistribute(LatchMemo &latch_memo, Frame *frame)
{
IndexNodeHandlerType index_node(file_header_, frame);
if (index_node.size() >= index_node.min_size()) {
disk_buffer_pool_->unpin_page(frame);
return RC::SUCCESS;
}
......@@ -1505,30 +1462,26 @@ RC BplusTreeHandler::coalesce_or_redistribute(Frame *frame)
if (BP_INVALID_PAGE_NUM == parent_page_num) {
// this is the root page
if (index_node.size() > 1) {
disk_buffer_pool_->unpin_page(frame);
} else {
// adjust the root node
adjust_root(frame);
adjust_root(latch_memo, frame);
}
return RC::SUCCESS;
}
Frame *parent_frame;
RC rc = disk_buffer_pool_->get_this_page(parent_page_num, &parent_frame);
Frame *parent_frame = nullptr;
RC rc = latch_memo.get_page(parent_page_num, parent_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch parent page. page id=%d, rc=%d:%s", parent_page_num, rc, strrc(rc));
disk_buffer_pool_->unpin_page(frame);
return rc;
}
InternalIndexNodeHandler parent_index_node(file_header_, parent_frame);
int index = parent_index_node.lookup(key_comparator_, index_node.key_at(index_node.size() - 1));
if (parent_index_node.value_at(index) != frame->page_num()) {
LOG_ERROR("lookup return an invalid value. index=%d, this page num=%d, but got %d",
index,
frame->page_num(),
parent_index_node.value_at(index));
}
ASSERT(parent_index_node.value_at(index) == frame->page_num(),
"lookup return an invalid value. index=%d, this page num=%d, but got %d",
index, frame->page_num(), parent_index_node.value_at(index));
PageNum neighbor_page_num;
if (index == 0) {
neighbor_page_num = parent_index_node.value_at(1);
......@@ -1536,31 +1489,29 @@ RC BplusTreeHandler::coalesce_or_redistribute(Frame *frame)
neighbor_page_num = parent_index_node.value_at(index - 1);
}
Frame *neighbor_frame;
rc = disk_buffer_pool_->get_this_page(neighbor_page_num, &neighbor_frame);
Frame *neighbor_frame = nullptr;
rc = latch_memo.get_page(neighbor_page_num, neighbor_frame); // 当前已经拥有了父节点的写锁,所以直接尝试获取此页面然后加锁
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch neighbor page. page id=%d, rc=%d:%s", neighbor_page_num, rc, strrc(rc));
// TODO do more thing to release resource
disk_buffer_pool_->unpin_page(frame);
disk_buffer_pool_->unpin_page(parent_frame);
// do something to release resource
return rc;
}
latch_memo.xlatch(neighbor_frame);
IndexNodeHandlerType neighbor_node(file_header_, neighbor_frame);
if (index_node.size() + neighbor_node.size() > index_node.max_size()) {
rc = redistribute<IndexNodeHandlerType>(neighbor_frame, frame, parent_frame, index);
} else {
rc = coalesce<IndexNodeHandlerType>(neighbor_frame, frame, parent_frame, index);
rc = coalesce<IndexNodeHandlerType>(latch_memo, neighbor_frame, frame, parent_frame, index);
}
return rc;
}
template <typename IndexNodeHandlerType>
RC BplusTreeHandler::coalesce(Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index)
RC BplusTreeHandler::coalesce(LatchMemo &latch_memo, Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index)
{
IndexNodeHandlerType neighbor_node(file_header_, neighbor_frame);
IndexNodeHandlerType node(file_header_, frame);
InternalIndexNodeHandler parent_node(file_header_, parent_frame);
Frame *left_frame = nullptr;
......@@ -1592,30 +1543,10 @@ RC BplusTreeHandler::coalesce(Frame *neighbor_frame, Frame *frame, Frame *parent
LeafIndexNodeHandler left_leaf_node(file_header_, left_frame);
LeafIndexNodeHandler right_leaf_node(file_header_, right_frame);
left_leaf_node.set_next_page(right_leaf_node.next_page());
PageNum next_right_page_num = right_leaf_node.next_page();
if (next_right_page_num != BP_INVALID_PAGE_NUM) {
Frame *next_right_frame;
rc = disk_buffer_pool_->get_this_page(next_right_page_num, &next_right_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch next right page. page number:%d. rc=%d:%s", next_right_page_num, rc, strrc(rc));
disk_buffer_pool_->unpin_page(frame);
disk_buffer_pool_->unpin_page(neighbor_frame);
disk_buffer_pool_->unpin_page(parent_frame);
return rc;
}
LeafIndexNodeHandler next_right_node(file_header_, next_right_frame);
next_right_node.set_prev_page(left_node.page_num());
disk_buffer_pool_->unpin_page(next_right_frame);
}
}
PageNum right_page_num = right_frame->page_num();
disk_buffer_pool_->unpin_page(left_frame);
disk_buffer_pool_->unpin_page(right_frame);
disk_buffer_pool_->dispose_page(right_page_num);
return coalesce_or_redistribute<InternalIndexNodeHandler>(parent_frame);
latch_memo.dispose_page(right_frame->page_num());
return coalesce_or_redistribute<InternalIndexNodeHandler>(latch_memo, parent_frame);
}
template <typename IndexNodeHandlerType>
......@@ -1646,20 +1577,18 @@ RC BplusTreeHandler::redistribute(Frame *neighbor_frame, Frame *frame, Frame *pa
neighbor_frame->mark_dirty();
frame->mark_dirty();
parent_frame->mark_dirty();
disk_buffer_pool_->unpin_page(parent_frame);
disk_buffer_pool_->unpin_page(neighbor_frame);
disk_buffer_pool_->unpin_page(frame);
return RC::SUCCESS;
}
RC BplusTreeHandler::delete_entry_internal(Frame *leaf_frame, const char *key)
RC BplusTreeHandler::delete_entry_internal(LatchMemo &latch_memo, Frame *leaf_frame, const char *key)
{
LeafIndexNodeHandler leaf_index_node(file_header_, leaf_frame);
const int remove_count = leaf_index_node.remove(key, key_comparator_);
if (remove_count == 0) {
LOG_TRACE("no data to remove");
disk_buffer_pool_->unpin_page(leaf_frame);
LOG_TRACE("no data need to remove");
// disk_buffer_pool_->unpin_page(leaf_frame);
return RC::RECORD_RECORD_NOT_EXIST;
}
// leaf_index_node.validate(key_comparator_, disk_buffer_pool_, file_id_);
......@@ -1667,41 +1596,47 @@ RC BplusTreeHandler::delete_entry_internal(Frame *leaf_frame, const char *key)
leaf_frame->mark_dirty();
if (leaf_index_node.size() >= leaf_index_node.min_size()) {
disk_buffer_pool_->unpin_page(leaf_frame);
return RC::SUCCESS;
}
return coalesce_or_redistribute<LeafIndexNodeHandler>(leaf_frame);
return coalesce_or_redistribute<LeafIndexNodeHandler>(latch_memo, leaf_frame);
}
RC BplusTreeHandler::delete_entry(const char *user_key, const RID *rid)
{
char *key = (char *)mem_pool_item_->alloc();
if (nullptr == key) {
MemPoolItem::unique_ptr pkey = mem_pool_item_->alloc_unique_ptr();
if (nullptr == pkey) {
LOG_WARN("Failed to alloc memory for key. size=%d", file_header_.key_length);
return RC::NOMEM;
}
char *key = static_cast<char *>(pkey.get());
memcpy(key, user_key, file_header_.attr_length);
memcpy(key + file_header_.attr_length, rid, sizeof(*rid));
Frame *leaf_frame;
RC rc = find_leaf(key, leaf_frame);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to find leaf page. rc =%d:%s", rc, strrc(rc));
mem_pool_item_->free(key);
BplusTreeOperationType op = BplusTreeOperationType::DELETE;
LatchMemo latch_memo(disk_buffer_pool_);
Frame *leaf_frame = nullptr;
RC rc = find_leaf(latch_memo, op, key, leaf_frame);
if (rc == RC::EMPTY) {
rc = RC::RECORD_RECORD_NOT_EXIST;
return rc;
}
rc = delete_entry_internal(leaf_frame, key);
if (rc != RC::SUCCESS) {
LOG_WARN("Failed to delete index");
mem_pool_item_->free(key);
LOG_WARN("failed to find leaf page. rc =%s", strrc(rc));
return rc;
}
mem_pool_item_->free(key);
return RC::SUCCESS;
return delete_entry_internal(latch_memo, leaf_frame, key);
}
BplusTreeScanner::BplusTreeScanner(BplusTreeHandler &tree_handler) : tree_handler_(tree_handler)
////////////////////////////////////////////////////////////////////////////////
BplusTreeScanner::BplusTreeScanner(BplusTreeHandler &tree_handler)
: tree_handler_(tree_handler),
latch_memo_(tree_handler.disk_buffer_pool_)
{}
BplusTreeScanner::~BplusTreeScanner()
......@@ -1709,8 +1644,8 @@ BplusTreeScanner::~BplusTreeScanner()
close();
}
RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inclusive, const char *right_user_key,
int right_len, bool right_inclusive)
RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inclusive,
const char *right_user_key, int right_len, bool right_inclusive)
{
RC rc = RC::SUCCESS;
if (inited_) {
......@@ -1719,6 +1654,7 @@ RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inc
}
inited_ = true;
first_emitted_ = false;
// 校验输入的键值是否是合法范围
if (left_user_key && right_user_key) {
......@@ -1732,15 +1668,14 @@ RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inc
}
if (nullptr == left_user_key) {
rc = tree_handler_.left_most_page(left_frame_);
rc = tree_handler_.left_most_page(latch_memo_, current_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to find left most page. rc=%d:%s", rc, strrc(rc));
LOG_WARN("failed to find left most page. rc=%s", strrc(rc));
return rc;
}
iter_index_ = 0;
} else {
char *left_key = nullptr;
char *fixed_left_key = const_cast<char *>(left_user_key);
if (tree_handler_.file_header_.attr_type == CHARS) {
......@@ -1756,39 +1691,48 @@ RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inc
}
}
MemPoolItem::unique_ptr left_pkey;
if (left_inclusive) {
left_key = tree_handler_.make_key(fixed_left_key, *RID::min());
left_pkey = tree_handler_.make_key(fixed_left_key, *RID::min());
} else {
left_key = tree_handler_.make_key(fixed_left_key, *RID::max());
left_pkey = tree_handler_.make_key(fixed_left_key, *RID::max());
}
const char *left_key = (const char *)left_pkey.get();
if (fixed_left_key != left_user_key) {
delete[] fixed_left_key;
fixed_left_key = nullptr;
}
rc = tree_handler_.find_leaf(left_key, left_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to find left page. rc=%d:%s", rc, strrc(rc));
tree_handler_.free_key(left_key);
rc = tree_handler_.find_leaf(latch_memo_, BplusTreeOperationType::READ, left_key, current_frame_);
if (rc == RC::EMPTY) {
rc = RC::SUCCESS;
current_frame_ = nullptr;
return rc;
} else if (rc != RC::SUCCESS) {
LOG_WARN("failed to find left page. rc=%s", strrc(rc));
return rc;
}
LeafIndexNodeHandler left_node(tree_handler_.file_header_, left_frame_);
LeafIndexNodeHandler left_node(tree_handler_.file_header_, current_frame_);
int left_index = left_node.lookup(tree_handler_.key_comparator_, left_key);
tree_handler_.free_key(left_key);
// lookup 返回的是适合插入的位置,还需要判断一下是否在合适的边界范围内
if (left_index >= left_node.size()) { // 超出了当前页,就需要向后移动一个位置
const PageNum next_page_num = left_node.next_page();
if (next_page_num == BP_INVALID_PAGE_NUM) { // 这里已经是最后一页,说明当前扫描,没有数据
latch_memo_.release();
current_frame_ = nullptr;
return RC::SUCCESS;
}
tree_handler_.disk_buffer_pool_->unpin_page(left_frame_);
rc = tree_handler_.disk_buffer_pool_->get_this_page(next_page_num, &left_frame_);
rc = latch_memo_.get_page(next_page_num, current_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch next page. page num=%d, rc=%d:%s", next_page_num, rc, strrc(rc));
LOG_WARN("failed to fetch next page. page num=%d, rc=%s", next_page_num, strrc(rc));
return rc;
}
latch_memo_.slatch(current_frame_);
left_index = 0;
}
......@@ -1797,17 +1741,9 @@ RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inc
// 没有指定右边界范围,那么就返回右边界最大值
if (nullptr == right_user_key) {
rc = tree_handler_.right_most_page(right_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch right most page. rc=%d:%s", rc, strrc(rc));
return rc;
}
LeafIndexNodeHandler node(tree_handler_.file_header_, right_frame_);
end_index_ = node.size() - 1;
right_key_ = nullptr;
} else {
char *right_key = nullptr;
char *fixed_right_key = const_cast<char *>(right_user_key);
bool should_include_after_fix = false;
if (tree_handler_.file_header_.attr_type == CHARS) {
......@@ -1822,126 +1758,98 @@ RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inc
}
}
if (right_inclusive) {
right_key = tree_handler_.make_key(fixed_right_key, *RID::max());
right_key_ = tree_handler_.make_key(fixed_right_key, *RID::max());
} else {
right_key = tree_handler_.make_key(fixed_right_key, *RID::min());
right_key_ = tree_handler_.make_key(fixed_right_key, *RID::min());
}
if (fixed_right_key != right_user_key) {
delete[] fixed_right_key;
fixed_right_key = nullptr;
}
}
rc = tree_handler_.find_leaf(right_key, right_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to find left page. rc=%d:%s", rc, strrc(rc));
tree_handler_.free_key(right_key);
return rc;
}
LeafIndexNodeHandler right_node(tree_handler_.file_header_, right_frame_);
int right_index = right_node.lookup(tree_handler_.key_comparator_, right_key);
tree_handler_.free_key(right_key);
// lookup 返回的是适合插入的位置,需要根据实际情况做调整
// 通常情况下需要找到上一个位置
if (right_index > 0) {
right_index--;
} else {
// 实际上,只有最左边的叶子节点查找时,lookup 才可能返回0
// 其它的叶子节点都不可能返回0,所以这段逻辑其实是可以简化的
const PageNum prev_page_num = right_node.prev_page();
if (prev_page_num == BP_INVALID_PAGE_NUM) {
end_index_ = -1;
return RC::SUCCESS;
}
if (touch_end()) {
current_frame_ = nullptr;
}
tree_handler_.disk_buffer_pool_->unpin_page(right_frame_);
rc = tree_handler_.disk_buffer_pool_->get_this_page(prev_page_num, &right_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to fetch prev page num. page num=%d, rc=%d:%s", prev_page_num, rc, strrc(rc));
return rc;
}
return RC::SUCCESS;
}
LeafIndexNodeHandler tmp_node(tree_handler_.file_header_, right_frame_);
right_index = tmp_node.size() - 1;
}
end_index_ = right_index;
}
// Copies the value stored at position `iter_index_` of the leaf node held in
// `current_frame_` into `rid` (sizeof(rid) bytes). No bounds or null checks are
// performed here; the caller is expected to have a valid `current_frame_` and
// an in-range `iter_index_`.
void BplusTreeScanner::fetch_item(RID &rid)
{
LeafIndexNodeHandler node(tree_handler_.file_header_, current_frame_);
memcpy(&rid, node.value_at(iter_index_), sizeof(rid));
}
// 判断是否左边界比右边界要靠后
// 两个边界最多会多一页
// 查找不存在的元素,或者不存在的范围数据时,可能会存在这个问题
if (left_frame_->page_num() == right_frame_->page_num() && iter_index_ > end_index_) {
end_index_ = -1;
} else {
LeafIndexNodeHandler left_node(tree_handler_.file_header_, left_frame_);
LeafIndexNodeHandler right_node(tree_handler_.file_header_, right_frame_);
if (left_node.prev_page() == right_node.page_num()) {
end_index_ = -1;
}
bool BplusTreeScanner::touch_end()
{
if (right_key_ == nullptr) {
return false;
}
return RC::SUCCESS;
LeafIndexNodeHandler node(tree_handler_.file_header_, current_frame_);
const char *this_key = node.key_at(iter_index_);
int compare_result = tree_handler_.key_comparator_(this_key, static_cast<char *>(right_key_.get()));
return compare_result > 0;
}
RC BplusTreeScanner::next_entry(RID *rid)
RC BplusTreeScanner::next_entry(RID &rid)
{
if (-1 == end_index_) {
if (nullptr == current_frame_) {
return RC::RECORD_EOF;
}
LeafIndexNodeHandler node(tree_handler_.file_header_, left_frame_);
memcpy(rid, node.value_at(iter_index_), sizeof(*rid));
if (left_frame_->page_num() == right_frame_->page_num() && iter_index_ == end_index_) {
end_index_ = -1;
if (!first_emitted_) {
fetch_item(rid);
first_emitted_ = true;
return RC::SUCCESS;
}
if (iter_index_ < node.size() - 1) {
++iter_index_;
iter_index_++;
LeafIndexNodeHandler node(tree_handler_.file_header_, current_frame_);
if (iter_index_ < node.size()) {
if (touch_end()) {
return RC::RECORD_EOF;
}
fetch_item(rid);
return RC::SUCCESS;
}
RC rc = RC::SUCCESS;
if (left_frame_->page_num() != right_frame_->page_num()) {
PageNum page_num = node.next_page();
tree_handler_.disk_buffer_pool_->unpin_page(left_frame_);
if (page_num == BP_INVALID_PAGE_NUM) {
left_frame_ = nullptr;
LOG_WARN("got invalid next page. page num=%d", page_num);
rc = RC::INTERNAL;
} else {
rc = tree_handler_.disk_buffer_pool_->get_this_page(page_num, &left_frame_);
if (rc != RC::SUCCESS) {
left_frame_ = nullptr;
LOG_WARN("failed to fetch next page. page num=%d, rc=%d:%s", page_num, rc, strrc(rc));
return rc;
}
PageNum next_page_num = node.next_page();
if (BP_INVALID_PAGE_NUM == next_page_num) {
return RC::RECORD_EOF;
}
iter_index_ = 0;
}
} else if (end_index_ != -1) {
LOG_WARN("should have more pages but not. left page=%d, right page=%d",
left_frame_->page_num(),
right_frame_->page_num());
rc = RC::INTERNAL;
const int memo_point = latch_memo_.memo_point();
rc = latch_memo_.get_page(next_page_num, current_frame_);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to get next page. page num=%d, rc=%s", next_page_num, strrc(rc));
return rc;
}
return rc;
/**
* 如果这里直接去加锁,那可能会造成死锁
* 因为这里访问页面的方式顺序与插入、删除的顺序不一样
* 如果加锁失败,就由上层做重试
*/
bool locked = latch_memo_.try_slatch(current_frame_);
if (!locked) {
return RC::LOCKED_NEED_WAIT;
}
latch_memo_.release_to(memo_point);
iter_index_ = -1; // `next` will add 1
return next_entry(rid);
}
RC BplusTreeScanner::close()
{
if (left_frame_ != nullptr) {
tree_handler_.disk_buffer_pool_->unpin_page(left_frame_);
left_frame_ = nullptr;
}
if (right_frame_ != nullptr) {
tree_handler_.disk_buffer_pool_->unpin_page(right_frame_);
right_frame_ = nullptr;
}
end_index_ = -1;
inited_ = false;
LOG_INFO("bplus tree scanner closed");
LOG_TRACE("bplus tree scanner closed");
return RC::SUCCESS;
}
......@@ -1986,6 +1894,7 @@ RC BplusTreeScanner::fix_user_key(
// 等价于扫描 >= ABCE 的数据
// 如果是扫描 <=/< user_key的数据
// 示例:<=/< ABCD1 <==> <= ABCD (attr_length=4)
// NOTE: 假设都是普通的ASCII字符,不包含二进制字符,使用char不会溢出
*should_inclusive = true;
if (want_greater) {
key_buf[attr_length - 1]++;
......
......@@ -14,22 +14,33 @@ See the Mulan PSL v2 for more details. */
// Rewritten by Longda & Wangyunlai
//
//
#ifndef __OBSERVER_STORAGE_COMMON_INDEX_MANAGER_H_
#define __OBSERVER_STORAGE_COMMON_INDEX_MANAGER_H_
#pragma once
#include <string.h>
#include <sstream>
#include <functional>
#include <memory>
#include "storage/record/record_manager.h"
#include "storage/default/disk_buffer_pool.h"
#include "storage/trx/latch_memo.h"
#include "sql/parser/parse_defs.h"
#include "common/lang/comparator.h"
#include "common/log/log.h"
#define EMPTY_RID_PAGE_NUM -1
#define EMPTY_RID_PAGE_NUM -1 // TODO remove me
#define EMPTY_RID_SLOT_NUM -1
class AttrComparator {
// The kind of operation being carried out on the B+ tree. It is passed to
// BplusTreeHandler::find_leaf and IndexNodeHandler::is_safe.
enum class BplusTreeOperationType
{
READ,
INSERT,
DELETE,
};
class AttrComparator
{
public:
void init(AttrType type, int length)
{
......@@ -55,8 +66,7 @@ public:
return common::compare_string((void *)v1, attr_length_, (void *)v2, attr_length_);
}
default: {
LOG_ERROR("unknown attr type. %d", attr_type_);
abort();
ASSERT(false, "unknown attr type. %d", attr_type_);
}
}
}
......@@ -66,7 +76,8 @@ private:
int attr_length_;
};
class KeyComparator {
class KeyComparator
{
public:
void init(AttrType type, int length)
{
......@@ -94,7 +105,8 @@ private:
AttrComparator attr_comparator_;
};
class AttrPrinter {
class AttrPrinter
{
public:
void init(AttrType type, int length)
{
......@@ -127,8 +139,7 @@ public:
return str;
}
default: {
LOG_ERROR("unknown attr type. %d", attr_type_);
abort();
ASSERT(false, "unknown attr type. %d", attr_type_);
}
}
}
......@@ -138,7 +149,8 @@ private:
int attr_length_;
};
class KeyPrinter {
class KeyPrinter
{
public:
void init(AttrType type, int length)
{
......@@ -169,7 +181,8 @@ private:
* this is the first page of bplus tree.
* only one field can be supported, can you extend it to multi-fields?
*/
struct IndexFileHeader {
struct IndexFileHeader
{
IndexFileHeader()
{
memset(this, 0, sizeof(IndexFileHeader));
......@@ -197,17 +210,17 @@ struct IndexFileHeader {
}
};
#define RECORD_RESERVER_PAIR_NUM 2
/**
* the common part of page describtion of bplus tree
* storage format:
* | page type | item number | parent page id |
*/
struct IndexNode {
struct IndexNode
{
static constexpr int HEADER_SIZE = 12;
bool is_leaf;
int key_num;
bool is_leaf;
int key_num;
PageNum parent;
};
......@@ -222,10 +235,10 @@ struct IndexNode {
* the value is rid.
* can you implement a cluster index?
*/
struct LeafIndexNode : public IndexNode {
static constexpr int HEADER_SIZE = IndexNode::HEADER_SIZE + 8;
struct LeafIndexNode : public IndexNode
{
static constexpr int HEADER_SIZE = IndexNode::HEADER_SIZE + 4;
PageNum prev_brother;
PageNum next_brother;
/**
* leaf can store order keys and rids at most
......@@ -242,7 +255,8 @@ struct LeafIndexNode : public IndexNode {
* the first key is ignored(key0).
* so it will waste space, can you fix this?
*/
struct InternalIndexNode : public IndexNode {
struct InternalIndexNode : public IndexNode
{
static constexpr int HEADER_SIZE = IndexNode::HEADER_SIZE;
/**
......@@ -251,24 +265,34 @@ struct InternalIndexNode : public IndexNode {
char array[0];
};
class IndexNodeHandler {
/**
* IndexNode 仅作为数据在内存或磁盘中的表示
* IndexNodeHandler 负责对IndexNode做各种操作。
* 作为一个类来说,虚函数会影响“结构体”真实的内存布局,所以将数据存储与操作分开
*/
class IndexNodeHandler
{
public:
IndexNodeHandler(const IndexFileHeader &header, Frame *frame);
virtual ~IndexNodeHandler() = default;
void init_empty(bool leaf);
bool is_leaf() const;
int key_size() const;
int value_size() const;
int item_size() const;
int key_size() const;
int value_size() const;
int item_size() const;
void increase_size(int n);
int size() const;
int size() const;
int max_size() const;
int min_size() const;
void set_parent_page_num(PageNum page_num);
PageNum parent_page_num() const;
PageNum page_num() const;
bool is_safe(BplusTreeOperationType op, bool is_root_node);
bool validate() const;
friend std::string to_string(const IndexNodeHandler &handler);
......@@ -279,15 +303,15 @@ protected:
IndexNode *node_;
};
class LeafIndexNodeHandler : public IndexNodeHandler {
class LeafIndexNodeHandler : public IndexNodeHandler
{
public:
LeafIndexNodeHandler(const IndexFileHeader &header, Frame *frame);
virtual ~LeafIndexNodeHandler() = default;
void init_empty();
void set_next_page(PageNum page_num);
void set_prev_page(PageNum page_num);
PageNum next_page() const;
PageNum prev_page() const;
char *key_at(int index);
char *value_at(int index);
......@@ -301,7 +325,7 @@ public:
void insert(int index, const char *key, const char *value);
void remove(int index);
int remove(const char *key, const KeyComparator &comparator);
int remove(const char *key, const KeyComparator &comparator);
RC move_half_to(LeafIndexNodeHandler &other, DiskBufferPool *bp);
RC move_first_to_end(LeafIndexNodeHandler &other, DiskBufferPool *disk_buffer_pool);
RC move_last_to_front(LeafIndexNodeHandler &other, DiskBufferPool *bp);
......@@ -310,9 +334,6 @@ public:
*/
RC move_to(LeafIndexNodeHandler &other, DiskBufferPool *bp);
int max_size() const;
int min_size() const;
bool validate(const KeyComparator &comparator, DiskBufferPool *bp) const;
friend std::string to_string(const LeafIndexNodeHandler &handler, const KeyPrinter &printer);
......@@ -329,9 +350,11 @@ private:
LeafIndexNode *leaf_node_;
};
class InternalIndexNodeHandler : public IndexNodeHandler {
class InternalIndexNodeHandler : public IndexNodeHandler
{
public:
InternalIndexNodeHandler(const IndexFileHeader &header, Frame *frame);
virtual ~InternalIndexNodeHandler() = default;
void init_empty();
void create_new_root(PageNum first_page_num, const char *key, PageNum page_num);
......@@ -351,13 +374,16 @@ public:
/**
* 与Leaf节点不同,lookup返回指定key应该属于哪个子节点,返回这个子节点在当前节点中的索引
* 如果想要返回插入位置,就提供 `insert_position` 参数
* @param comparator 用于键值比较的函数
* @param key 查找的键值
* @param found 如果是有效指针,将会返回当前是否存在指定的键值
* @param insert_position 如果是有效指针,将会返回可以插入指定键值的位置
* NOTE: 查找效率不高,你可以优化它吗?
*/
int lookup(
const KeyComparator &comparator, const char *key, bool *found = nullptr, int *insert_position = nullptr) const;
int max_size() const;
int min_size() const;
int lookup(const KeyComparator &comparator,
const char *key,
bool *found = nullptr,
int *insert_position = nullptr) const;
RC move_to(InternalIndexNodeHandler &other, DiskBufferPool *disk_buffer_pool);
RC move_first_to_end(InternalIndexNodeHandler &other, DiskBufferPool *disk_buffer_pool);
......@@ -382,17 +408,21 @@ private:
int item_size() const;
private:
InternalIndexNode *internal_node_;
InternalIndexNode *internal_node_ = nullptr;
};
class BplusTreeHandler {
class BplusTreeHandler
{
public:
/**
* 此函数创建一个名为fileName的索引。
* attrType描述被索引属性的类型,attrLength描述被索引属性的长度
*/
RC create(
const char *file_name, AttrType attr_type, int attr_length, int internal_max_size = -1, int leaf_max_size = -1);
RC create(const char *file_name,
AttrType attr_type,
int attr_length,
int internal_max_size = -1,
int leaf_max_size = -1);
/**
* 打开名为fileName的索引文件。
......@@ -434,71 +464,85 @@ public:
/**
* Check whether current B+ tree is invalid or not.
* return true means current tree is valid, return false means current tree is invalid.
* @return
* @return true means current tree is valid, return false means current tree is invalid.
* @note thread unsafe
*/
bool validate_tree();
public:
/**
* 这些函数都是线程不安全的,不要在多线程的环境下调用
*/
RC print_tree();
RC print_leafs();
private:
/**
* 这些函数都是线程不安全的,不要在多线程的环境下调用
*/
RC print_leaf(Frame *frame);
RC print_internal_node_recursive(Frame *frame);
bool validate_node(IndexNode *node);
bool validate_leaf_link();
bool validate_node_recursive(Frame *frame);
bool validate_leaf_link(LatchMemo &latch_memo);
bool validate_node_recursive(LatchMemo &latch_memo, Frame *frame);
protected:
RC find_leaf(const char *key, Frame *&frame);
RC left_most_page(Frame *&frame);
RC right_most_page(Frame *&frame);
RC find_leaf_internal(const std::function<PageNum(InternalIndexNodeHandler &)> &child_page_getter, Frame *&frame);
RC insert_into_parent(PageNum parent_page, Frame *left_frame, const char *pkey, Frame &right_frame);
RC find_leaf(LatchMemo &latch_memo, BplusTreeOperationType op, const char *key, Frame *&frame);
RC left_most_page(LatchMemo &latch_memo, Frame *&frame);
RC find_leaf_internal(LatchMemo &latch_memo, BplusTreeOperationType op,
const std::function<PageNum(InternalIndexNodeHandler &)> &child_page_getter,
Frame *&frame);
RC crabing_protocal_fetch_page(LatchMemo &latch_memo, BplusTreeOperationType op, PageNum page_num, bool is_root_page,
Frame *&frame);
RC delete_entry_internal(Frame *leaf_frame, const char *key);
RC insert_into_parent(LatchMemo &latch_memo, PageNum parent_page, Frame *left_frame, const char *pkey,
Frame &right_frame);
RC insert_into_new_root(Frame *left_frame, const char *pkey, Frame &right_frame);
RC delete_entry_internal(LatchMemo &latch_memo, Frame *leaf_frame, const char *key);
template <typename IndexNodeHandlerType>
RC split(Frame *frame, Frame *&new_frame);
RC split(LatchMemo &latch_memo, Frame *frame, Frame *&new_frame);
template <typename IndexNodeHandlerType>
RC coalesce_or_redistribute(Frame *frame);
RC coalesce_or_redistribute(LatchMemo &latch_memo, Frame *frame);
template <typename IndexNodeHandlerType>
RC coalesce(Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index);
RC coalesce(LatchMemo &latch_memo, Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index);
template <typename IndexNodeHandlerType>
RC redistribute(Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index);
RC insert_entry_into_parent(Frame *frame, Frame *new_frame, const char *key);
RC insert_entry_into_leaf_node(Frame *frame, const char *pkey, const RID *rid);
RC update_root_page_num();
RC insert_entry_into_parent(LatchMemo &latch_memo, Frame *frame, Frame *new_frame, const char *key);
RC insert_entry_into_leaf_node(LatchMemo &latch_memo, Frame *frame, const char *pkey, const RID *rid);
RC create_new_tree(const char *key, const RID *rid);
RC adjust_root(Frame *root_frame);
void update_root_page_num(PageNum root_page_num);
void update_root_page_num_locked(PageNum root_page_num);
RC adjust_root(LatchMemo &latch_memo, Frame *root_frame);
private:
char *make_key(const char *user_key, const RID &rid);
common::MemPoolItem::unique_ptr make_key(const char *user_key, const RID &rid);
void free_key(char *key);
protected:
DiskBufferPool *disk_buffer_pool_ = nullptr;
bool header_dirty_ = false;
bool header_dirty_ = false; //
IndexFileHeader file_header_;
KeyComparator key_comparator_;
KeyPrinter key_printer_;
// 在调整根节点时,需要加上这个锁。
// 这个锁可以使用递归读写锁,但是这里偷懒先不改
common::SharedMutex root_lock_;
common::MemPoolItem *mem_pool_item_ = nullptr;
KeyComparator key_comparator_;
KeyPrinter key_printer_;
std::unique_ptr<common::MemPoolItem> mem_pool_item_;
private:
friend class BplusTreeScanner;
friend class BplusTreeTester;
};
class BplusTreeScanner {
class BplusTreeScanner
{
public:
BplusTreeScanner(BplusTreeHandler &tree_handler);
~BplusTreeScanner();
......@@ -512,10 +556,10 @@ public:
* @param right_len right_user_key 的内存大小(只有在变长字段中才会关注)
* @param right_inclusive 右边界的值是否包含在内
*/
RC open(const char *left_user_key, int left_len, bool left_inclusive, const char *right_user_key, int right_len,
bool right_inclusive);
RC open(const char *left_user_key, int left_len, bool left_inclusive,
const char *right_user_key, int right_len, bool right_inclusive);
RC next_entry(RID *rid);
RC next_entry(RID &rid);
RC close();
......@@ -525,16 +569,20 @@ private:
*/
RC fix_user_key(const char *user_key, int key_len, bool want_greater, char **fixed_key, bool *should_inclusive);
void fetch_item(RID &rid);
bool touch_end();
private:
bool inited_ = false;
BplusTreeHandler &tree_handler_;
LatchMemo latch_memo_;
/// 使用左右叶子节点和位置来表示扫描的起始位置和终止位置
/// 起始位置和终止位置都是有效的数据
Frame *left_frame_ = nullptr;
Frame *right_frame_ = nullptr;
Frame *current_frame_ = nullptr;
common::MemPoolItem::unique_ptr right_key_;
int iter_index_ = -1;
int end_index_ = -1; // use -1 for end of scan
bool first_emitted_ = false;
};
#endif //__OBSERVER_STORAGE_COMMON_INDEX_MANAGER_H_
......@@ -132,7 +132,7 @@ RC BplusTreeIndexScanner::open(
RC BplusTreeIndexScanner::next_entry(RID *rid)
{
return tree_scanner_.next_entry(rid);
return tree_scanner_.next_entry(*rid);
}
RC BplusTreeIndexScanner::destroy()
......
......@@ -222,28 +222,6 @@ RC RecordPageHandler::recover_insert_record(const char *data, RID *rid)
return RC::SUCCESS;
}
/**
 * Overwrite the record stored in the slot named by rec->rid() with rec->data().
 *
 * @param rec  record carrying both the target rid and the new payload
 * @return RC::INVALID_ARGUMENT when the slot number is not below the page's record capacity,
 *         RC::RECORD_RECORD_NOT_EXIST when the slot's bitmap bit is clear,
 *         RC::SUCCESS after the payload has been copied and the frame marked dirty.
 */
RC RecordPageHandler::update_record(const Record *rec)
{
  const RID &target = rec->rid();

  // Guard 1: the slot must lie inside this page.
  if (target.slot_num >= page_header_->record_capacity) {
    LOG_ERROR(
        "Invalid slot_num %d, exceed page's record capacity, page_num %d.", target.slot_num, frame_->page_num());
    return RC::INVALID_ARGUMENT;
  }

  // Guard 2: the slot must currently be marked as used in the bitmap.
  Bitmap slots(bitmap_, page_header_->record_capacity);
  if (!slots.get_bit(target.slot_num)) {
    LOG_ERROR("Invalid slot_num %d, slot is empty, page_num %d.", target.slot_num, frame_->page_num());
    return RC::RECORD_RECORD_NOT_EXIST;
  }

  // Copy exactly record_real_size bytes over the stored record.
  char *dest = get_record_data(target.slot_num);
  memcpy(dest, rec->data(), page_header_->record_real_size);
  slots.set_bit(target.slot_num);
  frame_->mark_dirty();
  return RC::SUCCESS;
}
RC RecordPageHandler::delete_record(const RID *rid)
{
if (rid->slot_num >= page_header_->record_capacity) {
......@@ -258,10 +236,9 @@ RC RecordPageHandler::delete_record(const RID *rid)
frame_->mark_dirty();
if (page_header_->record_num == 0) {
DiskBufferPool *disk_buffer_pool = disk_buffer_pool_;
PageNum page_num = get_page_num();
// PageNum page_num = get_page_num();
cleanup();
disk_buffer_pool->dispose_page(page_num);
// disk_buffer_pool->dispose_page(page_num); // TODO 确认是否可以不删除页面
}
return RC::SUCCESS;
} else {
......@@ -415,18 +392,6 @@ RC RecordFileHandler::recover_insert_record(const char *data, int record_size, R
return record_page_handler.recover_insert_record(data, rid);
}
/**
 * Update a record by delegating to a page handler opened on the record's page.
 *
 * @param rec  record carrying the target rid (page_num selects the page) and the new payload
 * @return the page handler's init() error if the page cannot be opened (also logged),
 *         otherwise whatever RecordPageHandler::update_record returns.
 */
RC RecordFileHandler::update_record(const Record *rec)
{
  RecordPageHandler handler;
  const RC init_rc = handler.init(*disk_buffer_pool_, rec->rid().page_num);
  if (init_rc == RC::SUCCESS) {
    return handler.update_record(rec);
  }

  LOG_ERROR("Failed to init record page handler.page number=%d", rec->rid().page_num);
  return init_rc;
}
RC RecordFileHandler::delete_record(const RID *rid)
{
RC rc = RC::SUCCESS;
......
......@@ -20,8 +20,16 @@ See the Mulan PSL v2 for more details. */
#include "common/lang/bitmap.h"
class ConditionFilter;
class RecordPageHandler;
struct PageHeader {
/**
* 数据文件,按照页面来组织,每一页都存放一些记录/数据行
* 每一页都有一个这样的页头,虽然看起来浪费,但是现在就简单的这么做
* 从这个页头描述的信息来看,当前仅支持定长行/记录。如果要支持变长记录,
* 或者超长(超出一页)的记录,这么做是不合适的。
*/
struct PageHeader
{
int32_t record_num; // 当前页面记录的个数
int32_t record_capacity; // 最大记录个数
int32_t record_real_size; // 每条记录的实际大小
......@@ -29,8 +37,11 @@ struct PageHeader {
int32_t first_record_offset; // 第一条记录的偏移量
};
class RecordPageHandler;
class RecordPageIterator {
/**
* 遍历一个页面中每条记录的iterator
*/
class RecordPageIterator
{
public:
RecordPageIterator();
~RecordPageIterator();
......@@ -52,7 +63,11 @@ private:
SlotNum next_slot_num_ = 0;
};
class RecordPageHandler {
/**
* 负责处理一个页面中各种操作,比如插入记录、删除记录或者查找记录
*/
class RecordPageHandler
{
public:
RecordPageHandler() = default;
~RecordPageHandler();
......@@ -63,20 +78,6 @@ public:
RC insert_record(const char *data, RID *rid);
RC recover_insert_record(const char *data, RID *rid);
RC update_record(const Record *rec);
/**
 * Fetch the record at `rid` and let `updater` modify it.
 *
 * @tparam RecordUpdater  callable invoked as updater(Record &) whose result is returned as RC
 * @param rid      location of the record inside this page
 * @param updater  callback applied to the fetched record
 * @return the get_record() error when the lookup fails; otherwise the updater's result.
 *         The frame is marked dirty whenever the updater ran, regardless of its result.
 */
template <class RecordUpdater>
RC update_record_in_place(const RID *rid, RecordUpdater updater)
{
  Record target;
  const RC fetch_rc = get_record(rid, &target);
  if (fetch_rc != RC::SUCCESS) {
    return fetch_rc;
  }

  const RC update_rc = updater(target);
  frame_->mark_dirty();
  return update_rc;
}
RC delete_record(const RID *rid);
......@@ -102,7 +103,8 @@ private:
friend class RecordPageIterator;
};
class RecordFileHandler {
class RecordFileHandler
{
public:
RecordFileHandler() = default;
RC init(DiskBufferPool *buffer_pool);
......@@ -130,19 +132,6 @@ public:
*/
RC get_record(const RID *rid, Record *rec);
/**
 * Open the page named by rid->page_num and apply `updater` to the record at `rid`.
 *
 * TODO (translated from the original note): turn this into a plain function instead of a template.
 *
 * @tparam RecordUpdater  callable forwarded to RecordPageHandler::update_record_in_place
 * @param rid      location of the record to update
 * @param updater  callback applied to the record
 * @return the page handler's init() error if the page cannot be opened,
 *         otherwise the result of RecordPageHandler::update_record_in_place.
 */
template <class RecordUpdater>
RC update_record_in_place(const RID *rid, RecordUpdater updater)
{
  RecordPageHandler page_handler;
  // The result of init() must be *stored* in rc. The previous code compared (rc != init(...)),
  // so rc kept its initial RC::SUCCESS value and a failed init was reported as success.
  RC rc = page_handler.init(*disk_buffer_pool_, rid->page_num);
  if (rc != RC::SUCCESS) {
    return rc;
  }

  return page_handler.update_record_in_place(rid, updater);
}
private:
RC init_free_pages();
......@@ -151,7 +140,8 @@ private:
std::unordered_set<PageNum> free_pages_; // 没有填充满的页面集合
};
class RecordFileScanner {
class RecordFileScanner
{
public:
RecordFileScanner() = default;
......
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by Wangyunlai on 2023/03/08.
//
#include "storage/trx/latch_memo.h"
#include "storage/buffer/frame.h"
#include "storage/default/disk_buffer_pool.h"
#include "common/lang/mutex.h"
/**
 * Build a memo entry that refers to a page frame.
 * The `lock` member keeps its in-class default (nullptr).
 */
LatchMemoItem::LatchMemoItem(LatchMemoType type, Frame *frame) : type(type), frame(frame)
{}
/**
 * Build a memo entry that refers to a standalone shared mutex.
 * The `frame` member keeps its in-class default (nullptr).
 */
LatchMemoItem::LatchMemoItem(LatchMemoType type, common::SharedMutex *lock) : type(type), lock(lock)
{}
////////////////////////////////////////////////////////////////////////////////
/**
 * Bind the memo to the buffer pool it will use to fetch, allocate, unpin and dispose pages.
 */
LatchMemo::LatchMemo(DiskBufferPool *buffer_pool)
{
  buffer_pool_ = buffer_pool;
}
/**
 * Release everything still recorded in the memo when it goes out of scope.
 */
LatchMemo::~LatchMemo()
{
  release();
}
/**
 * Fetch a page from the buffer pool and remember it as a PIN entry.
 *
 * @param page_num  page to fetch
 * @param frame     out: the fetched frame; reset to nullptr before the fetch is attempted
 * @return the buffer pool's error code on failure (nothing is recorded), RC::SUCCESS otherwise.
 */
RC LatchMemo::get_page(PageNum page_num, Frame *&frame)
{
  frame = nullptr;

  const RC fetch_rc = buffer_pool_->get_this_page(page_num, &frame);
  if (fetch_rc == RC::SUCCESS) {
    // Recorded as PIN so that releasing this entry calls unpin_page on the frame.
    items_.emplace_back(LatchMemoType::PIN, frame);
  }
  return fetch_rc;
}
/**
 * Allocate a new page from the buffer pool and remember it as a PIN entry.
 *
 * @param frame  out: the newly allocated frame; reset to nullptr before allocation is attempted
 * @return the buffer pool's result; on failure nothing is recorded in the memo.
 */
RC LatchMemo::allocate_page(Frame *&frame)
{
  frame = nullptr;

  const RC alloc_rc = buffer_pool_->allocate_page(&frame);
  if (alloc_rc != RC::SUCCESS) {
    return alloc_rc;
  }

  items_.emplace_back(LatchMemoType::PIN, frame);
  // A freshly allocated frame is expected to be pinned exactly once (by this memo).
  ASSERT(frame->pin_count() == 1, "allocate a new frame. frame=%s", to_string(*frame).c_str());
  return alloc_rc;
}
/**
 * Queue a page for disposal. The page is only handed to the buffer pool's
 * dispose_page() later, inside release(), after the recorded entries have been released.
 */
void LatchMemo::dispose_page(PageNum page_num)
{
  disposed_pages_.push_back(page_num);
}
/**
 * Latch a frame in the requested mode and record the latch in the memo.
 *
 * @param frame  frame to latch
 * @param type   LatchMemoType::EXCLUSIVE (write latch) or LatchMemoType::SHARED (read latch);
 *               any other value is a programming error
 */
void LatchMemo::latch(Frame *frame, LatchMemoType type)
{
  switch (type) {
    case LatchMemoType::EXCLUSIVE: {
      frame->write_latch();
    } break;
    case LatchMemoType::SHARED: {
      frame->read_latch();
    } break;
    default: {
      ASSERT(false, "invalid latch type: %d", static_cast<int>(type));
      // Nothing was latched, so nothing may be recorded: a memo entry for a latch that was never
      // taken (e.g. type PIN) would later be "released" by release_item(), unpinning a frame this
      // memo never pinned. This matters when ASSERT does not abort
      // (NOTE(review): presumably ASSERT is compiled out in non-debug builds — confirm).
      return;
    }
  }

  items_.emplace_back(type, frame);
}
/**
 * Take and record an exclusive (write) latch on a frame.
 */
void LatchMemo::xlatch(Frame *frame)
{
  latch(frame, LatchMemoType::EXCLUSIVE);
}
/**
 * Take and record a shared (read) latch on a frame.
 */
void LatchMemo::slatch(Frame *frame)
{
  latch(frame, LatchMemoType::SHARED);
}
/**
 * Try to take a shared (read) latch on a frame via Frame::try_read_latch().
 *
 * @return true when the latch was obtained (and recorded in the memo), false otherwise
 *         (the memo is left unchanged).
 */
bool LatchMemo::try_slatch(Frame *frame)
{
  if (!frame->try_read_latch()) {
    return false;
  }

  items_.emplace_back(LatchMemoType::SHARED, frame);
  return true;
}
/**
 * Lock a standalone shared mutex exclusively and record it in the memo.
 * The debug log message refers to it as the root lock.
 */
void LatchMemo::xlatch(common::SharedMutex *lock)
{
  lock->lock();
  items_.push_back(LatchMemoItem(LatchMemoType::EXCLUSIVE, lock));
  LOG_DEBUG("lock root success");
}
/**
 * Lock a standalone shared mutex in shared mode and record it in the memo.
 */
void LatchMemo::slatch(common::SharedMutex *lock)
{
  lock->lock_shared();
  items_.push_back(LatchMemoItem(LatchMemoType::SHARED, lock));
}
/**
 * Undo one memo entry.
 *
 * - EXCLUSIVE / SHARED: unlatch the frame when the entry holds one, otherwise unlock the
 *   standalone shared mutex in the matching mode.
 * - PIN: hand the frame back to the buffer pool via unpin_page().
 * - anything else: programming error.
 */
void LatchMemo::release_item(LatchMemoItem &item)
{
  // Entries built from a Frame* have a non-null frame; entries built from a SharedMutex* do not.
  const bool holds_frame = (item.frame != nullptr);

  if (item.type == LatchMemoType::EXCLUSIVE) {
    if (holds_frame) {
      item.frame->write_unlatch();
      return;
    }
    LOG_DEBUG("release root lock");
    item.lock->unlock();
    return;
  }

  if (item.type == LatchMemoType::SHARED) {
    if (holds_frame) {
      item.frame->read_unlatch();
      return;
    }
    item.lock->unlock_shared();
    return;
  }

  if (item.type == LatchMemoType::PIN) {
    buffer_pool_->unpin_page(item.frame);
    return;
  }

  ASSERT(false, "invalid latch type: %d", static_cast<int>(item.type));
}
void LatchMemo::release()
{
int point = static_cast<int>(items_.size());
release_to(point);
for (PageNum page_num : disposed_pages_) {
buffer_pool_->dispose_page(page_num);
}
disposed_pages_.clear();
}
/**
 * Release the first `point` entries of the memo and drop them from it.
 *
 * Entries are released from index point-1 down to index 0, i.e. in reverse order of
 * acquisition; entries at index >= point stay recorded and stay held.
 *
 * @param point  number of leading entries to release; must be within [0, items_.size()]
 */
void LatchMemo::release_to(int point)
{
  ASSERT(point >= 0 && point <= static_cast<int>(items_.size()),
         "invalid memo point. point=%d, items size=%d",
         point, static_cast<int>(items_.size()));

  for (int remaining = point; remaining > 0; --remaining) {
    release_item(items_[remaining - 1]);
  }

  // Drop exactly the entries that were just released (none when point <= 0).
  if (point > 0) {
    items_.erase(items_.begin(), items_.begin() + point);
  }
}
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */
//
// Created by Wangyunlai on 2023/03/08.
//
#pragma once
#include <deque>
#include <vector>
#include "rc.h"
#include "storage/buffer/page.h"
class Frame;
class DiskBufferPool;
namespace common {
class SharedMutex;
}
/**
 * Kind of resource a LatchMemoItem remembers.
 */
enum class LatchMemoType
{
  NONE,       ///< default value of an unset item; never valid to release
  SHARED,     ///< read latch on a frame, or shared lock on a SharedMutex
  EXCLUSIVE,  ///< write latch on a frame, or exclusive lock on a SharedMutex
  PIN,        ///< a frame obtained from the buffer pool that must be unpinned
};
/**
 * One entry of a LatchMemo: what was acquired (type) and on which object.
 * An entry refers either to a frame or to a standalone shared mutex; the constructors
 * set one of the two pointers and leave the other as nullptr.
 */
struct LatchMemoItem
{
  LatchMemoItem() = default;
  LatchMemoItem(LatchMemoType type, Frame *frame);
  LatchMemoItem(LatchMemoType type, common::SharedMutex *lock);

  LatchMemoType        type  = LatchMemoType::NONE;  ///< how the resource is held
  Frame               *frame = nullptr;              ///< set for frame latches and pins
  common::SharedMutex *lock  = nullptr;              ///< set for standalone mutex locks
};
/**
 * Keeps a record of the page pins, frame latches and shared-mutex locks taken during one
 * operation so that they can be released together (the destructor calls release()).
 *
 * NOTE(review): the class is implicitly copyable although its destructor releases the recorded
 * resources, and the single-argument constructor is not `explicit` — confirm that no caller
 * copies a LatchMemo or relies on the implicit conversion before tightening either.
 */
class LatchMemo final
{
public:
  LatchMemo(DiskBufferPool *buffer_pool);
  ~LatchMemo();

  /// Fetch / allocate a page through the buffer pool and record it as a PIN entry.
  RC get_page(PageNum page_num, Frame *&frame);
  RC allocate_page(Frame *&frame);

  /// Queue a page for disposal; the disposal itself is performed by release().
  void dispose_page(PageNum page_num);

  /// Latch a frame (exclusive or shared) and record the latch.
  void latch(Frame *frame, LatchMemoType type);
  void xlatch(Frame *frame);
  void slatch(Frame *frame);
  /// Non-blocking shared latch attempt; records the latch only when it was obtained.
  bool try_slatch(Frame *frame);

  /// Lock a standalone shared mutex (exclusive or shared) and record the lock.
  void xlatch(common::SharedMutex *lock);
  void slatch(common::SharedMutex *lock);

  /// Release every recorded entry, then dispose the queued pages.
  void release();

  /// Release the first `point` recorded entries and keep the rest.
  /// (The original note read: "release everything except the last lock".)
  void release_to(int point);

  /// Current number of recorded entries; usable as the `point` argument of release_to().
  int memo_point() const
  {
    return static_cast<int>(items_.size());
  }

private:
  void release_item(LatchMemoItem &item);

private:
  DiskBufferPool           *buffer_pool_ = nullptr;  ///< pool used to fetch/unpin/dispose pages
  std::deque<LatchMemoItem> items_;                  ///< recorded entries, in acquisition order
  std::vector<PageNum>      disposed_pages_;         ///< pages waiting to be disposed by release()
};
\ No newline at end of file
......@@ -12,8 +12,7 @@ See the Mulan PSL v2 for more details. */
// Created by Wangyunlai on 2021/5/24.
//
#ifndef __OBSERVER_STORAGE_TRX_TRX_H_
#define __OBSERVER_STORAGE_TRX_TRX_H_
#pragma once
#include <stddef.h>
#include <unordered_map>
......@@ -26,7 +25,8 @@ See the Mulan PSL v2 for more details. */
class Table;
class Operation {
class Operation
{
public:
enum class Type : int {
INSERT,
......@@ -57,7 +57,9 @@ private:
PageNum page_num_;
SlotNum slot_num_;
};
class OperationHasher {
class OperationHasher
{
public:
size_t operator()(const Operation &op) const
{
......@@ -65,7 +67,8 @@ public:
}
};
class OperationEqualer {
class OperationEqualer
{
public:
bool operator()(const Operation &op1, const Operation &op2) const
{
......@@ -77,7 +80,8 @@ public:
* 这里是一个简单的事务实现,可以支持提交/回滚。但是没有对并发访问做控制
* 可以在这个基础上做备份恢复,当然也可以重写
*/
class Trx {
class Trx
{
public:
static std::atomic<int32_t> trx_id;
......@@ -129,5 +133,3 @@ private:
int32_t trx_id_ = 0;
std::unordered_map<Table *, OperationSet> operations_;
};
#endif // __OBSERVER_STORAGE_TRX_TRX_H_
......@@ -38,5 +38,4 @@ FOREACH (F ${ALL_SRC})
ADD_EXECUTABLE(${prjName} ${F})
TARGET_LINK_LIBRARIES(${prjName} common pthread dl observer_static)
ENDFOREACH (F)
ENDFOREACH (F)
\ No newline at end of file
PROJECT(unitest)
PROJECT(unittest)
MESSAGE("Begin to build " ${PROJECT_NAME})
MESSAGE(STATUS "This is PROJECT_BINARY_DIR dir " ${PROJECT_BINARY_DIR})
MESSAGE(STATUS "This is PROJECT_SOURCE_DIR dir " ${PROJECT_SOURCE_DIR})
......@@ -28,7 +28,6 @@ ENDIF ()
find_package(GTest CONFIG REQUIRED)
enable_testing()
include(GoogleTest)
#get_filename_component(<VAR> FileName
......
......@@ -128,7 +128,7 @@ void test_insert()
int t = i % TIMES;
if (t == 0 || t == 1 || t == 2) {
if (rc != RC::RECORD_DUPLICATE_KEY) {
LOG_WARN("insert duplicate key success");
LOG_WARN("insert duplicate key success");
}
ASSERT_EQ(RC::RECORD_DUPLICATE_KEY, rc);
} else {
......@@ -184,7 +184,7 @@ void test_delete()
rc = handler->delete_entry((const char *)&i, &rid);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to delete entry. i=%d, rid=%s", i, rid.to_string().c_str());
LOG_WARN("failed to delete entry. i=%d, rid=%s", i, rid.to_string().c_str());
}
ASSERT_EQ(RC::SUCCESS, rc);
......@@ -233,12 +233,12 @@ void test_delete()
ASSERT_EQ(0, rids.size());
} else {
if (rids.size() != 1) {
LOG_WARN("invalid. i=%d, rid=%s, check rid=%s", i, rid.to_string().c_str(), check_rid.to_string().c_str());
LOG_WARN("invalid. i=%d, rid=%s, check rid=%s", i, rid.to_string().c_str(), check_rid.to_string().c_str());
}
ASSERT_EQ(1, rids.size());
check_rid = rids.front();
if (rid != check_rid) {
LOG_WARN("invalid. i=%d, rid=%s, check rid=%s", i, rid.to_string().c_str(), check_rid.to_string().c_str());
LOG_WARN("invalid. i=%d, rid=%s, check rid=%s", i, rid.to_string().c_str(), check_rid.to_string().c_str());
}
ASSERT_EQ(rid.page_num, check_rid.page_num);
ASSERT_EQ(rid.slot_num, check_rid.slot_num);
......@@ -315,8 +315,6 @@ void test_delete()
TEST(test_bplus_tree, test_leaf_index_node_handle)
{
LoggerFactory::init_default("test.log");
IndexFileHeader index_file_header;
index_file_header.root_page = BP_INVALID_PAGE_NUM;
index_file_header.internal_max_size = 5;
......@@ -374,8 +372,6 @@ TEST(test_bplus_tree, test_leaf_index_node_handle)
}
TEST(test_bplus_tree, test_internal_index_node_handle)
{
LoggerFactory::init_default("test.log");
IndexFileHeader index_file_header;
index_file_header.root_page = BP_INVALID_PAGE_NUM;
index_file_header.internal_max_size = 5;
......@@ -506,7 +502,7 @@ TEST(test_bplus_tree, test_chars)
ASSERT_EQ(rc, RC::SUCCESS);
int count = 0;
while (RC::SUCCESS == (rc = scanner.next_entry(&rid))) {
while (RC::SUCCESS == (rc = scanner.next_entry(rid))) {
count++;
}
scanner.close();
......@@ -525,6 +521,7 @@ TEST(test_bplus_tree, test_scanner)
int count = 0;
RC rc = RC::SUCCESS;
RID rid;
// 初始化B树,插入数据[1 - 199] 所有奇数
for (int i = 0; i < 100; i++) {
int key = i * 2 + 1;
rid.page_num = 0;
......@@ -542,7 +539,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, false, (const char *)&end, 4, false);
ASSERT_EQ(RC::SUCCESS, rc);
rc = scanner.next_entry(&rid);
rc = scanner.next_entry(rid);
ASSERT_EQ(RC::RECORD_EOF, rc);
scanner.close();
......@@ -551,7 +548,7 @@ TEST(test_bplus_tree, test_scanner)
end = 1;
rc = scanner.open((const char *)&begin, 4, false, (const char *)&end, 4, false);
ASSERT_EQ(RC::SUCCESS, rc);
rc = scanner.next_entry(&rid);
rc = scanner.next_entry(rid);
ASSERT_EQ(RC::RECORD_EOF, rc);
scanner.close();
......@@ -560,9 +557,9 @@ TEST(test_bplus_tree, test_scanner)
end = 1;
rc = scanner.open((const char *)&begin, 4, false, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
rc = scanner.next_entry(&rid);
rc = scanner.next_entry(rid);
ASSERT_EQ(RC::SUCCESS, rc);
rc = scanner.next_entry(&rid);
rc = scanner.next_entry(rid);
ASSERT_EQ(RC::RECORD_EOF, rc);
scanner.close();
......@@ -571,7 +568,7 @@ TEST(test_bplus_tree, test_scanner)
end = 3;
rc = scanner.open((const char *)&begin, 4, false, (const char *)&end, 4, false/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
rc = scanner.next_entry(&rid);
rc = scanner.next_entry(rid);
ASSERT_EQ(RC::RECORD_EOF, rc);
scanner.close();
......@@ -580,7 +577,7 @@ TEST(test_bplus_tree, test_scanner)
end = 3;
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(2, count);
......@@ -593,7 +590,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(2, count);
......@@ -606,7 +603,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ((end - begin) / 2 + 1, count);
......@@ -619,7 +616,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ((end - begin) / 2 + 1, count);
......@@ -632,7 +629,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ((end - begin) / 2 + 1, count);
......@@ -645,7 +642,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(5, count);
......@@ -657,7 +654,7 @@ TEST(test_bplus_tree, test_scanner)
end = 301;
rc = scanner.open((const char *)&begin, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
rc = scanner.next_entry(&rid);
rc = scanner.next_entry(rid);
ASSERT_EQ(RC::RECORD_EOF, rc);
scanner.close();
......@@ -674,7 +671,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open(nullptr, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(100, count);
......@@ -687,7 +684,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open(nullptr, 4, true, (const char *)&end, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(5, count);
......@@ -700,7 +697,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open((const char *)&begin, 4, true, nullptr, 4, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(5, count);
......@@ -713,7 +710,7 @@ TEST(test_bplus_tree, test_scanner)
rc = scanner.open(nullptr, 0, true, nullptr, 0, true/*inclusive*/);
ASSERT_EQ(RC::SUCCESS, rc);
count = 0;
while ((rc = scanner.next_entry(&rid)) == RC::SUCCESS) {
while ((rc = scanner.next_entry(rid)) == RC::SUCCESS) {
count++;
}
ASSERT_EQ(100, count);
......@@ -750,6 +747,7 @@ int main(int argc, char **argv)
// 调用RUN_ALL_TESTS()运行所有测试用例
// main函数返回RUN_ALL_TESTS()的运行结果
LoggerFactory::init_default("test.log", LOG_LEVEL_TRACE);
init_bpm();
int rc = RUN_ALL_TESTS();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册