// bplus_tree.cpp
/* Copyright (c) 2021 Xie Meiyi(xiemeiyi@hust.edu.cn) and OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
         http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */

//
// Created by Xie Meiyi
// Rewritten by Longda & Wangyunlai
//
#include "storage/index/bplus_tree.h"
#include "storage/default/disk_buffer_pool.h"
#include "common/log/log.h"
#include "sql/parser/parse_defs.h"
#include "common/lang/lower_bound.h"

using namespace std;
using namespace common;

羽飞's avatar
羽飞 已提交
24 25 26 27 28 29
#define FIRST_INDEX_PAGE 1

int calc_internal_page_capacity(int attr_length)
{
  int item_size = attr_length + sizeof(RID) + sizeof(PageNum);

L
Longda Feng 已提交
30
  int capacity = ((int)BP_PAGE_DATA_SIZE - InternalIndexNode::HEADER_SIZE) / item_size;
羽飞's avatar
羽飞 已提交
31 32 33 34 35 36
  return capacity;
}

int calc_leaf_page_capacity(int attr_length)
{
  int item_size = attr_length + sizeof(RID) + sizeof(RID);
L
Longda Feng 已提交
37
  int capacity = ((int)BP_PAGE_DATA_SIZE - LeafIndexNode::HEADER_SIZE) / item_size;
羽飞's avatar
羽飞 已提交
38 39 40 41
  return capacity;
}

/////////////////////////////////////////////////////////////////////////////////
羽飞's avatar
羽飞 已提交
42
/**
 * Bind a handler to the index node stored in a frame.
 * @param header index file header used for key length and node capacities
 * @param frame  frame whose page number is recorded and whose data is viewed as an IndexNode
 */
IndexNodeHandler::IndexNodeHandler(const IndexFileHeader &header, Frame *frame)
    : header_(header), page_num_(frame->page_num()), node_((IndexNode *)frame->data())
{}

// Report the leaf flag stored in the node.
bool IndexNodeHandler::is_leaf() const
{
  const bool leaf_flag = node_->is_leaf;
  return leaf_flag;
}
// Reset the node: no parent, zero keys, and the given leaf flag.
void IndexNodeHandler::init_empty(bool leaf)
{
  node_->parent  = BP_INVALID_PAGE_NUM;
  node_->key_num = 0;
  node_->is_leaf = leaf;
}
// Page number captured from the frame when the handler was constructed.
PageNum IndexNodeHandler::page_num() const
{
  return this->page_num_;
}

// Key length in bytes, as recorded in the index file header.
int IndexNodeHandler::key_size() const
{
  const int length = header_.key_length;
  return length;
}

// Size in bytes of the value stored beside each key; here it is a RID.
int IndexNodeHandler::value_size() const
{
  const int rid_bytes = sizeof(RID);
  return rid_bytes;
}

// Size in bytes of one item: the key followed by its value.
int IndexNodeHandler::item_size() const
{
  const int key_bytes   = key_size();
  const int value_bytes = value_size();
  return key_bytes + value_bytes;
}

int IndexNodeHandler::size() const
{
  return node_->key_num;
}

82 83 84 85 86 87 88 89 90 91 92
// Capacity of this node, taken from the header field matching its kind (leaf or internal).
int IndexNodeHandler::max_size() const
{
  if (is_leaf()) {
    return header_.leaf_max_size;
  }
  return header_.internal_max_size;
}

// Minimum size: max_size() minus half of it (integer division), i.e. half rounded up.
int IndexNodeHandler::min_size() const
{
  const int capacity = this->max_size();
  const int half     = capacity / 2;
  return capacity - half;
}

羽飞's avatar
羽飞 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105 106
// Adjust the stored key count by n; callers pass a negative n to shrink.
void IndexNodeHandler::increase_size(int n)
{
  node_->key_num = node_->key_num + n;
}

// Page number of the parent node as stored in this node.
PageNum IndexNodeHandler::parent_page_num() const
{
  const PageNum parent = node_->parent;
  return parent;
}

// Store a new parent page number in this node.
void IndexNodeHandler::set_parent_page_num(PageNum page_num)
{
  node_->parent = page_num;
}
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140

/**
 * Check whether this node stays within its size limits after one more operation of kind `op`.
 * @param op           the operation about to be applied
 * @param is_root_node whether this node is the root (only consulted for DELETE)
 * @return true  the node is "safe": the operation does not push it past a limit
 *         false the node is already at the limit the operation would cross
 *
 * READ is always safe. INSERT is safe while size() < max_size().
 * DELETE on a non-root node is safe while size() > min_size().
 * DELETE on the root (see adjust_root): a leaf root must keep more than 1 item,
 * because an empty root means the whole tree has to be removed; an internal root
 * must keep more than 2 children, because a root left with a single child is
 * replaced by that child.
 */
bool IndexNodeHandler::is_safe(BplusTreeOperationType op, bool is_root_node)
{
  if (op == BplusTreeOperationType::READ) {
    return true;
  }

  if (op == BplusTreeOperationType::INSERT) {
    return size() < max_size();
  }

  if (op == BplusTreeOperationType::DELETE) {
    if (!is_root_node) {
      return size() > min_size();
    }
    const int root_floor = node_->is_leaf ? 1 : 2;
    return size() > root_floor;
  }

  // Any other operation type is a programming error.
  ASSERT(false, "invalid operation type: %d", static_cast<int>(op));
  return false;
}

羽飞's avatar
羽飞 已提交
141 142 143 144
std::string to_string(const IndexNodeHandler &handler)
{
  std::stringstream ss;

L
Longda Feng 已提交
145
  ss << "PageNum:" << handler.page_num() << ",is_leaf:" << handler.is_leaf() << ","
羽飞's avatar
羽飞 已提交
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
     << "key_num:" << handler.size() << ","
     << "parent:" << handler.parent_page_num() << ",";

  return ss.str();
}

/**
 * Basic sanity check that applies only to a root page (a node without a parent).
 * @return false if a root has no item, or if an internal root has fewer than 2 children;
 *         true otherwise, including for every non-root node
 */
bool IndexNodeHandler::validate() const
{
  const bool is_root = (parent_page_num() == BP_INVALID_PAGE_NUM);
  if (!is_root) {
    return true;
  }

  if (size() < 1) {
    LOG_WARN("root page has no item");
    return false;
  }

  if (is_leaf()) {
    return true;
  }

  if (size() < 2) {
    LOG_WARN("root page internal node has less than 2 child. size=%d", size());
    return false;
  }
  return true;
}

/////////////////////////////////////////////////////////////////////////////////
羽飞's avatar
羽飞 已提交
170
/**
 * Bind a leaf handler to a frame; the frame data is viewed as a LeafIndexNode.
 */
LeafIndexNodeHandler::LeafIndexNodeHandler(const IndexFileHeader &header, Frame *frame)
    : IndexNodeHandler(header, frame), leaf_node_((LeafIndexNode *)frame->data())
{}

// Reset this node as an empty leaf that has no next sibling.
void LeafIndexNodeHandler::init_empty()
{
  const bool as_leaf = true;
  IndexNodeHandler::init_empty(as_leaf);
  leaf_node_->next_brother = BP_INVALID_PAGE_NUM;
}

// Record the page number of the next sibling leaf.
void LeafIndexNodeHandler::set_next_page(PageNum page_num)
{
  this->leaf_node_->next_brother = page_num;
}

// Page number of the next sibling leaf as stored in this node.
PageNum LeafIndexNodeHandler::next_page() const
{
  const PageNum next = leaf_node_->next_brother;
  return next;
}

// Bounds-checked (via assert) pointer to the key of the item at `index`.
char *LeafIndexNodeHandler::key_at(int index)
{
  assert(0 <= index && size() > index);
  char *key = __key_at(index);
  return key;
}

// Bounds-checked (via assert) pointer to the value of the item at `index`.
char *LeafIndexNodeHandler::value_at(int index)
{
  assert(0 <= index && size() > index);
  char *value = __value_at(index);
  return value;
}

/**
 * Binary-search the items of this leaf for `key` using lower_bound.
 * @param comparator key comparator handed to lower_bound
 * @param key        key to search for
 * @param found      optional out-parameter forwarded to lower_bound
 * @return offset, counted in items from slot 0, of the position lower_bound returns
 */
int LeafIndexNodeHandler::lookup(const KeyComparator &comparator, const char *key, bool *found /* = nullptr */) const
{
  const int size = this->size();
  common::BinaryIterator<char> iter_begin(item_size(), __key_at(0));
  common::BinaryIterator<char> iter_end(item_size(), __key_at(size));
  common::BinaryIterator<char> iter = lower_bound(iter_begin, iter_end, key, comparator, found);
  return iter - iter_begin;
}

/**
 * Insert a key/value pair at slot `index`, shifting later items one slot to the right.
 * No capacity or bounds check is made here.
 */
void LeafIndexNodeHandler::insert(int index, const char *key, const char *value)
{
  char *slot = __item_at(index);
  const int tail_items = size() - index;
  if (tail_items > 0) {
    // open a gap of one item at `index`
    memmove(slot + item_size(), slot, tail_items * item_size());
  }
  memcpy(slot, key, key_size());
  memcpy(slot + key_size(), value, value_size());
  increase_size(1);
}
// Remove the item at `index` by shifting the following items one slot to the left.
void LeafIndexNodeHandler::remove(int index)
{
  assert(0 <= index && size() > index);
  const int trailing = size() - index - 1;
  if (trailing > 0) {
    memmove(__item_at(index), __item_at(index + 1), trailing * item_size());
  }
  increase_size(-1);
}

/**
 * Remove the item reported as found by lookup() for `key`.
 * @return 1 if an item was removed, 0 if the key was not found
 */
int LeafIndexNodeHandler::remove(const char *key, const KeyComparator &comparator)
{
  bool hit = false;
  const int position = lookup(comparator, key, &hit);
  if (!hit) {
    return 0;
  }
  this->remove(position);
  return 1;
}

羽飞's avatar
羽飞 已提交
240
/**
 * Move the upper half of this leaf's items (from index size/2 to the end) into `other`.
 * The items are written starting at slot 0 of `other`, so `other` is expected to be empty.
 * @param bp not used by this function
 * @return always RC::SUCCESS
 */
RC LeafIndexNodeHandler::move_half_to(LeafIndexNodeHandler &other, DiskBufferPool *bp)
{
  const int size = this->size();
  const int move_index = size / 2;
  const int moved = size - move_index;

  memcpy(other.__item_at(0), this->__item_at(move_index), item_size() * moved);
  other.increase_size(moved);
  this->increase_size(-moved);
  return RC::SUCCESS;
}
羽飞's avatar
羽飞 已提交
250
/**
 * Move this leaf's first item to the end of `other`, then close the gap at the front.
 * @param disk_buffer_pool not used by this function
 * @return always RC::SUCCESS
 */
RC LeafIndexNodeHandler::move_first_to_end(LeafIndexNodeHandler &other, DiskBufferPool *disk_buffer_pool)
{
  other.append(__item_at(0));

  // Nothing needs shifting when the moved item was the only one.
  if (size() > 1) {
    memmove(__item_at(0), __item_at(1), (size() - 1) * item_size());
  }
  increase_size(-1);
  return RC::SUCCESS;
}

羽飞's avatar
羽飞 已提交
261
/**
 * Move this leaf's last item to the front of `other`.
 * @param bp not used by this function
 * @return always RC::SUCCESS
 */
RC LeafIndexNodeHandler::move_last_to_front(LeafIndexNodeHandler &other, DiskBufferPool *bp)
{
  other.preappend(__item_at(size() - 1));

  increase_size(-1);
  return RC::SUCCESS;
}
/**
 * move all items to left page
 */
羽飞's avatar
羽飞 已提交
271
RC LeafIndexNodeHandler::move_to(LeafIndexNodeHandler &other, DiskBufferPool *bp)
羽飞's avatar
羽飞 已提交
272 273 274
{
  memcpy(other.__item_at(other.size()), this->__item_at(0), this->size() * item_size());
  other.increase_size(this->size());
L
Longda Feng 已提交
275
  this->increase_size(-this->size());
羽飞's avatar
羽飞 已提交
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311

  other.set_next_page(this->next_page());
  return RC::SUCCESS;
}

// Copy one whole item (key followed by value) into the slot just past the last item.
void LeafIndexNodeHandler::append(const char *item)
{
  char *tail_slot = __item_at(size());
  memcpy(tail_slot, item, item_size());
  increase_size(1);
}

// Insert one whole item at slot 0, shifting the existing items one slot to the right.
void LeafIndexNodeHandler::preappend(const char *item)
{
  const int count = size();
  char *head_slot = __item_at(0);
  if (count > 0) {
    memmove(__item_at(1), head_slot, count * item_size());
  }
  memcpy(head_slot, item, item_size());
  increase_size(1);
}

// Unchecked pointer to the start of item `index` inside the leaf's item array.
char *LeafIndexNodeHandler::__item_at(int index) const
{
  const int byte_offset = index * item_size();
  return leaf_node_->array + byte_offset;
}
// Unchecked pointer to the key of item `index`; the key sits at the start of the item.
char *LeafIndexNodeHandler::__key_at(int index) const
{
  char *item = __item_at(index);
  return item;
}
// Unchecked pointer to the value of item `index`; the value follows the key.
char *LeafIndexNodeHandler::__value_at(int index) const
{
  char *item = __item_at(index);
  return item + key_size();
}

std::string to_string(const LeafIndexNodeHandler &handler, const KeyPrinter &printer)
{
  std::stringstream ss;
312
  ss << to_string((const IndexNodeHandler &)handler)
羽飞's avatar
羽飞 已提交
313
     << ",next page:" << handler.next_page();
L
Longda Feng 已提交
314
  ss << ",values=[" << printer(handler.__key_at(0));
羽飞's avatar
羽飞 已提交
315 316 317 318 319 320 321
  for (int i = 1; i < handler.size(); i++) {
    ss << "," << printer(handler.__key_at(i));
  }
  ss << "]";
  return ss.str();
}

羽飞's avatar
羽飞 已提交
322
bool LeafIndexNodeHandler::validate(const KeyComparator &comparator, DiskBufferPool *bp) const
羽飞's avatar
羽飞 已提交
323 324 325 326 327 328 329 330 331 332
{
  bool result = IndexNodeHandler::validate();
  if (false == result) {
    return false;
  }

  const int node_size = size();
  for (int i = 1; i < node_size; i++) {
    if (comparator(__key_at(i - 1), __key_at(i)) >= 0) {
      LOG_WARN("page number = %d, invalid key order. id1=%d,id2=%d, this=%s",
333
               page_num(), i - 1, i, to_string(*this).c_str());
羽飞's avatar
羽飞 已提交
334 335 336 337 338 339 340 341 342
      return false;
    }
  }

  PageNum parent_page_num = this->parent_page_num();
  if (parent_page_num == BP_INVALID_PAGE_NUM) {
    return true;
  }

羽飞's avatar
羽飞 已提交
343 344
  Frame *parent_frame;
  RC rc = bp->get_this_page(parent_page_num, &parent_frame);
羽飞's avatar
羽飞 已提交
345
  if (rc != RC::SUCCESS) {
L
Longda Feng 已提交
346
    LOG_WARN("failed to fetch parent page. page num=%d, rc=%d:%s", parent_page_num, rc, strrc(rc));
羽飞's avatar
羽飞 已提交
347 348 349
    return false;
  }

羽飞's avatar
羽飞 已提交
350
  InternalIndexNodeHandler parent_node(header_, parent_frame);
羽飞's avatar
羽飞 已提交
351 352 353
  int index_in_parent = parent_node.value_index(this->page_num());
  if (index_in_parent < 0) {
    LOG_WARN("invalid leaf node. cannot find index in parent. this page num=%d, parent page num=%d",
354
             this->page_num(), parent_page_num);
羽飞's avatar
羽飞 已提交
355
    bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
356 357 358 359 360 361
    return false;
  }

  if (0 != index_in_parent) {
    int cmp_result = comparator(__key_at(0), parent_node.key_at(index_in_parent));
    if (cmp_result < 0) {
L
Longda Feng 已提交
362 363
      LOG_WARN("invalid leaf node. first item should be greate than or equal to parent item. "
               "this page num=%d, parent page num=%d, index in parent=%d",
364
               this->page_num(), parent_node.page_num(), index_in_parent);
羽飞's avatar
羽飞 已提交
365
      bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
366 367 368 369 370 371 372
      return false;
    }
  }

  if (index_in_parent < parent_node.size() - 1) {
    int cmp_result = comparator(__key_at(size() - 1), parent_node.key_at(index_in_parent + 1));
    if (cmp_result >= 0) {
L
Longda Feng 已提交
373 374
      LOG_WARN("invalid leaf node. last item should be less than the item at the first after item in parent."
               "this page num=%d, parent page num=%d, parent item to compare=%d",
375
               this->page_num(), parent_node.page_num(), index_in_parent + 1);
羽飞's avatar
羽飞 已提交
376
      bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
377 378 379
      return false;
    }
  }
羽飞's avatar
羽飞 已提交
380
  bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
381 382 383 384
  return true;
}

/////////////////////////////////////////////////////////////////////////////////
羽飞's avatar
羽飞 已提交
385
/**
 * Bind an internal-node handler to a frame; the frame data is viewed as an InternalIndexNode.
 */
InternalIndexNodeHandler::InternalIndexNodeHandler(const IndexFileHeader &header, Frame *frame)
    : IndexNodeHandler(header, frame), internal_node_((InternalIndexNode *)frame->data())
{}

/**
 * Render an internal node as text: the common node fields followed by every
 * {key, child page number} pair. An empty node prints "children:[]"; slot 0 is
 * not read in that case.
 * @param printer converts a raw key into printable text
 */
std::string to_string(const InternalIndexNodeHandler &node, const KeyPrinter &printer)
{
  std::stringstream ss;
  ss << to_string((const IndexNodeHandler &)node);
  ss << ",children:[";
  for (int i = 0; i < node.size(); i++) {
    if (i > 0) {
      ss << ",";
    }
    ss << "{key:" << printer(node.__key_at(i)) << ",value:" << *(PageNum *)node.__value_at(i) << "}";
  }
  ss << "]";
  return ss.str();
}

// Reset this node as an empty internal (non-leaf) node.
void InternalIndexNodeHandler::init_empty()
{
  const bool as_leaf = false;
  IndexNodeHandler::init_empty(as_leaf);
}
/**
 * Fill this node with two entries.
 * Slot 0 gets a zeroed key and `first_page_num`; slot 1 gets `key` and `page_num`.
 * The size is increased by 2.
 */
void InternalIndexNodeHandler::create_new_root(PageNum first_page_num, const char *key, PageNum page_num)
{
  char *left_slot  = __item_at(0);
  char *right_slot = __item_at(1);

  memset(left_slot, 0, key_size());
  memcpy(left_slot + key_size(), &first_page_num, value_size());

  memcpy(right_slot, key, key_size());
  memcpy(right_slot + key_size(), &page_num, value_size());

  increase_size(2);
}

/**
 * Insert one {key, child page} entry at the position reported by lookup().
 * The entry never lands in slot 0: the right-hand page produced by a split
 * always holds the bigger keys.
 */
void InternalIndexNodeHandler::insert(const char *key, PageNum page_num, const KeyComparator &comparator)
{
  int position = -1;
  lookup(comparator, key, nullptr, &position);

  char *slot = __item_at(position);
  const int to_shift = size() - position;
  if (to_shift > 0) {
    // make room by moving the tail one slot to the right
    memmove(__item_at(position + 1), slot, to_shift * item_size());
  }
  memcpy(slot, key, key_size());
  memcpy(slot + key_size(), &page_num, value_size());
  increase_size(1);
}

羽飞's avatar
羽飞 已提交
434
/**
 * Move the upper half of this node's entries (from index size/2 to the end) into `other`.
 * The copy goes through copy_from(), which also re-parents the moved children.
 * @param bp buffer pool used by copy_from() to fetch the child pages
 * @return the result of copy_from(); this node's size is reduced only on success
 */
RC InternalIndexNodeHandler::move_half_to(InternalIndexNodeHandler &other, DiskBufferPool *bp)
{
  const int size = this->size();
  const int move_index = size / 2;
  RC rc = other.copy_from(this->__item_at(move_index), size - move_index, bp);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to copy item to new node. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  increase_size(-(size - move_index));
  return rc;
}

/**
 * lookup the first item which key <= item
 * @return unlike the leafNode, the return value is not the insert position,
L
Longda Feng 已提交
451
 * but only the index of child to find.
羽飞's avatar
羽飞 已提交
452
 */
L
Longda Feng 已提交
453 454
int InternalIndexNodeHandler::lookup(const KeyComparator &comparator, const char *key, bool *found /* = nullptr */,
    int *insert_position /*= nullptr */) const
羽飞's avatar
羽飞 已提交
455 456
{
  const int size = this->size();
羽飞's avatar
羽飞 已提交
457 458 459
  if (size == 0) {
    if (insert_position) {
      *insert_position = 1;
羽飞's avatar
羽飞 已提交
460
    }
羽飞's avatar
羽飞 已提交
461 462
    if (found) {
      *found = false;
羽飞's avatar
羽飞 已提交
463
    }
羽飞's avatar
羽飞 已提交
464
    return 0;
羽飞's avatar
羽飞 已提交
465
  }
羽飞's avatar
羽飞 已提交
466 467 468 469 470

  common::BinaryIterator<char> iter_begin(item_size(), __key_at(1));
  common::BinaryIterator<char> iter_end(item_size(), __key_at(size));
  common::BinaryIterator<char> iter = lower_bound(iter_begin, iter_end, key, comparator, found);
  int ret = static_cast<int>(iter - iter_begin) + 1;
羽飞's avatar
羽飞 已提交
471
  if (insert_position) {
羽飞's avatar
羽飞 已提交
472
    *insert_position = ret;
羽飞's avatar
羽飞 已提交
473
  }
羽飞's avatar
羽飞 已提交
474 475 476 477 478

  if (ret >= size || comparator(key, __key_at(ret)) < 0) {
    return ret - 1;
  }
  return ret;
羽飞's avatar
羽飞 已提交
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
}

// Bounds-checked (via assert) pointer to the key stored in slot `index`.
char *InternalIndexNodeHandler::key_at(int index)
{
  assert(0 <= index && size() > index);
  char *key = __key_at(index);
  return key;
}

// Overwrite the key stored in slot `index` with key_size() bytes from `key`.
void InternalIndexNodeHandler::set_key_at(int index, const char *key)
{
  assert(0 <= index && size() > index);
  char *destination = __key_at(index);
  memcpy(destination, key, key_size());
}

// Bounds-checked (via assert) read of the child page number stored in slot `index`.
PageNum InternalIndexNodeHandler::value_at(int index)
{
  assert(0 <= index && size() > index);
  const PageNum *child = (PageNum *)__value_at(index);
  return *child;
}

/**
 * Linear search for the slot whose child page number equals `page_num`.
 * @return the slot index, or -1 if no slot references that page
 */
int InternalIndexNodeHandler::value_index(PageNum page_num)
{
  const int count = size();
  for (int i = 0; i < count; i++) {
    if (page_num == *(PageNum *)__value_at(i)) {
      return i;
    }
  }
  return -1;
}

// Remove the entry in slot `index` by shifting the following entries one slot to the left.
void InternalIndexNodeHandler::remove(int index)
{
  assert(0 <= index && size() > index);
  const int trailing = size() - index - 1;
  if (trailing > 0) {
    memmove(__item_at(index), __item_at(index + 1), trailing * item_size());
  }
  increase_size(-1);
}

羽飞's avatar
羽飞 已提交
518
/**
 * Move every entry of this node to the end of `other`.
 * The copy goes through copy_from(), which also re-parents the moved children.
 * @param disk_buffer_pool buffer pool used by copy_from() to fetch the child pages
 * @return the error from copy_from() on failure; RC::SUCCESS once this node is emptied
 */
RC InternalIndexNodeHandler::move_to(InternalIndexNodeHandler &other, DiskBufferPool *disk_buffer_pool)
{
  RC rc = other.copy_from(__item_at(0), size(), disk_buffer_pool);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to copy items to other node. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  increase_size(-this->size());
  return RC::SUCCESS;
}

羽飞's avatar
羽飞 已提交
530
/**
 * Move this node's first entry to the end of `other`, then close the gap at the front.
 * @param disk_buffer_pool buffer pool used by append() to re-parent the moved child
 * @return the error from append() on failure, otherwise its success code
 */
RC InternalIndexNodeHandler::move_first_to_end(InternalIndexNodeHandler &other, DiskBufferPool *disk_buffer_pool)
{
  RC rc = other.append(__item_at(0), disk_buffer_pool);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to append item to others.");
    return rc;
  }

  // Nothing needs shifting when the moved entry was the only one.
  if (size() > 1) {
    memmove(__item_at(0), __item_at(1), (size() - 1) * item_size());
  }
  increase_size(-1);
  return rc;
}

羽飞's avatar
羽飞 已提交
545
/**
 * Move this node's last entry to the front of `other`.
 * @param bp buffer pool used by preappend() to re-parent the moved child
 * @return the error from preappend() on failure, otherwise its success code
 */
RC InternalIndexNodeHandler::move_last_to_front(InternalIndexNodeHandler &other, DiskBufferPool *bp)
{
  RC rc = other.preappend(__item_at(size() - 1), bp);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to preappend to others");
    return rc;
  }

  increase_size(-1);
  return rc;
}
/**
 * copy items from other node to self's right
 */
羽飞's avatar
羽飞 已提交
559
RC InternalIndexNodeHandler::copy_from(const char *items, int num, DiskBufferPool *disk_buffer_pool)
羽飞's avatar
羽飞 已提交
560 561 562 563 564
{
  memcpy(__item_at(this->size()), items, num * item_size());

  RC rc = RC::SUCCESS;
  PageNum this_page_num = this->page_num();
羽飞's avatar
羽飞 已提交
565
  Frame *frame = nullptr;
羽飞's avatar
羽飞 已提交
566 567
  for (int i = 0; i < num; i++) {
    const PageNum page_num = *(const PageNum *)((items + i * item_size()) + key_size());
羽飞's avatar
羽飞 已提交
568
    rc = disk_buffer_pool->get_this_page(page_num, &frame);
羽飞's avatar
羽飞 已提交
569 570
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to set child's page num. child page num:%d, this page num=%d, rc=%d:%s",
571
               page_num, this_page_num, rc, strrc(rc));
羽飞's avatar
羽飞 已提交
572 573
      return rc;
    }
羽飞's avatar
羽飞 已提交
574
    IndexNodeHandler child_node(header_, frame);
羽飞's avatar
羽飞 已提交
575
    child_node.set_parent_page_num(this_page_num);
羽飞's avatar
羽飞 已提交
576 577
    frame->mark_dirty();
    disk_buffer_pool->unpin_page(frame);
羽飞's avatar
羽飞 已提交
578 579 580 581 582
  }
  increase_size(num);
  return rc;
}

羽飞's avatar
羽飞 已提交
583
/**
 * Append a single entry to the right end of this node; delegates to copy_from().
 * @return the result of copy_from()
 */
RC InternalIndexNodeHandler::append(const char *item, DiskBufferPool *bp)
{
  return this->copy_from(item, 1, bp);
}

羽飞's avatar
羽飞 已提交
588
/**
 * Insert one entry at slot 0 and set the parent of its child page to this page.
 * The child is re-parented first; this node is only modified after that succeeds.
 * @param item entry to insert (key followed by child page number)
 * @param bp   buffer pool used to fetch the child page
 * @return RC::SUCCESS, or the error from fetching the child page
 */
RC InternalIndexNodeHandler::preappend(const char *item, DiskBufferPool *bp)
{
  // read through a const pointer: `item` is const and must not be cast to mutable
  const PageNum child_page_num = *(const PageNum *)(item + key_size());
  Frame *frame = nullptr;
  RC rc = bp->get_this_page(child_page_num, &frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch child page. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  IndexNodeHandler child_node(header_, frame);
  child_node.set_parent_page_num(this->page_num());

  frame->mark_dirty();
  bp->unpin_page(frame);

  if (this->size() > 0) {
    memmove(__item_at(1), __item_at(0), this->size() * item_size());
  }

  memcpy(__item_at(0), item, item_size());
  increase_size(1);
  return RC::SUCCESS;
}

// Unchecked pointer to the start of entry `index` inside the node's item array.
char *InternalIndexNodeHandler::__item_at(int index) const
{
  const int byte_offset = index * item_size();
  return internal_node_->array + byte_offset;
}

// Unchecked pointer to the key of entry `index`; the key sits at the start of the entry.
char *InternalIndexNodeHandler::__key_at(int index) const
{
  char *entry = __item_at(index);
  return entry;
}

// Unchecked pointer to the child page number of entry `index`; it follows the key.
char *InternalIndexNodeHandler::__value_at(int index) const
{
  char *entry = __item_at(index);
  return entry + key_size();
}

// Size in bytes of the value stored in an internal node entry: a child PageNum.
int InternalIndexNodeHandler::value_size() const
{
  const int page_num_bytes = sizeof(PageNum);
  return page_num_bytes;
}

// Size in bytes of one internal node entry: the key followed by a child PageNum.
int InternalIndexNodeHandler::item_size() const
{
  const int key_bytes   = key_size();
  const int value_bytes = this->value_size();
  return key_bytes + value_bytes;
}

羽飞's avatar
羽飞 已提交
638
bool InternalIndexNodeHandler::validate(const KeyComparator &comparator, DiskBufferPool *bp) const
羽飞's avatar
羽飞 已提交
639 640 641 642 643 644 645 646 647 648
{
  bool result = IndexNodeHandler::validate();
  if (false == result) {
    return false;
  }

  const int node_size = size();
  for (int i = 2; i < node_size; i++) {
    if (comparator(__key_at(i - 1), __key_at(i)) >= 0) {
      LOG_WARN("page number = %d, invalid key order. id1=%d,id2=%d, this=%s",
649
          page_num(), i - 1, i, to_string(*this).c_str());
羽飞's avatar
羽飞 已提交
650 651 652 653 654 655 656 657 658
      return false;
    }
  }

  for (int i = 0; result && i < node_size; i++) {
    PageNum page_num = *(PageNum *)__value_at(i);
    if (page_num < 0) {
      LOG_WARN("this page num=%d, got invalid child page. page num=%d", this->page_num(), page_num);
    } else {
羽飞's avatar
羽飞 已提交
659 660
      Frame *child_frame;
      RC rc = bp->get_this_page(page_num, &child_frame);
羽飞's avatar
羽飞 已提交
661
      if (rc != RC::SUCCESS) {
662 663
        LOG_WARN("failed to fetch child page while validate internal page. page num=%d, rc=%d:%s", 
                 page_num, rc, strrc(rc));
羽飞's avatar
羽飞 已提交
664
      } else {
L
Longda Feng 已提交
665 666 667
        IndexNodeHandler child_node(header_, child_frame);
        if (child_node.parent_page_num() != this->page_num()) {
          LOG_WARN("child's parent page num is invalid. child page num=%d, parent page num=%d, this page num=%d",
668
              child_node.page_num(), child_node.parent_page_num(), this->page_num());
L
Longda Feng 已提交
669 670 671
          result = false;
        }
        bp->unpin_page(child_frame);
羽飞's avatar
羽飞 已提交
672 673 674 675 676 677 678 679 680 681 682 683 684
      }
    }
  }

  if (!result) {
    return result;
  }

  const PageNum parent_page_num = this->parent_page_num();
  if (parent_page_num == BP_INVALID_PAGE_NUM) {
    return result;
  }

羽飞's avatar
羽飞 已提交
685 686
  Frame *parent_frame;
  RC rc = bp->get_this_page(parent_page_num, &parent_frame);
羽飞's avatar
羽飞 已提交
687 688 689 690 691
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch parent page. page num=%d, rc=%d:%s", parent_page_num, rc, strrc(rc));
    return false;
  }

羽飞's avatar
羽飞 已提交
692
  InternalIndexNodeHandler parent_node(header_, parent_frame);
羽飞's avatar
羽飞 已提交
693 694 695
  int index_in_parent = parent_node.value_index(this->page_num());
  if (index_in_parent < 0) {
    LOG_WARN("invalid internal node. cannot find index in parent. this page num=%d, parent page num=%d",
696
             this->page_num(), parent_page_num);
羽飞's avatar
羽飞 已提交
697
    bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
698 699 700 701 702 703
    return false;
  }

  if (0 != index_in_parent) {
    int cmp_result = comparator(__key_at(1), parent_node.key_at(index_in_parent));
    if (cmp_result < 0) {
L
Longda Feng 已提交
704 705
      LOG_WARN("invalid internal node. the second item should be greate than or equal to parent item. "
               "this page num=%d, parent page num=%d, index in parent=%d",
706
               this->page_num(), parent_node.page_num(), index_in_parent);
羽飞's avatar
羽飞 已提交
707
      bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
708 709 710 711 712 713 714
      return false;
    }
  }

  if (index_in_parent < parent_node.size() - 1) {
    int cmp_result = comparator(__key_at(size() - 1), parent_node.key_at(index_in_parent + 1));
    if (cmp_result >= 0) {
L
Longda Feng 已提交
715 716
      LOG_WARN("invalid internal node. last item should be less than the item at the first after item in parent."
               "this page num=%d, parent page num=%d, parent item to compare=%d",
717
               this->page_num(), parent_node.page_num(), index_in_parent + 1);
羽飞's avatar
羽飞 已提交
718
      bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
719 720 721
      return false;
    }
  }
羽飞's avatar
羽飞 已提交
722
  bp->unpin_page(parent_frame);
羽飞's avatar
羽飞 已提交
723 724 725 726 727 728 729 730

  return result;
}

/////////////////////////////////////////////////////////////////////////////////

/**
 * Flush all pages of the underlying buffer pool.
 * @return the result of DiskBufferPool::flush_all_pages()
 */
RC BplusTreeHandler::sync()
{
  return disk_buffer_pool_->flush_all_pages();
}

L
Longda Feng 已提交
734 735
/**
 * Create a new index file and initialise this handler on it.
 * Creates and opens the file, allocates the header page (which must be page
 * FIRST_INDEX_PAGE), writes the index file header, and sets up the key memory
 * pool, the comparator and the printer.
 * @param file_name         path of the index file to create
 * @param attr_type         type of the indexed attribute
 * @param attr_length       length in bytes of the indexed attribute
 * @param internal_max_size capacity of an internal node; a negative value means
 *                          "compute from the page size"
 * @param leaf_max_size     capacity of a leaf node; a negative value means
 *                          "compute from the page size"
 * @return RC::SUCCESS; the error from the buffer pool manager; RC::INTERNAL if the
 *         header page is not FIRST_INDEX_PAGE; RC::NOMEM if the memory pool cannot
 *         be initialised
 */
RC BplusTreeHandler::create(const char *file_name, AttrType attr_type, int attr_length, int internal_max_size /* = -1*/,
    int leaf_max_size /* = -1 */)
{
  BufferPoolManager &bpm = BufferPoolManager::instance();
  RC rc = bpm.create_file(file_name);
  if (rc != RC::SUCCESS) {
    LOG_WARN("Failed to create file. file name=%s, rc=%d:%s", file_name, rc, strrc(rc));
    return rc;
  }
  LOG_INFO("Successfully create index file:%s", file_name);

  DiskBufferPool *bp = nullptr;
  rc = bpm.open_file(file_name, bp);
  if (rc != RC::SUCCESS) {
    LOG_WARN("Failed to open file. file name=%s, rc=%d:%s", file_name, rc, strrc(rc));
    return rc;
  }
  LOG_INFO("Successfully open index file %s.", file_name);

  Frame *header_frame = nullptr;
  rc = bp->allocate_page(&header_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to allocate header page for bplus tree. rc=%d:%s", rc, strrc(rc));
    bpm.close_file(file_name);
    return rc;
  }

  if (header_frame->page_num() != FIRST_INDEX_PAGE) {
    LOG_WARN("header page num should be %d but got %d. is it a new file : %s",
             FIRST_INDEX_PAGE, header_frame->page_num(), file_name);
    // release the freshly allocated page before closing, so the pin is not leaked
    bp->unpin_page(header_frame);
    bpm.close_file(file_name);
    return RC::INTERNAL;
  }

  if (internal_max_size < 0) {
    internal_max_size = calc_internal_page_capacity(attr_length);
  }
  if (leaf_max_size < 0) {
    leaf_max_size = calc_leaf_page_capacity(attr_length);
  }

  char *pdata = header_frame->data();
  IndexFileHeader *file_header = (IndexFileHeader *)pdata;
  file_header->attr_length = attr_length;
  file_header->key_length = attr_length + sizeof(RID);
  file_header->attr_type = attr_type;
  file_header->internal_max_size = internal_max_size;
  file_header->leaf_max_size = leaf_max_size;
  file_header->root_page = BP_INVALID_PAGE_NUM;

  header_frame->mark_dirty();

  disk_buffer_pool_ = bp;

  memcpy(&file_header_, pdata, sizeof(file_header_));
  header_dirty_ = false;
  bp->unpin_page(header_frame);

  // The header page is unpinned now, so `file_header` / `pdata` must not be
  // dereferenced any more; use the in-memory copy `file_header_` instead.
  mem_pool_item_ = make_unique<common::MemPoolItem>(file_name);
  if (mem_pool_item_->init(file_header_.key_length) < 0) {
    LOG_WARN("Failed to init memory pool for index %s", file_name);
    close();
    return RC::NOMEM;
  }

  key_comparator_.init(file_header_.attr_type, file_header_.attr_length);
  key_printer_.init(file_header_.attr_type, file_header_.attr_length);
  LOG_INFO("Successfully create index %s", file_name);
  return RC::SUCCESS;
}

/**
 * Open an existing index file and initialise this handler from its header page.
 * @param file_name path of the index file
 * @return RC::SUCCESS; RC::RECORD_OPENNED if this handler already has a buffer
 *         pool; the error from the buffer pool on open/fetch failure; RC::NOMEM if
 *         the memory pool cannot be initialised
 */
RC BplusTreeHandler::open(const char *file_name)
{
  if (disk_buffer_pool_ != nullptr) {
    LOG_WARN("%s has been opened before index.open.", file_name);
    return RC::RECORD_OPENNED;
  }

  BufferPoolManager &bpm = BufferPoolManager::instance();
  DiskBufferPool *disk_buffer_pool = nullptr;
  RC rc = bpm.open_file(file_name, disk_buffer_pool);
  if (rc != RC::SUCCESS) {
    LOG_WARN("Failed to open file name=%s, rc=%d:%s", file_name, rc, strrc(rc));
    return rc;
  }

  Frame *frame = nullptr;
  rc = disk_buffer_pool->get_this_page(FIRST_INDEX_PAGE, &frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("Failed to get first page file name=%s, rc=%d:%s", file_name, rc, strrc(rc));
    bpm.close_file(file_name);
    return rc;
  }

  char *pdata = frame->data();
  memcpy(&file_header_, pdata, sizeof(IndexFileHeader));
  header_dirty_ = false;
  disk_buffer_pool_ = disk_buffer_pool;

  // The header has been copied, so release the page right away; this keeps the
  // failure path below from closing the file with the header page still pinned.
  disk_buffer_pool->unpin_page(frame);

  mem_pool_item_ = make_unique<common::MemPoolItem>(file_name);
  if (mem_pool_item_->init(file_header_.key_length) < 0) {
    LOG_WARN("Failed to init memory pool for index %s", file_name);
    close();
    return RC::NOMEM;
  }

  key_comparator_.init(file_header_.attr_type, file_header_.attr_length);
  key_printer_.init(file_header_.attr_type, file_header_.attr_length);
  LOG_INFO("Successfully open index %s", file_name);
  return RC::SUCCESS;
}

/**
 * Close the underlying buffer pool file, if one is open, and forget it.
 * The result of DiskBufferPool::close_file() is not inspected.
 * @return always RC::SUCCESS
 */
RC BplusTreeHandler::close()
{
  if (disk_buffer_pool_ != nullptr) {
    disk_buffer_pool_->close_file();
  }

  disk_buffer_pool_ = nullptr;
  return RC::SUCCESS;
}

羽飞's avatar
羽飞 已提交
859
/**
 * Log the content of one leaf page and unpin it.
 *
 * @param frame pinned frame of a leaf page; it is unpinned before returning
 * @return always RC::SUCCESS
 */
RC BplusTreeHandler::print_leaf(Frame *frame)
{
  {
    LeafIndexNodeHandler leaf(file_header_, frame);
    LOG_INFO("leaf node: %s", to_string(leaf, key_printer_).c_str());
  }

  disk_buffer_pool_->unpin_page(frame);
  return RC::SUCCESS;
}

羽飞's avatar
羽飞 已提交
867
/**
 * Log an internal node and, depth first, every node below it.
 *
 * The frame passed in is unpinned before returning, on success and on failure.
 * Child frames are fetched here and unpinned by the callee that prints them.
 *
 * @param frame pinned frame of an internal page
 * @return RC::SUCCESS, or the first error met while fetching or printing a child
 */
RC BplusTreeHandler::print_internal_node_recursive(Frame *frame)
{
  RC rc = RC::SUCCESS;
  LOG_INFO("bplus tree. file header: %s", file_header_.to_string().c_str());
  InternalIndexNodeHandler internal_node(file_header_, frame);
  LOG_INFO("internal node: %s", to_string(internal_node, key_printer_).c_str());

  int node_size = internal_node.size();
  for (int i = 0; i < node_size; i++) {
    PageNum page_num = internal_node.value_at(i);
    Frame *child_frame = nullptr;
    rc = disk_buffer_pool_->get_this_page(page_num, &child_frame);
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to fetch child page. page id=%d, rc=%d:%s", page_num, rc, strrc(rc));
      disk_buffer_pool_->unpin_page(frame);
      return rc;
    }

    IndexNodeHandler node(file_header_, child_frame);
    if (node.is_leaf()) {
      rc = print_leaf(child_frame);
    } else {
      rc = print_internal_node_recursive(child_frame);
    }
    if (rc != RC::SUCCESS) {
      // The callee has already unpinned child_frame, so it must not be touched any more;
      // report the page number we asked for instead of reading it back from the frame.
      LOG_WARN("failed to print node. page id=%d, rc=%d:%s", page_num, rc, strrc(rc));
      disk_buffer_pool_->unpin_page(frame);
      return rc;
    }
  }

  disk_buffer_pool_->unpin_page(frame);
  return RC::SUCCESS;
}

/**
 * Log the whole tree, starting from the root page.
 *
 * @return RC::SUCCESS when the index is not open or the tree is empty (a message is
 *         logged in both cases), the buffer pool error if the root page cannot be
 *         fetched, otherwise the result of printing the root node.
 */
RC BplusTreeHandler::print_tree()
{
  if (nullptr == disk_buffer_pool_) {
    LOG_WARN("Index hasn't been created or opened, fail to print");
    return RC::SUCCESS;
  }
  if (is_empty()) {
    LOG_INFO("tree is empty");
    return RC::SUCCESS;
  }

  const PageNum root_page_num = file_header_.root_page;
  Frame *root_frame;
  RC rc = disk_buffer_pool_->get_this_page(root_page_num, &root_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch page. page id=%d, rc=%d:%s", root_page_num, rc, strrc(rc));
    return rc;
  }

  // The printing helpers unpin the frame they are given.
  IndexNodeHandler root_node(file_header_, root_frame);
  rc = root_node.is_leaf() ? print_leaf(root_frame) : print_internal_node_recursive(root_frame);
  return rc;
}

/**
 * Log every leaf page by following the next-page links from the left-most leaf.
 *
 * @return RC::SUCCESS for an empty tree or a complete walk, otherwise the error
 *         returned while locating the left-most leaf or fetching a following page.
 */
RC BplusTreeHandler::print_leafs()
{
  if (is_empty()) {
    LOG_INFO("empty tree");
    return RC::SUCCESS;
  }

  LatchMemo latch_memo(disk_buffer_pool_);
  Frame *current = nullptr;

  RC rc = left_most_page(latch_memo, current);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to get left most page. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  while (current->page_num() != BP_INVALID_PAGE_NUM) {
    LeafIndexNodeHandler leaf(file_header_, current);
    LOG_INFO("leaf info: %s", to_string(leaf, key_printer_).c_str());

    // read the link before letting go of what the memo currently holds
    const PageNum following = leaf.next_page();
    latch_memo.release();

    if (BP_INVALID_PAGE_NUM == following) {
      break;
    }

    rc = latch_memo.get_page(following, current);
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to get next page. page id=%d, rc=%d:%s", following, rc, strrc(rc));
      return rc;
    }
  }
  return rc;
}

966
/**
 * Validate one node and, for an internal node, all nodes reachable from it.
 *
 * @param latch_memo memo used to fetch child pages
 * @param frame      frame of the node to check
 * @return true when the node and every descendant pass their own validate();
 *         false on the first validation failure or when a child page cannot be fetched.
 */
bool BplusTreeHandler::validate_node_recursive(LatchMemo &latch_memo, Frame *frame)
{
  IndexNodeHandler node(file_header_, frame);
  if (node.is_leaf()) {
    LeafIndexNodeHandler leaf(file_header_, frame);
    return leaf.validate(key_comparator_, disk_buffer_pool_);
  }

  InternalIndexNodeHandler internal(file_header_, frame);
  bool ok = internal.validate(key_comparator_, disk_buffer_pool_);
  for (int i = 0; ok && i < internal.size(); i++) {
    const PageNum child_page_num = internal.value_at(i);
    Frame *child_frame;
    RC rc = latch_memo.get_page(child_page_num, child_frame);
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to fetch child page.page id=%d, rc=%d:%s", child_page_num, rc, strrc(rc));
      return false;
    }

    ok = validate_node_recursive(latch_memo, child_frame);
  }

  return ok;
}

993
/**
 * Check that the leaf chain is ordered: walking the next-page links from the
 * left-most leaf, the first key of each leaf must be greater than the last key
 * of the leaf before it.
 *
 * @param latch_memo memo used to fetch the leaf pages
 * @return true for an empty tree or a correctly ordered chain; false if a page
 *         cannot be fetched, the scratch key cannot be allocated, or two
 *         neighbouring leaves are out of order.
 */
bool BplusTreeHandler::validate_leaf_link(LatchMemo &latch_memo)
{
  if (is_empty()) {
    return true;
  }

  Frame *frame = nullptr;
  RC rc = left_most_page(latch_memo, frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch left most page. rc=%d:%s", rc, strrc(rc));
    return false;
  }

  LeafIndexNodeHandler leaf_node(file_header_, frame);
  PageNum next_page_num = leaf_node.next_page();

  // Scratch buffer holding the last key of the previously visited leaf.
  // The allocation is checked the same way make_key() checks it.
  MemPoolItem::unique_ptr prev_key = mem_pool_item_->alloc_unique_ptr();
  if (prev_key == nullptr) {
    LOG_WARN("Failed to alloc memory for key.");
    return false;
  }
  memcpy(prev_key.get(), leaf_node.key_at(leaf_node.size() - 1), file_header_.key_length);

  bool result = true;
  while (result && next_page_num != BP_INVALID_PAGE_NUM) {
    rc = latch_memo.get_page(next_page_num, frame);
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to fetch next page. page num=%d, rc=%s", next_page_num, strrc(rc));
      return false;
    }

    LeafIndexNodeHandler next_leaf(file_header_, frame);
    if (key_comparator_((char *)prev_key.get(), next_leaf.key_at(0)) >= 0) {
      LOG_WARN("invalid page. current first key is not bigger than last");
      result = false;
    }

    next_page_num = next_leaf.next_page();
    memcpy(prev_key.get(), next_leaf.key_at(next_leaf.size() - 1), file_header_.key_length);
  }

  // can do more things
  return result;
}

bool BplusTreeHandler::validate_tree()
{
  if (is_empty()) {
    return true;
  }

1040 1041 1042
  LatchMemo latch_memo(disk_buffer_pool_);
  Frame *frame = nullptr;
  RC rc = latch_memo.get_page(file_header_.root_page, frame); // 这里仅仅调试使用,不加root锁
羽飞's avatar
羽飞 已提交
1043 1044
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch root page. page id=%d, rc=%d:%s", file_header_.root_page, rc, strrc(rc));
羽飞's avatar
羽飞 已提交
1045
    return false;
羽飞's avatar
羽飞 已提交
1046 1047
  }

1048
  if (!validate_node_recursive(latch_memo, frame) || !validate_leaf_link(latch_memo)) {
羽飞's avatar
羽飞 已提交
1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
    LOG_WARN("Current B+ Tree is invalid");
    print_tree();
    return false;
  }

  LOG_INFO("great! current tree is valid");
  return true;
}

/**
 * @return true when the file header records no root page
 */
bool BplusTreeHandler::is_empty() const
{
  return BP_INVALID_PAGE_NUM == file_header_.root_page;
}

1063
/**
 * Descend from the root to the leaf selected by key.
 *
 * @param latch_memo memo that records the latches and pages taken on the way down
 * @param op         operation the caller is about to perform
 * @param key        full index key used to choose the child at every internal node
 * @param frame      out: frame of the leaf reached
 * @return result of find_leaf_internal()
 */
RC BplusTreeHandler::find_leaf(LatchMemo &latch_memo, BplusTreeOperationType op, const char *key, Frame *&frame)
{
  auto pick_child_for_key = [this, key](InternalIndexNodeHandler &internal_node) {
    const auto slot = internal_node.lookup(key_comparator_, key);
    return internal_node.value_at(slot);
  };

  return find_leaf_internal(latch_memo, op, pick_child_for_key, frame);
}

1071
/**
 * Descend from the root to the left-most leaf, as a READ operation.
 *
 * @param latch_memo memo that records the latches and pages taken on the way down
 * @param frame      out: frame of the left-most leaf
 * @return result of find_leaf_internal()
 */
RC BplusTreeHandler::left_most_page(LatchMemo &latch_memo, Frame *&frame)
{
  // always follow the first child pointer
  auto pick_first_child = [](InternalIndexNodeHandler &internal_node) {
    return internal_node.value_at(0);
  };

  return find_leaf_internal(latch_memo, BplusTreeOperationType::READ, pick_first_child, frame);
}

L
Longda Feng 已提交
1077
/**
 * Walk from the root to a leaf, letting child_page_getter choose the child at
 * each internal node.
 *
 * The root lock is taken through latch_memo first: shared for READ, exclusive
 * for every other operation. Each page on the path is then fetched with
 * crabing_protocal_fetch_page().
 *
 * @param latch_memo        memo that records the latches and pages taken
 * @param op                operation the caller is about to perform
 * @param child_page_getter returns the page number of the child to descend into
 * @param frame             out: frame of the leaf reached
 * @return RC::EMPTY when the tree has no root, the fetch error if a page cannot
 *         be loaded, RC::SUCCESS otherwise.
 */
RC BplusTreeHandler::find_leaf_internal(
    LatchMemo &latch_memo, BplusTreeOperationType op,
    const std::function<PageNum(InternalIndexNodeHandler &)> &child_page_getter,
    Frame *&frame)
{
  // lock the root first
  if (BplusTreeOperationType::READ == op) {
    latch_memo.slatch(&root_lock_);
  } else {
    latch_memo.xlatch(&root_lock_);
  }

  if (is_empty()) {
    return RC::EMPTY;
  }

  RC rc = crabing_protocal_fetch_page(latch_memo, op, file_header_.root_page, true /* is_root_node */, frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch root page. page id=%d, rc=%d:%s", file_header_.root_page, rc, strrc(rc));
    return rc;
  }

  IndexNode *current = reinterpret_cast<IndexNode *>(frame->data());
  while (!current->is_leaf) {
    InternalIndexNodeHandler internal_node(file_header_, frame);
    const PageNum child_page_num = child_page_getter(internal_node);

    rc = crabing_protocal_fetch_page(latch_memo, op, child_page_num, false /* is_root_node */, frame);
    if (rc != RC::SUCCESS) {
      LOG_WARN("Failed to load page page_num:%d. rc=%s", child_page_num, strrc(rc));
      return rc;
    }

    current = reinterpret_cast<IndexNode *>(frame->data());
  }
  return RC::SUCCESS;
}

1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
/**
 * Fetch and latch one page on the way down the tree.
 *
 * The page is latched shared for READ and exclusive otherwise. If the node
 * reports itself safe for the operation, everything recorded in latch_memo
 * before this call is released.
 *
 * @param latch_memo   memo that records the latches and pages taken
 * @param op           operation the caller is about to perform
 * @param page_num     page to fetch
 * @param is_root_node whether page_num is the root page
 * @param frame        out: frame of the fetched page
 * @return the error from latch_memo.get_page(), or RC::SUCCESS
 */
RC BplusTreeHandler::crabing_protocal_fetch_page(LatchMemo &latch_memo,
                                                 BplusTreeOperationType op,
                                                 PageNum page_num,
                                                 bool is_root_node,
                                                 Frame *&frame)
{
  // remember where the memo stood before this page was added
  const int memo_point = latch_memo.memo_point();

  RC rc = latch_memo.get_page(page_num, frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to get frame. pageNum=%d, rc=%s", page_num, strrc(rc));
    return rc;
  }

  LatchMemoType latch_type = LatchMemoType::EXCLUSIVE;
  if (BplusTreeOperationType::READ == op) {
    latch_type = LatchMemoType::SHARED;
  }
  latch_memo.latch(frame, latch_type);

  IndexNodeHandler index_node(file_header_, frame);
  const bool node_is_safe = index_node.is_safe(op, is_root_node);
  if (node_is_safe) {
    // this node will not split or merge, so the latches taken before it can go
    latch_memo.release_to(memo_point);
  }
  return rc;
}

/**
 * Insert key/rid into the given leaf, splitting the leaf when it is full.
 *
 * @param latch_memo memo used for any page fetched or allocated during a split
 * @param frame      frame of the target leaf
 * @param key        full index key
 * @param rid        record id stored as the entry's value
 * @return RC::RECORD_DUPLICATE_KEY if the key is already in the leaf, the split
 *         error if the split fails, otherwise RC::SUCCESS or the result of
 *         insert_entry_into_parent().
 */
RC BplusTreeHandler::insert_entry_into_leaf_node(LatchMemo &latch_memo, Frame *frame, const char *key, const RID *rid)
{
  LeafIndexNodeHandler leaf_node(file_header_, frame);

  bool already_present = false;  // set by lookup() when the key is already stored in this leaf
  const int slot = leaf_node.lookup(key_comparator_, key, &already_present);
  if (already_present) {
    LOG_TRACE("entry exists");
    return RC::RECORD_DUPLICATE_KEY;
  }

  const char *value = reinterpret_cast<const char *>(rid);

  const bool has_room = leaf_node.size() < leaf_node.max_size();
  if (has_room) {
    leaf_node.insert(slot, key, value);
    frame->mark_dirty();
    // no unpin here: per the original note, unpinning is left to the latch memo
    return RC::SUCCESS;
  }

  // The leaf is full: split it and link the new right sibling into the leaf chain.
  Frame *sibling_frame = nullptr;
  RC rc = split<LeafIndexNodeHandler>(latch_memo, frame, sibling_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to split leaf node. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  LeafIndexNodeHandler sibling_node(file_header_, sibling_frame);
  sibling_node.set_next_page(leaf_node.next_page());
  sibling_node.set_parent_page_num(leaf_node.parent_page_num());
  leaf_node.set_next_page(sibling_frame->page_num());

  // The slot was computed against the unsplit leaf; translate it for the right half.
  const auto left_size = leaf_node.size();
  if (slot < left_size) {
    leaf_node.insert(slot, key, value);
  } else {
    sibling_node.insert(slot - left_size, key, value);
  }

  return insert_entry_into_parent(latch_memo, frame, sibling_frame, sibling_node.key_at(0));
}

1176
/**
 * After a split, register the new right node (new_frame) in the parent of the
 * left node (frame), creating a new root or splitting the parent when needed.
 *
 * @param latch_memo memo used to fetch the parent page and for any further split
 * @param frame      frame of the node that was split (left half)
 * @param new_frame  frame of the node created by the split (right half)
 * @param key        separator key for new_frame
 * @return RC::SUCCESS, or the error from allocating the root, fetching the
 *         parent, splitting the parent, or the recursive call.
 */
RC BplusTreeHandler::insert_entry_into_parent(LatchMemo &latch_memo, Frame *frame, Frame *new_frame, const char *key)
{
  IndexNodeHandler left_handler(file_header_, frame);
  IndexNodeHandler right_handler(file_header_, new_frame);
  const PageNum parent_page_num = left_handler.parent_page_num();

  if (BP_INVALID_PAGE_NUM == parent_page_num) {
    // The split node was the root: build a new root above the two halves.
    Frame *root_frame = nullptr;
    RC rc = disk_buffer_pool_->allocate_page(&root_frame);
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to allocate new root page. rc=%d:%s", rc, strrc(rc));
      return rc;
    }

    InternalIndexNodeHandler root_node(file_header_, root_frame);
    root_node.init_empty();
    root_node.create_new_root(frame->page_num(), key, new_frame->page_num());

    const PageNum root_page_num = root_frame->page_num();
    left_handler.set_parent_page_num(root_page_num);
    right_handler.set_parent_page_num(root_page_num);

    frame->mark_dirty();
    new_frame->mark_dirty();

    // Once the root page number is published others can reach this page,
    // so it is write-latched around the update.
    root_frame->write_latch();
    update_root_page_num_locked(root_page_num);
    root_frame->mark_dirty();
    root_frame->write_unlatch();
    disk_buffer_pool_->unpin_page(root_frame);

    return RC::SUCCESS;
  }

  Frame *parent_frame = nullptr;
  RC rc = latch_memo.get_page(parent_page_num, parent_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to insert entry into leaf. rc=%d:%s", rc, strrc(rc));
    // we should do some things to recover
    return rc;
  }

  // The parent's write latch was already taken during the descent, so it is not latched again.
  InternalIndexNodeHandler parent_node(file_header_, parent_frame);

  if (parent_node.size() < parent_node.max_size()) {
    // The parent still has room: add the new child directly.
    parent_node.insert(key, new_frame->page_num(), key_comparator_);
    right_handler.set_parent_page_num(parent_page_num);

    frame->mark_dirty();
    new_frame->mark_dirty();
    parent_frame->mark_dirty();
    return rc;
  }

  // The parent is full as well, so it has to be split first.
  Frame *new_parent_frame = nullptr;
  rc = split<InternalIndexNodeHandler>(latch_memo, parent_frame, new_parent_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to split internal node. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  // Insert into the left or the right half of the parent, decided by key comparison.
  InternalIndexNodeHandler new_parent_node(file_header_, new_parent_frame);
  const bool goes_right = key_comparator_(key, new_parent_node.key_at(0)) > 0;
  InternalIndexNodeHandler &target_node = goes_right ? new_parent_node : parent_node;
  target_node.insert(key, new_frame->page_num(), key_comparator_);
  right_handler.set_parent_page_num(target_node.page_num());

  // This is a recursive call, but a B+ tree is usually shallow (three levels already
  // hold a lot of data), so there is no risk of stack overflow.
  // Q: while searching for the leaf we release unneeded latches as early as possible.
  //    Here we are walking up the tree and could in theory release the latches of the
  //    lower levels, yet we do not. Why?
  return insert_entry_into_parent(latch_memo, parent_frame, new_parent_frame, new_parent_node.key_at(0));
}

/**
 * split one full node into two
 */
template <typename IndexNodeHandlerType>
1276
RC BplusTreeHandler::split(LatchMemo &latch_memo, Frame *frame, Frame *&new_frame)
羽飞's avatar
羽飞 已提交
1277
{
羽飞's avatar
羽飞 已提交
1278
  IndexNodeHandlerType old_node(file_header_, frame);
羽飞's avatar
羽飞 已提交
1279 1280

  // add a new node
1281
  RC rc = latch_memo.allocate_page(new_frame);
羽飞's avatar
羽飞 已提交
1282
  if (rc != RC::SUCCESS) {
羽飞's avatar
羽飞 已提交
1283
    LOG_WARN("Failed to split index page due to failed to allocate page, rc=%d:%s", rc, strrc(rc));
羽飞's avatar
羽飞 已提交
1284 1285 1286
    return rc;
  }

1287 1288
  latch_memo.xlatch(new_frame);

羽飞's avatar
羽飞 已提交
1289
  IndexNodeHandlerType new_node(file_header_, new_frame);
羽飞's avatar
羽飞 已提交
1290 1291 1292
  new_node.init_empty();
  new_node.set_parent_page_num(old_node.parent_page_num());

1293
  old_node.move_half_to(new_node, disk_buffer_pool_); // TODO remove disk buffer pool
羽飞's avatar
羽飞 已提交
1294

羽飞's avatar
羽飞 已提交
1295 1296
  frame->mark_dirty();
  new_frame->mark_dirty();
羽飞's avatar
羽飞 已提交
1297 1298 1299
  return RC::SUCCESS;
}

1300
/**
 * Record a new root page number in the in-memory file header and flag the header as dirty.
 * NOTE(review): the "_locked" suffix suggests the caller must already hold the root lock; confirm against callers.
 *
 * @param root_page_num page number of the new root, or BP_INVALID_PAGE_NUM for an empty tree
 */
void BplusTreeHandler::update_root_page_num_locked(PageNum root_page_num)
{
  header_dirty_ = true;
  file_header_.root_page = root_page_num;
  LOG_DEBUG("set root page to %d", root_page_num);
}

/**
 * Create the first node of an empty tree: a leaf root holding a single entry.
 *
 * @param key full index key of the first entry
 * @param rid record id of the first entry
 * @return RC::INTERNAL if a root page already exists, the allocation error if
 *         no page can be allocated, RC::SUCCESS otherwise.
 */
RC BplusTreeHandler::create_new_tree(const char *key, const RID *rid)
{
  if (!is_empty()) {
    LOG_WARN("cannot create new tree while root page is valid. root page id=%d", file_header_.root_page);
    return RC::INTERNAL;
  }

  Frame *root_frame = nullptr;
  RC rc = disk_buffer_pool_->allocate_page(&root_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to allocate root page. rc=%d:%s", rc, strrc(rc));
    return rc;
  }

  LeafIndexNodeHandler root_leaf(file_header_, root_frame);
  root_leaf.init_empty();
  root_leaf.insert(0, key, reinterpret_cast<const char *>(rid));

  update_root_page_num_locked(root_frame->page_num());
  root_frame->mark_dirty();
  disk_buffer_pool_->unpin_page(root_frame);

  return rc;
}

1334
/**
 * Build the full index key: the user key bytes followed by the record id.
 *
 * @param user_key pointer to attr_length bytes of attribute data
 * @param rid      record id appended after the attribute bytes
 * @return the key buffer taken from the memory pool, or nullptr if allocation fails
 */
MemPoolItem::unique_ptr BplusTreeHandler::make_key(const char *user_key, const RID &rid)
{
  MemPoolItem::unique_ptr key = mem_pool_item_->alloc_unique_ptr();
  if (key == nullptr) {
    LOG_WARN("Failed to alloc memory for key.");
    return nullptr;
  }

  char *buffer = static_cast<char *>(key.get());
  memcpy(buffer, user_key, file_header_.attr_length);
  memcpy(buffer + file_header_.attr_length, &rid, sizeof(rid));
  return key;
}

/**
 * Insert one entry into the index.
 *
 * @param user_key attribute value to index
 * @param rid      record id the entry points at
 * @return RC::INVALID_ARGUMENT for a null argument, RC::NOMEM if the key cannot
 *         be allocated, otherwise the result of creating the tree or of the
 *         leaf lookup and insertion.
 */
RC BplusTreeHandler::insert_entry(const char *user_key, const RID *rid)
{
  if (nullptr == user_key || nullptr == rid) {
    LOG_WARN("Invalid arguments, key is empty or rid is empty");
    return RC::INVALID_ARGUMENT;
  }

  MemPoolItem::unique_ptr key_holder = make_key(user_key, *rid);
  if (key_holder == nullptr) {
    LOG_WARN("Failed to alloc memory for key.");
    return RC::NOMEM;
  }
  char *key = static_cast<char *>(key_holder.get());

  if (is_empty()) {
    // Emptiness is checked again under the root lock before the first node is created.
    root_lock_.lock();
    const bool still_empty = is_empty();
    RC create_rc = RC::SUCCESS;
    if (still_empty) {
      create_rc = create_new_tree(key, rid);
    }
    root_lock_.unlock();

    if (still_empty) {
      return create_rc;
    }
  }

  LatchMemo latch_memo(disk_buffer_pool_);
  Frame *leaf_frame = nullptr;

  RC rc = find_leaf(latch_memo, BplusTreeOperationType::INSERT, key, leaf_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("Failed to find leaf %s. rc=%d:%s", rid->to_string().c_str(), rc, strrc(rc));
    return rc;
  }

  rc = insert_entry_into_leaf_node(latch_memo, leaf_frame, key, rid);
  if (rc != RC::SUCCESS) {
    LOG_TRACE("Failed to insert into leaf of index, rid:%s", rid->to_string().c_str());
    return rc;
  }

  LOG_TRACE("insert entry success");
  return RC::SUCCESS;
}

羽飞's avatar
羽飞 已提交
1390
/**
 * Collect the record ids of all entries whose user key equals user_key.
 *
 * Implemented as a scan whose lower and upper bounds are both user_key, inclusive.
 *
 * @param user_key attribute value to look up
 * @param key_len  length of user_key
 * @param rids     out: matching record ids are appended to this list
 * @return RC::SUCCESS when the scan ran to its end, otherwise the scanner's error
 */
RC BplusTreeHandler::get_entry(const char *user_key, int key_len, std::list<RID> &rids)
{
  BplusTreeScanner scanner(*this);
  RC rc = scanner.open(user_key, key_len, true /*left_inclusive*/, user_key, key_len, true /*right_inclusive*/);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to open scanner. rc=%s", strrc(rc));
    return rc;
  }

  RID rid;
  for (rc = scanner.next_entry(rid); RC::SUCCESS == rc; rc = scanner.next_entry(rid)) {
    rids.push_back(rid);
  }
  scanner.close();

  // reaching the end of the scan is the normal way out of the loop
  if (RC::RECORD_EOF == rc) {
    return RC::SUCCESS;
  }

  LOG_WARN("scanner return error. rc=%s", strrc(rc));
  return rc;
}

1413
/**
 * Shrink the tree at the root after a deletion.
 *
 * - A leaf root that still holds entries is only marked dirty.
 * - An empty leaf root is disposed of and the tree becomes empty.
 * - An internal root is disposed of and its first child becomes the new root.
 *   NOTE(review): this assumes the caller only gets here when the internal root
 *   has a single child left; confirm against coalesce_or_redistribute().
 *
 * @param latch_memo memo used to fetch the child page and to dispose of the old root
 * @param root_frame frame of the current root page
 * @return the fetch error if the child page cannot be loaded, RC::SUCCESS otherwise
 */
RC BplusTreeHandler::adjust_root(LatchMemo &latch_memo, Frame *root_frame)
{
  IndexNodeHandler root_node(file_header_, root_frame);
  if (root_node.is_leaf() && root_node.size() > 0) {
    root_frame->mark_dirty();
    return RC::SUCCESS;
  }

  PageNum new_root_page_num = BP_INVALID_PAGE_NUM;
  if (root_node.is_leaf()) {
    ASSERT(root_node.size() == 0, "");
    // the tree becomes empty: new_root_page_num stays BP_INVALID_PAGE_NUM
  } else {
    // The root has only one child left: remove the root and promote that child.
    InternalIndexNodeHandler internal_node(file_header_, root_frame);

    const PageNum child_page_num = internal_node.value_at(0);
    Frame *child_frame = nullptr;
    RC rc = latch_memo.get_page(child_page_num, child_frame);
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to fetch child page. page num=%d, rc=%d:%s", child_page_num, rc, strrc(rc));
      return rc;
    }

    IndexNodeHandler child_node(file_header_, child_frame);
    child_node.set_parent_page_num(BP_INVALID_PAGE_NUM);
    // The child page was just modified; mark it dirty like every other
    // in-place page modification in this file so the change is not lost.
    child_frame->mark_dirty();

    new_root_page_num = child_page_num;
  }

  update_root_page_num_locked(new_root_page_num);

  PageNum old_root_page_num = root_frame->page_num();
  latch_memo.dispose_page(old_root_page_num);
  return RC::SUCCESS;
}
1451

羽飞's avatar
羽飞 已提交
1452
template <typename IndexNodeHandlerType>
1453
RC BplusTreeHandler::coalesce_or_redistribute(LatchMemo &latch_memo, Frame *frame)
羽飞's avatar
羽飞 已提交
1454
{
羽飞's avatar
羽飞 已提交
1455
  IndexNodeHandlerType index_node(file_header_, frame);
羽飞's avatar
羽飞 已提交
1456 1457 1458 1459 1460 1461 1462 1463 1464 1465
  if (index_node.size() >= index_node.min_size()) {
    return RC::SUCCESS;
  }

  const PageNum parent_page_num = index_node.parent_page_num();
  if (BP_INVALID_PAGE_NUM == parent_page_num) {
    // this is the root page
    if (index_node.size() > 1) {
    } else {
      // adjust the root node
1466
      adjust_root(latch_memo, frame);
羽飞's avatar
羽飞 已提交
1467 1468 1469 1470
    }
    return RC::SUCCESS;
  }

1471 1472
  Frame *parent_frame = nullptr;
  RC rc = latch_memo.get_page(parent_page_num, parent_frame);
羽飞's avatar
羽飞 已提交
1473 1474 1475 1476 1477
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch parent page. page id=%d, rc=%d:%s", parent_page_num, rc, strrc(rc));
    return rc;
  }

羽飞's avatar
羽飞 已提交
1478
  InternalIndexNodeHandler parent_index_node(file_header_, parent_frame);
羽飞's avatar
羽飞 已提交
1479
  int index = parent_index_node.lookup(key_comparator_, index_node.key_at(index_node.size() - 1));
1480 1481 1482 1483
  ASSERT(parent_index_node.value_at(index) == frame->page_num(),
         "lookup return an invalid value. index=%d, this page num=%d, but got %d",
         index, frame->page_num(), parent_index_node.value_at(index));
  
羽飞's avatar
羽飞 已提交
1484 1485 1486 1487 1488 1489 1490
  PageNum neighbor_page_num;
  if (index == 0) {
    neighbor_page_num = parent_index_node.value_at(1);
  } else {
    neighbor_page_num = parent_index_node.value_at(index - 1);
  }

1491 1492
  Frame *neighbor_frame = nullptr;
  rc = latch_memo.get_page(neighbor_page_num, neighbor_frame); // 当前已经拥有了父节点的写锁,所以直接尝试获取此页面然后加锁
羽飞's avatar
羽飞 已提交
1493 1494
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to fetch neighbor page. page id=%d, rc=%d:%s", neighbor_page_num, rc, strrc(rc));
1495
    // do something to release resource
羽飞's avatar
羽飞 已提交
1496 1497 1498
    return rc;
  }

1499 1500
  latch_memo.xlatch(neighbor_frame);

羽飞's avatar
羽飞 已提交
1501
  IndexNodeHandlerType neighbor_node(file_header_, neighbor_frame);
羽飞's avatar
羽飞 已提交
1502
  if (index_node.size() + neighbor_node.size() > index_node.max_size()) {
羽飞's avatar
羽飞 已提交
1503
    rc = redistribute<IndexNodeHandlerType>(neighbor_frame, frame, parent_frame, index);
羽飞's avatar
羽飞 已提交
1504
  } else {
1505
    rc = coalesce<IndexNodeHandlerType>(latch_memo, neighbor_frame, frame, parent_frame, index);
羽飞's avatar
羽飞 已提交
1506
  }
1507

羽飞's avatar
羽飞 已提交
1508 1509 1510 1511
  return rc;
}

template <typename IndexNodeHandlerType>
1512
RC BplusTreeHandler::coalesce(LatchMemo &latch_memo, Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index)
羽飞's avatar
羽飞 已提交
1513
{
羽飞's avatar
羽飞 已提交
1514
  InternalIndexNodeHandler parent_node(file_header_, parent_frame);
羽飞's avatar
羽飞 已提交
1515

羽飞's avatar
羽飞 已提交
1516 1517
  Frame *left_frame = nullptr;
  Frame *right_frame = nullptr;
羽飞's avatar
羽飞 已提交
1518 1519
  if (index == 0) {
    // neighbor node is at right
L
Longda Feng 已提交
1520
    left_frame = frame;
羽飞's avatar
羽飞 已提交
1521
    right_frame = neighbor_frame;
羽飞's avatar
羽飞 已提交
1522 1523
    index++;
  } else {
L
Longda Feng 已提交
1524
    left_frame = neighbor_frame;
羽飞's avatar
羽飞 已提交
1525
    right_frame = frame;
羽飞's avatar
羽飞 已提交
1526 1527 1528
    // neighbor is at left
  }

羽飞's avatar
羽飞 已提交
1529 1530
  IndexNodeHandlerType left_node(file_header_, left_frame);
  IndexNodeHandlerType right_node(file_header_, right_frame);
羽飞's avatar
羽飞 已提交
1531 1532 1533

  parent_node.remove(index);
  // parent_node.validate(key_comparator_, disk_buffer_pool_, file_id_);
羽飞's avatar
羽飞 已提交
1534
  RC rc = right_node.move_to(left_node, disk_buffer_pool_);
羽飞's avatar
羽飞 已提交
1535 1536 1537 1538 1539 1540 1541
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to move right node to left. rc=%d:%s", rc, strrc(rc));
    return rc;
  }
  // left_node.validate(key_comparator_);

  if (left_node.is_leaf()) {
羽飞's avatar
羽飞 已提交
1542 1543
    LeafIndexNodeHandler left_leaf_node(file_header_, left_frame);
    LeafIndexNodeHandler right_leaf_node(file_header_, right_frame);
羽飞's avatar
羽飞 已提交
1544 1545 1546
    left_leaf_node.set_next_page(right_leaf_node.next_page());
  }

1547 1548
  latch_memo.dispose_page(right_frame->page_num());
  return coalesce_or_redistribute<InternalIndexNodeHandler>(latch_memo, parent_frame);
羽飞's avatar
羽飞 已提交
1549 1550 1551
}

template <typename IndexNodeHandlerType>
羽飞's avatar
羽飞 已提交
1552
RC BplusTreeHandler::redistribute(Frame *neighbor_frame, Frame *frame, Frame *parent_frame, int index)
羽飞's avatar
羽飞 已提交
1553
{
羽飞's avatar
羽飞 已提交
1554 1555 1556
  InternalIndexNodeHandler parent_node(file_header_, parent_frame);
  IndexNodeHandlerType neighbor_node(file_header_, neighbor_frame);
  IndexNodeHandlerType node(file_header_, frame);
羽飞's avatar
羽飞 已提交
1557
  if (neighbor_node.size() < node.size()) {
L
Longda Feng 已提交
1558
    LOG_ERROR("got invalid nodes. neighbor node size %d, this node size %d", neighbor_node.size(), node.size());
羽飞's avatar
羽飞 已提交
1559 1560 1561
  }
  if (index == 0) {
    // the neighbor is at right
羽飞's avatar
羽飞 已提交
1562
    neighbor_node.move_first_to_end(node, disk_buffer_pool_);
羽飞's avatar
羽飞 已提交
1563 1564 1565 1566 1567 1568
    // neighbor_node.validate(key_comparator_, disk_buffer_pool_, file_id_);
    // node.validate(key_comparator_, disk_buffer_pool_, file_id_);
    parent_node.set_key_at(index + 1, neighbor_node.key_at(0));
    // parent_node.validate(key_comparator_, disk_buffer_pool_, file_id_);
  } else {
    // the neighbor is at left
羽飞's avatar
羽飞 已提交
1569
    neighbor_node.move_last_to_front(node, disk_buffer_pool_);
羽飞's avatar
羽飞 已提交
1570 1571 1572 1573 1574 1575
    // neighbor_node.validate(key_comparator_, disk_buffer_pool_, file_id_);
    // node.validate(key_comparator_, disk_buffer_pool_, file_id_);
    parent_node.set_key_at(index, node.key_at(0));
    // parent_node.validate(key_comparator_, disk_buffer_pool_, file_id_);
  }

羽飞's avatar
羽飞 已提交
1576 1577 1578
  neighbor_frame->mark_dirty();
  frame->mark_dirty();
  parent_frame->mark_dirty();
1579

羽飞's avatar
羽飞 已提交
1580 1581 1582
  return RC::SUCCESS;
}

1583
/**
 * @brief Remove `key` from the given leaf and rebalance the tree if the leaf becomes too small.
 *
 * @param latch_memo memo passed on to the rebalancing step
 * @param leaf_frame frame of the leaf expected to hold the key
 * @param key        full key to remove
 * @return RC::RECORD_NOT_EXIST when nothing was removed, RC::SUCCESS when the leaf
 *         still holds at least min_size() entries, otherwise the rebalancing result
 */
RC BplusTreeHandler::delete_entry_internal(LatchMemo &latch_memo, Frame *leaf_frame, const char *key)
{
  LeafIndexNodeHandler leaf(file_header_, leaf_frame);

  if (0 == leaf.remove(key, key_comparator_)) {
    LOG_TRACE("no data need to remove");
    return RC::RECORD_NOT_EXIST;
  }

  // the leaf content changed
  leaf_frame->mark_dirty();

  const bool underflow = leaf.size() < leaf.min_size();
  return underflow ? coalesce_or_redistribute<LeafIndexNodeHandler>(latch_memo, leaf_frame) : RC::SUCCESS;
}

/**
 * @brief Delete the index entry identified by a user key and a record id.
 *
 * The internal key is the user key (attr_length bytes) followed by the RID.
 *
 * @param user_key attribute value of the entry
 * @param rid      record id of the entry
 * @return RC::NOMEM when the key buffer cannot be allocated, RC::RECORD_NOT_EXIST
 *         when the leaf lookup reports RC::EMPTY, otherwise the result of the
 *         leaf lookup or of the leaf-level deletion
 */
RC BplusTreeHandler::delete_entry(const char *user_key, const RID *rid)
{
  MemPoolItem::unique_ptr key_holder = mem_pool_item_->alloc_unique_ptr();
  if (key_holder == nullptr) {
    LOG_WARN("Failed to alloc memory for key. size=%d", file_header_.key_length);
    return RC::NOMEM;
  }

  // compose the internal key: <user key><rid>
  char *key = static_cast<char *>(key_holder.get());
  memcpy(key, user_key, file_header_.attr_length);
  memcpy(key + file_header_.attr_length, rid, sizeof(RID));

  LatchMemo latch_memo(disk_buffer_pool_);
  Frame    *leaf_frame = nullptr;

  const RC find_rc = find_leaf(latch_memo, BplusTreeOperationType::DELETE, key, leaf_frame);
  if (RC::EMPTY == find_rc) {
    return RC::RECORD_NOT_EXIST;
  }
  if (RC::SUCCESS != find_rc) {
    LOG_WARN("failed to find leaf page. rc =%s", strrc(find_rc));
    return find_rc;
  }

  return delete_entry_internal(latch_memo, leaf_frame, key);
}

1634 1635 1636 1637 1638
////////////////////////////////////////////////////////////////////////////////

/// Bind the scanner to a tree; its latch memo works on the tree's buffer pool.
BplusTreeScanner::BplusTreeScanner(BplusTreeHandler &tree_handler)
    : tree_handler_(tree_handler), latch_memo_(tree_handler.disk_buffer_pool_)
{
}

/// Close the scanner on destruction; the status returned by close() is not used.
BplusTreeScanner::~BplusTreeScanner()
{
  close();
}

1646 1647
/**
 * @brief Position the scanner on the first entry of the requested key range.
 *
 * @param left_user_key   lower bound of the attribute value, nullptr for "no lower bound"
 * @param left_len        length of left_user_key (only used for CHARS attributes)
 * @param left_inclusive  whether the lower bound itself belongs to the range
 * @param right_user_key  upper bound of the attribute value, nullptr for "no upper bound"
 * @param right_len       length of right_user_key (only used for CHARS attributes)
 * @param right_inclusive whether the upper bound itself belongs to the range
 * @return RC::INTERNAL if the scanner is already open, RC::INVALID_ARGUMENT for a range
 *         that cannot contain a key, RC::NOMEM if a key buffer cannot be built,
 *         RC::SUCCESS otherwise (including the case where the scan is empty, which is
 *         signalled by current_frame_ being nullptr)
 */
RC BplusTreeScanner::open(const char *left_user_key, int left_len, bool left_inclusive,
                          const char *right_user_key, int right_len, bool right_inclusive)
{
  RC rc = RC::SUCCESS;
  if (inited_) {
    LOG_WARN("tree scanner has been inited");
    return RC::INTERNAL;
  }

  inited_ = true;
  first_emitted_ = false;

  // Check that the given bounds describe a valid range.
  if (left_user_key && right_user_key) {
    const auto &attr_comparator = tree_handler_.key_comparator_.attr_comparator();
    const int result = attr_comparator(left_user_key, right_user_key);
    if (result > 0 ||  // left > right
                       // left == right but the range is (left,right), [left,right) or (left,right]
        (result == 0 && (left_inclusive == false || right_inclusive == false))) {
      return RC::INVALID_ARGUMENT;
    }
  }

  if (nullptr == left_user_key) {
    rc = tree_handler_.left_most_page(latch_memo_, current_frame_);
    if (rc == RC::EMPTY) {
      // Treat an empty tree as an empty scan, exactly like the find_leaf() path below.
      current_frame_ = nullptr;
      return RC::SUCCESS;
    }
    if (rc != RC::SUCCESS) {
      LOG_WARN("failed to find left most page. rc=%s", strrc(rc));
      return rc;
    }

    iter_index_ = 0;
  } else {

    char *fixed_left_key = const_cast<char *>(left_user_key);
    if (tree_handler_.file_header_.attr_type == CHARS) {
      bool should_inclusive_after_fix = false;
      rc = fix_user_key(left_user_key, left_len, true /*greater*/, &fixed_left_key, &should_inclusive_after_fix);
      if (rc != RC::SUCCESS) {
        LOG_WARN("failed to fix left user key. rc=%s", strrc(rc));
        return rc;
      }

      if (should_inclusive_after_fix) {
        left_inclusive = true;
      }
    }

    // Pair the attribute value with the smallest/largest RID so that the composed key
    // sorts before/after every real entry carrying the same attribute value.
    MemPoolItem::unique_ptr left_pkey;
    if (left_inclusive) {
      left_pkey = tree_handler_.make_key(fixed_left_key, *RID::min());
    } else {
      left_pkey = tree_handler_.make_key(fixed_left_key, *RID::max());
    }

    // The temporary buffer produced by fix_user_key() is no longer needed.
    if (fixed_left_key != left_user_key) {
      delete[] fixed_left_key;
      fixed_left_key = nullptr;
    }

    if (nullptr == left_pkey) {
      LOG_WARN("failed to make left key");
      return RC::NOMEM;
    }

    const char *left_key = (const char *)left_pkey.get();

    rc = tree_handler_.find_leaf(latch_memo_, BplusTreeOperationType::READ, left_key, current_frame_);
    if (rc == RC::EMPTY) {
      rc = RC::SUCCESS;
      current_frame_ = nullptr;
      return rc;
    } else if (rc != RC::SUCCESS) {
      LOG_WARN("failed to find left page. rc=%s", strrc(rc));
      return rc;
    }

    LeafIndexNodeHandler left_node(tree_handler_.file_header_, current_frame_);
    int left_index = left_node.lookup(tree_handler_.key_comparator_, left_key);
    // lookup() returns the position suitable for insertion, so it may lie past the last entry.
    if (left_index >= left_node.size()) {  // beyond this page: continue on the next one
      const PageNum next_page_num = left_node.next_page();
      if (next_page_num == BP_INVALID_PAGE_NUM) {  // this was the last page: the scan is empty
        latch_memo_.release();
        current_frame_ = nullptr;
        return RC::SUCCESS;
      }

      rc = latch_memo_.get_page(next_page_num, current_frame_);
      if (rc != RC::SUCCESS) {
        LOG_WARN("failed to fetch next page. page num=%d, rc=%s", next_page_num, strrc(rc));
        return rc;
      }
      latch_memo_.slatch(current_frame_);

      left_index = 0;
    }
    iter_index_ = left_index;
  }

  // Without an upper bound the scan runs to the end of the index.
  if (nullptr == right_user_key) {
    right_key_ = nullptr;
  } else {

    char *fixed_right_key = const_cast<char *>(right_user_key);
    bool should_include_after_fix = false;
    if (tree_handler_.file_header_.attr_type == CHARS) {
      rc = fix_user_key(right_user_key, right_len, false /*want_greater*/, &fixed_right_key, &should_include_after_fix);
      if (rc != RC::SUCCESS) {
        LOG_WARN("failed to fix right user key. rc=%s", strrc(rc));
        return rc;
      }

      if (should_include_after_fix) {
        right_inclusive = true;
      }
    }
    if (right_inclusive) {
      right_key_ = tree_handler_.make_key(fixed_right_key, *RID::max());
    } else {
      right_key_ = tree_handler_.make_key(fixed_right_key, *RID::min());
    }

    if (fixed_right_key != right_user_key) {
      delete[] fixed_right_key;
      fixed_right_key = nullptr;
    }

    // A null right_key_ means "no upper bound" to touch_end(); never let a failed
    // allocation silently widen the scan.
    if (right_key_ == nullptr) {
      LOG_WARN("failed to make right key");
      return RC::NOMEM;
    }
  }

  // The very first candidate may already lie beyond the upper bound.
  if (touch_end()) {
    current_frame_ = nullptr;
  }

  return RC::SUCCESS;
}
羽飞's avatar
羽飞 已提交
1777

1778 1779 1780 1781 1782
/// Copy the value stored at the current scan position (iter_index_ in current_frame_) into `rid`.
void BplusTreeScanner::fetch_item(RID &rid)
{
  LeafIndexNodeHandler leaf(tree_handler_.file_header_, current_frame_);
  const char          *value = leaf.value_at(iter_index_);
  memcpy(&rid, value, sizeof(RID));
}
羽飞's avatar
羽飞 已提交
1783

1784 1785 1786 1787
bool BplusTreeScanner::touch_end()
{
  if (right_key_ == nullptr) {
    return false;
羽飞's avatar
羽飞 已提交
1788
  }
1789 1790 1791 1792 1793
  
  LeafIndexNodeHandler node(tree_handler_.file_header_, current_frame_);
  const char *this_key = node.key_at(iter_index_);
  int compare_result = tree_handler_.key_comparator_(this_key, static_cast<char *>(right_key_.get()));
  return compare_result > 0;
羽飞's avatar
羽飞 已提交
1794 1795
}

1796
/**
 * @brief Return the next record id of the scan.
 *
 * @param rid receives the record id on success
 * @return RC::SUCCESS when `rid` was filled, RC::RECORD_EOF when the scan is exhausted,
 *         RC::LOCKED_NEED_WAIT when the next leaf could not be latched right now, or the
 *         error reported while fetching the next leaf. After either of the last two the
 *         scan position is left unchanged, so the caller may simply call next_entry() again.
 */
RC BplusTreeScanner::next_entry(RID &rid)
{
  if (nullptr == current_frame_) {
    return RC::RECORD_EOF;
  }

  // open() already positioned the scanner on the first entry; emit it without advancing.
  if (!first_emitted_) {
    fetch_item(rid);
    first_emitted_ = true;
    return RC::SUCCESS;
  }

  iter_index_++;

  LeafIndexNodeHandler node(tree_handler_.file_header_, current_frame_);
  if (iter_index_ < node.size()) {
    if (touch_end()) {
      return RC::RECORD_EOF;
    }

    fetch_item(rid);
    return RC::SUCCESS;
  }

  // The current leaf is exhausted: continue with the next one in the leaf chain.
  PageNum next_page_num = node.next_page();
  if (BP_INVALID_PAGE_NUM == next_page_num) {
    return RC::RECORD_EOF;
  }

  const int memo_point = latch_memo_.memo_point();

  // Fetch into a local pointer: current_frame_ must keep referring to the page we
  // still hold a latch on until the next page is safely latched.
  Frame *next_frame = nullptr;
  RC rc = latch_memo_.get_page(next_page_num, next_frame);
  if (rc != RC::SUCCESS) {
    LOG_WARN("failed to get next page. page num=%d, rc=%s", next_page_num, strrc(rc));
    iter_index_--;  // undo the advance so that a retry starts from the same position
    return rc;
  }

  /**
   * Latching unconditionally here could deadlock, because the scan visits pages in
   * a different order than insert and delete do.
   * If the latch cannot be taken, the upper layer is expected to retry.
   */
  bool locked = latch_memo_.try_slatch(next_frame);
  if (!locked) {
    iter_index_--;  // undo the advance so that a retry starts from the same position
    return RC::LOCKED_NEED_WAIT;
  }

  current_frame_ = next_frame;
  // NOTE(review): presumably drops what the memo recorded before memo_point,
  // i.e. the leaf that has just been finished - confirm against LatchMemo.
  latch_memo_.release_to(memo_point);
  iter_index_ = -1;  // the recursive call advances it to 0
  return next_entry(rid);
}

/// Mark the scanner as not opened so that open() can be called again; always succeeds.
RC BplusTreeScanner::close()
{
  LOG_TRACE("bplus tree scanner closed");
  inited_ = false;
  return RC::SUCCESS;
}
羽飞's avatar
羽飞 已提交
1854

L
Longda Feng 已提交
1855 1856
/**
 * @brief Build a copy of a CHARS user key that is exactly attr_length bytes long.
 *
 * Shorter keys are zero padded. Longer keys are truncated; when the cut-off part is
 * not empty the bound is adjusted so that the truncated key selects the same entries:
 *   - lower bound (want_greater): ">=/> ABCD1" with attr_length 4 becomes ">= ABCE"
 *   - upper bound:                "<=/< ABCD1" with attr_length 4 becomes "<= ABCD"
 *
 * @param user_key         key supplied by the user
 * @param key_len          number of bytes of user_key to consider
 * @param want_greater     true when the key is a lower bound, false for an upper bound
 * @param fixed_key        receives a buffer allocated with new[]; the caller releases it with delete[]
 * @param should_inclusive set to true when the adjusted bound has to be treated as inclusive
 * @return RC::INVALID_ARGUMENT for null output pointers, RC::NOMEM when the buffer cannot
 *         be allocated, RC::SUCCESS otherwise
 */
RC BplusTreeScanner::fix_user_key(
    const char *user_key, int key_len, bool want_greater, char **fixed_key, bool *should_inclusive)
{
  if (fixed_key == nullptr || should_inclusive == nullptr) {
    return RC::INVALID_ARGUMENT;
  }

  // Crude but sufficient: only variable-length (CHARS) keys ever need adjusting.
  assert(tree_handler_.file_header_.attr_type == CHARS);
  assert(strlen(user_key) >= static_cast<size_t>(key_len));

  *should_inclusive = false;

  const int32_t attr_length = tree_handler_.file_header_.attr_length;
  char *buffer = new (std::nothrow) char[attr_length];
  if (buffer == nullptr) {
    return RC::NOMEM;
  }

  const bool fits = key_len <= attr_length;
  if (fits) {
    // copy the key and pad the remainder with zeros
    memcpy(buffer, user_key, key_len);
    memset(buffer + key_len, 0, attr_length - key_len);
  } else {
    // key_len > attr_length: keep only the leading attr_length bytes
    memcpy(buffer, user_key, attr_length);

    const bool has_tail = (user_key[attr_length] != 0);
    if (has_tail) {
      // NOTE: assumes plain ASCII content without binary bytes, so incrementing
      // the last char does not overflow.
      *should_inclusive = true;
      if (want_greater) {
        ++buffer[attr_length - 1];
      }
    }
  }

  *fixed_key = buffer;
  return RC::SUCCESS;
}