提交 a1f194c9 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!2188 Remove B+ tree deadcode and add an additional output to Search function

Merge pull request !2188 from JesseKLee/deadcode
......@@ -408,8 +408,7 @@ Status ClueOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) {
break;
}
}
auto file_it = filename_index_->Search(*it);
file_index.emplace_back(std::pair<std::string, int64_t>(file_it.value(), *it));
file_index.emplace_back(std::pair<std::string, int64_t>((*filename_index_)[*it], *it));
}
} else {
for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) {
......
......@@ -72,8 +72,9 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<
RETURN_IF_NOT_OK(IOBlock::GetKey(&fetched_key));
// Do an index lookup using that key to get the filename.
auto it = index.Search(fetched_key);
if (it != index.end()) {
auto r = index.Search(fetched_key);
if (r.second) {
auto &it = r.first;
*out_filename = it.value();
} else {
RETURN_STATUS_UNEXPECTED("Could not find filename from index");
......
......@@ -314,8 +314,7 @@ Status TextFileOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) {
break;
}
}
auto file_it = filename_index_->Search(*it);
file_index.emplace_back(std::pair<std::string, int64_t>(file_it.value(), *it));
file_index.emplace_back(std::pair<std::string, int64_t>((*filename_index_)[*it], *it));
}
} else {
for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) {
......
......@@ -451,8 +451,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector<int64_t> &i_keys) {
}
} else {
// Do an index lookup using that key to get the filename.
auto file_it = filename_index_->Search(*it);
std::string file_name = file_it.value();
std::string file_name = (*filename_index_)[*it];
if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) {
auto ioBlock = std::make_unique<FilenameBlock>(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone);
RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock)));
......
......@@ -40,8 +40,6 @@ struct BPlusTreeTraits {
static constexpr slot_type kLeafSlots = 256;
// Number of slots in each inner node of the tree
static constexpr slot_type kInnerSlots = 128;
// If kAppendMode is true, we will split high instead of 50/50 split
static constexpr bool kAppendMode = false;
};
/// Implementation of B+ tree
......@@ -123,19 +121,14 @@ class BPlusTree {
std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
void PopulateNumKeys();
key_type KeyAtPos(uint64_t inx);
// Statistics
struct tree_stats {
std::atomic<uint64_t> size_;
uint32_t leaves_;
uint32_t inner_nodes_;
uint32_t level_;
bool num_keys_array_valid_;
tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0), num_keys_array_valid_(false) {}
tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {}
};
private:
......@@ -160,10 +153,6 @@ class BPlusTree {
Node<BaseNode> lru_;
};
uint64_t PopulateNumKeys(BaseNode *n);
key_type KeyAtPos(BaseNode *n, uint64_t inx);
// This control block keeps track of all the nodes we traverse on insert.
// To maximize concurrency, internal nodes are latched S. If a node split
// is required, we must release all the latches and redo it again and change
......@@ -255,7 +244,6 @@ class BPlusTree {
slot_type slot_dir_[traits::kInnerSlots] = {0};
key_type keys_[traits::kInnerSlots] = {0};
BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
uint64_t num_keys_[traits::kInnerSlots + 1] = {0};
slot_type slotuse_;
};
......@@ -391,7 +379,6 @@ class BPlusTree {
Iterator operator--(int);
bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
private:
......@@ -441,7 +428,6 @@ class BPlusTree {
ConstIterator operator--(int);
bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
private:
......@@ -451,20 +437,17 @@ class BPlusTree {
};
Iterator begin();
Iterator end();
ConstIterator begin() const;
ConstIterator end() const;
ConstIterator cbegin() const;
ConstIterator cend() const;
// Locate the entry with key
ConstIterator Search(const key_type &key) const;
Iterator Search(const key_type &key);
std::pair<ConstIterator, bool> Search(const key_type &key) const;
std::pair<Iterator, bool> Search(const key_type &key);
value_type operator[](key_type key);
};
......
......@@ -269,26 +269,17 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafInsertK
RETURN_IF_BAD_RC(rc);
leaf_nodes_.InsertAfter(node, new_leaf);
*split_node = new_leaf;
if (slot == node->slotuse_ && traits::kAppendMode) {
// Split high. Good for bulk load when keys are in ascending order on insert
*split_key = key;
// Just insert the new key to the new leaf. No further need to move the keys
// from one leaf to the other.
rc = new_leaf->InsertIntoSlot(nullptr, 0, key, std::move(value));
// 50/50 split
rc = node->Split(new_leaf);
RETURN_IF_BAD_RC(rc);
*split_key = new_leaf->keys_[0];
if (LessThan(key, *split_key)) {
rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value));
RETURN_IF_BAD_RC(rc);
} else {
// 50/50 split
rc = node->Split(new_leaf);
slot -= node->slotuse_;
rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value));
RETURN_IF_BAD_RC(rc);
*split_key = new_leaf->keys_[0];
if (LessThan(key, *split_key)) {
rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value));
RETURN_IF_BAD_RC(rc);
} else {
slot -= node->slotuse_;
rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value));
RETURN_IF_BAD_RC(rc);
}
}
}
return rc;
......@@ -309,25 +300,18 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerInsert
rc = AllocateInner(&new_inner);
RETURN_IF_BAD_RC(rc);
*split_node = new_inner;
if (slot == node->slotuse_ && traits::kAppendMode) {
*split_key = key;
new_inner->data_[0] = node->data_[node->slotuse_];
rc = new_inner->InsertIntoSlot(0, key, ptr);
rc = node->Split(new_inner, split_key);
RETURN_IF_BAD_RC(rc);
if (LessThan(key, *split_key)) {
// Need to readjust the slot position since the split key is no longer in the two children.
slot = FindSlot(node, key);
rc = node->InsertIntoSlot(slot, key, ptr);
RETURN_IF_BAD_RC(rc);
} else {
rc = node->Split(new_inner, split_key);
// Same reasoning as above
slot = FindSlot(new_inner, key);
rc = new_inner->InsertIntoSlot(slot, key, ptr);
RETURN_IF_BAD_RC(rc);
if (LessThan(key, *split_key)) {
// Need to readjust the slot position since the split key is no longer in the two children.
slot = FindSlot(node, key);
rc = node->InsertIntoSlot(slot, key, ptr);
RETURN_IF_BAD_RC(rc);
} else {
// Same reasoning as above
slot = FindSlot(new_inner, key);
rc = new_inner->InsertIntoSlot(slot, key, ptr);
RETURN_IF_BAD_RC(rc);
}
}
}
return rc;
......@@ -377,8 +361,7 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InsertKeyVa
}
template <typename K, typename V, typename A, typename C, typename T>
typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock,
bool forUpdate,
typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock, bool forUpdate,
BPlusTree<K, V, A, C, T>::BaseNode *top,
const key_type &key,
BPlusTree<K, V, A, C, T>::LeafNode **ln,
......@@ -481,9 +464,6 @@ Status BPlusTree<K, V, A, C, T>::DoInsert(const key_type &key, std::unique_ptr<v
do {
// Track all the paths to the target and lock each internal node in S.
LockPathCB InsCB(this, retry);
// Mark the numKeysArray invalid. We may latch the tree in S and multiple guys are doing insert.
// But it is okay as we all set the same value.
stats_.num_keys_array_valid_ = false;
// Initially we lock path in S unless we need to do node split.
retry = false;
BaseNode *new_child = nullptr;
......@@ -552,70 +532,6 @@ std::unique_ptr<V> BPlusTree<K, V, A, C, T>::DoUpdate(const key_type &key, std::
}
}
/// Recompute the per-child key counts for the whole tree and mark them valid.
///
/// Delegates to the recursive overload starting at root_; the total it returns is
/// not needed here and is discarded on purpose.
/// NOTE(review): the visible caller (KeyAtPos) takes the tree lock exclusively before
/// calling this; confirm that every other caller does the same.
template <typename K, typename V, typename A, typename C, typename T>
void BPlusTree<K, V, A, C, T>::PopulateNumKeys() {
  // Walk down from the root; each inner node records how many keys live under each child.
  static_cast<void>(PopulateNumKeys(root_));
  // The counts were just rebuilt, so flag the cached numbers as usable.
  stats_.num_keys_array_valid_ = true;
}
/// Recursively count the keys stored beneath node n, caching per-child totals.
///
/// For an inner node, num_keys_[i] is set to the number of keys reachable through
/// data_[i] (same physical index), for every child in [0, slotuse_].
///
/// @param n Subtree root. May be null (e.g. an empty tree whose root_ is unset).
/// @return Number of keys under n: slotuse_ for a leaf, the sum over all children
///         for an inner node, and 0 when n is null.
template <typename K, typename V, typename A, typename C, typename T>
uint64_t BPlusTree<K, V, A, C, T>::PopulateNumKeys(BPlusTree<K, V, A, C, T>::BaseNode *n) {
  // An empty tree has no root; report zero keys instead of dereferencing null.
  if (n == nullptr) {
    return 0;
  }
  if (n->is_leafnode()) {
    auto *leaf = static_cast<LeafNode *>(n);
    return leaf->slotuse_;
  } else {
    auto *inner = static_cast<InnerNode *>(n);
    uint64_t num_keys = 0;
    // An inner node with slotuse_ keys has slotuse_ + 1 child pointers.
    for (auto i = 0; i < inner->slotuse_ + 1; i++) {
      inner->num_keys_[i] = PopulateNumKeys(inner->data_[i]);
      num_keys += inner->num_keys_[i];
    }
    return num_keys;
  }
}
/// Return the key stored at ordinal position inx of the tree.
///
/// The lookup relies on the per-child key counts cached in the inner nodes. When
/// those counts are flagged stale they are rebuilt first, under an exclusive lock
/// on the tree.
/// NOTE(review): the exclusive lock is released before the traversal below starts,
/// and the first read of the validity flag is made without the lock; confirm that
/// callers do not run this concurrently with inserts.
///
/// @param inx Zero-based position of the wanted key.
/// @return The key at that position as resolved by the recursive overload.
template <typename K, typename V, typename A, typename C, typename T>
typename BPlusTree<K, V, A, C, T>::key_type BPlusTree<K, V, A, C, T>::KeyAtPos(uint64_t inx) {
  if (!stats_.num_keys_array_valid_) {
    // Rebuilding the counts needs the whole tree to ourselves; with inserts in
    // flight the numbers could not be trusted.
    UniqueLock lck(&rw_lock_);
    // Someone else may have refreshed the counts while we waited for the lock.
    if (!stats_.num_keys_array_valid_) {
      PopulateNumKeys();
    }
  }
  // With per-branch key counts available, descend directly to the owning leaf.
  return KeyAtPos(root_, inx);
}
/// Resolve the key at ordinal position inx within the subtree rooted at n.
///
/// Inner nodes are navigated with the cached num_keys_ counts: children are visited
/// in key order (data_[0] first, then data_[slot_dir_[i] + 1] for each used slot i),
/// and inx is reduced by the size of every subtree that is skipped.
///
/// @param n   Subtree root. May be null (e.g. an empty tree).
/// @param inx Zero-based position relative to the first key under n.
/// @return The key at that position, or a default-constructed key_type when n is
///         null or inx is beyond the keys available under n.
template <typename K, typename V, typename A, typename C, typename T>
typename BPlusTree<K, V, A, C, T>::key_type BPlusTree<K, V, A, C, T>::KeyAtPos(BPlusTree<K, V, A, C, T>::BaseNode *n,
                                                                               uint64_t inx) {
  // Nothing to look up in an empty subtree.
  if (n == nullptr) {
    return key_type();
  }
  if (n->is_leafnode()) {
    auto *leaf = static_cast<LeafNode *>(n);
    // Only the first slotuse_ directory entries are in use; reading past them would
    // index slot_dir_/keys_ with garbage. Fall through to the default key instead.
    if (inx < leaf->slotuse_) {
      return leaf->keys_[leaf->slot_dir_[inx]];
    }
  } else {
    auto *inner = static_cast<InnerNode *>(n);
    // Leftmost child first: it has no separator key of its own.
    if ((inx + 1) > inner->num_keys_[0]) {
      inx -= inner->num_keys_[0];
    } else {
      return KeyAtPos(inner->data_[0], inx);
    }
    // Remaining children, in key order as given by the slot directory.
    for (auto i = 0; i < inner->slotuse_; i++) {
      if ((inx + 1) > inner->num_keys_[inner->slot_dir_[i] + 1]) {
        inx -= inner->num_keys_[inner->slot_dir_[i] + 1];
      } else {
        return KeyAtPos(inner->data_[inner->slot_dir_[i] + 1], inx);
      }
    }
  }
  // If we get here, inx is too big. Instead of throwing an exception, return the
  // default value of key_type, whatever it is.
  return key_type();
}
} // namespace dataset
} // namespace mindspore
#endif
......@@ -286,7 +286,8 @@ typename BPlusTree<K, V, A, C, T>::ConstIterator &BPlusTree<K, V, A, C, T>::Cons
}
template <typename K, typename V, typename A, typename C, typename T>
typename BPlusTree<K, V, A, C, T>::ConstIterator BPlusTree<K, V, A, C, T>::Search(const key_type &key) const {
std::pair<typename BPlusTree<K, V, A, C, T>::ConstIterator, bool> BPlusTree<K, V, A, C, T>::Search(
const key_type &key) const {
if (root_ != nullptr) {
LeafNode *leaf = nullptr;
slot_type slot;
......@@ -294,21 +295,15 @@ typename BPlusTree<K, V, A, C, T>::ConstIterator BPlusTree<K, V, A, C, T>::Searc
// Lock the tree in S, pass the lock to Locate which will unlock it for us underneath.
myLock->LockShared();
IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot);
if (rc == IndexRc::kOk) {
// All locks from the tree to the parent of leaf are all gone. We still have a S lock
// on the leaf. The unlock will be handled by the iterator when it goes out of scope.
return ConstIterator(leaf, slot, true);
} else {
MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << ".";
return cend();
}
bool find = (rc == IndexRc::kOk);
return std::make_pair(ConstIterator(leaf, slot, find), find);
} else {
return cend();
return std::make_pair(cend(), false);
}
}
template <typename K, typename V, typename A, typename C, typename T>
typename BPlusTree<K, V, A, C, T>::Iterator BPlusTree<K, V, A, C, T>::Search(const key_type &key) {
std::pair<typename BPlusTree<K, V, A, C, T>::Iterator, bool> BPlusTree<K, V, A, C, T>::Search(const key_type &key) {
if (root_ != nullptr) {
LeafNode *leaf = nullptr;
slot_type slot;
......@@ -316,23 +311,17 @@ typename BPlusTree<K, V, A, C, T>::Iterator BPlusTree<K, V, A, C, T>::Search(con
// Lock the tree in S, pass the lock to Locate which will unlock it for us underneath.
myLock->LockShared();
IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot);
if (rc == IndexRc::kOk) {
// All locks from the tree to the parent of leaf are all gone. We still have a S lock
// on the leaf. The unlock will be handled by the iterator when it goes out of scope.
return Iterator(leaf, slot, true);
} else {
MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << ".";
return end();
}
bool find = (rc == IndexRc::kOk);
return std::make_pair(Iterator(leaf, slot, find), find);
} else {
return end();
return std::make_pair(end(), false);
}
}
template <typename K, typename V, typename A, typename C, typename T>
typename BPlusTree<K, V, A, C, T>::value_type BPlusTree<K, V, A, C, T>::operator[](key_type key) {
Iterator it = Search(key);
return it.value();
auto r = Search(key);
return r.first.value();
}
template <typename K, typename V, typename A, typename C, typename T>
......
......@@ -32,13 +32,8 @@ using mindspore::LogStream;
// For testing purposes, we will make the branching factor very low.
struct mytraits {
using slot_type = uint16_t;
static const slot_type kLeafSlots = 6;
static const slot_type kInnerSlots = 3;
static const bool kAppendMode = false;
};
......@@ -95,13 +90,14 @@ TEST_F(MindDataTestBPlusTree, Test1) {
// Test search
{
MS_LOG(INFO) << "Locate key " << 100 << " Expect found.";
auto it = btree.Search(100);
EXPECT_FALSE(it == btree.end());
auto r = btree.Search(100);
auto &it = r.first;
EXPECT_TRUE(r.second);
EXPECT_EQ(it.key(), 100);
EXPECT_EQ(it.value(), "Hello World. I am 100");
MS_LOG(INFO) << "Locate key " << 300 << " Expect not found.";
it = btree.Search(300);
EXPECT_TRUE(it == btree.end());
auto q = btree.Search(300);
EXPECT_FALSE(q.second);
}
// Test duplicate key
......@@ -169,26 +165,18 @@ TEST_F(MindDataTestBPlusTree, Test2) {
{
MS_LOG(INFO) << "Locating key from 0 to 9999. Expect found.";
for (int i = 0; i < 10000; i++) {
auto it = btree.Search(i);
bool eoS = (it == btree.end());
EXPECT_FALSE(eoS);
if (!eoS) {
auto r = btree.Search(i);
EXPECT_TRUE(r.second);
if (r.second) {
auto &it = r.first;
EXPECT_EQ(it.key(), i);
std::string val = "Hello World. I am " + std::to_string(i);
EXPECT_EQ(it.value(), val);
}
}
MS_LOG(INFO) << "Locate key " << 10000 << ". Expect not found";
auto it = btree.Search(10000);
EXPECT_TRUE(it == btree.end());
}
// Test to retrieve key at certain position.
{
for (int i = 0; i < 10000; i++) {
int k = btree.KeyAtPos(i);
EXPECT_EQ(k, i);
}
auto q = btree.Search(10000);
EXPECT_FALSE(q.second);
}
}
......@@ -204,7 +192,8 @@ TEST_F(MindDataTestBPlusTree, Test3) {
uint64_t max = ai.max_key();
EXPECT_EQ(min, 1);
EXPECT_EQ(max, 4);
auto it = ai.Search(3);
auto r = ai.Search(3);
auto &it = r.first;
EXPECT_EQ(it.value(), "b");
MS_LOG(INFO) << "Dump all the values using [] operator.";
for (uint64_t i = min; i <= max; i++) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册