Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
indiff7643
Terarkdb
提交
2b8c4463
T
Terarkdb
项目概览
indiff7643
/
Terarkdb
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Terarkdb
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2b8c4463
编写于
2月 07, 2014
作者:
I
Igor Canadi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' into columnfamilies
上级
0143abdb
3ce8d9a9
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
299 addition
and
204 deletion
+299
-204
Makefile
Makefile
+13
-5
build_tools/regression_build_test.sh
build_tools/regression_build_test.sh
+22
-0
db/db_bench.cc
db/db_bench.cc
+18
-0
include/rocksdb/memtablerep.h
include/rocksdb/memtablerep.h
+9
-8
port/port_posix.h
port/port_posix.h
+0
-1
table/format.cc
table/format.cc
+16
-6
table/format.h
table/format.h
+40
-5
table/meta_blocks.cc
table/meta_blocks.cc
+52
-53
table/meta_blocks.h
table/meta_blocks.h
+7
-0
table/table_properties.cc
table/table_properties.cc
+16
-35
table/table_test.cc
table/table_test.cc
+39
-78
tools/sst_dump.cc
tools/sst_dump.cc
+63
-9
util/env_posix.cc
util/env_posix.cc
+4
-4
未找到文件。
Makefile
浏览文件 @
2b8c4463
...
...
@@ -6,7 +6,12 @@
INSTALL_PATH
?=
$(CURDIR)
#-----------------------------------------------
ifneq
($(MAKECMDGOALS),dbg)
OPT
+=
-O2
-fno-omit-frame-pointer
-momit-leaf-frame-pointer
else
OPT
+=
-fno-omit-frame-pointer
-momit-leaf-frame-pointer
endif
#-----------------------------------------------
# detect what platform we're building on
...
...
@@ -134,10 +139,13 @@ $(SHARED3): $(LIBOBJECTS)
endif
# PLATFORM_SHARED_EXT
.PHONY
:
blackbox_crash_test check clean coverage crash_test ldb_tests
\
release tags valgrind_check whitebox_crash_test format shared_lib all
\
dbg
all
:
$(LIBRARY) $(PROGRAMS)
.PHONY
:
blackbox_crash_test check clean coverage crash_test ldb_tests
\
release tags valgrind_check whitebox_crash_test format shared_lib
dbg
:
$(PROGRAMS)
# Will also generate shared libraries.
release
:
...
...
@@ -151,7 +159,7 @@ coverage:
# Delete intermediate files
find
.
-type
f
-regex
".*
\.\(\(
gcda
\)\|\(
gcno
\)\)
"
-exec
rm
{}
\;
check
:
all
$(PROGRAMS) $(TESTS) $(TOOLS)
check
:
$(PROGRAMS) $(TESTS) $(TOOLS)
for
t
in
$(TESTS)
;
do
echo
"***** Running
$$
t"
;
./
$$
t
||
exit
1
;
done
python tools/ldb_test.py
...
...
@@ -347,8 +355,8 @@ $(MEMENVLIBRARY) : $(MEMENVOBJECTS)
rm
-f
$@
$(AR)
-rs
$@
$(MEMENVOBJECTS)
memenv_test
:
helpers/memenv/memenv_test.o $(MEMENV
LIBRARY) $(LIBRARY
) $(TESTHARNESS)
$(CXX)
helpers/memenv/memenv_test.o
$(MEMENV
LIBRARY)
$(LIBRARY
)
$(TESTHARNESS)
$(EXEC_LDFLAGS)
-o
$@
$(LDFLAGS)
$(COVERAGEFLAGS)
memenv_test
:
helpers/memenv/memenv_test.o $(MEMENV
OBJECTS) $(LIBOBJECTS
) $(TESTHARNESS)
$(CXX)
helpers/memenv/memenv_test.o
$(MEMENV
OBJECTS)
$(LIBOBJECTS
)
$(TESTHARNESS)
$(EXEC_LDFLAGS)
-o
$@
$(LDFLAGS)
$(COVERAGEFLAGS)
manual_compaction_test
:
util/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX)
util/manual_compaction_test.o
$(LIBOBJECTS)
$(TESTHARNESS)
$(EXEC_LDFLAGS)
-o
$@
$(LDFLAGS)
$(COVERAGEFLAGS)
...
...
build_tools/regression_build_test.sh
浏览文件 @
2b8c4463
...
...
@@ -117,6 +117,27 @@ make release
--sync
=
0
\
--threads
=
16
>
${
STAT_FILE
}
.readrandom
# measure readrandom with 6GB block cache and tailing iterator
./db_bench
\
--benchmarks
=
readrandom
\
--db
=
$DATA_DIR
\
--use_existing_db
=
1
\
--bloom_bits
=
10
\
--num
=
$NUM
\
--reads
=
$((
NUM
/
5
))
\
--cache_size
=
6442450944
\
--cache_numshardbits
=
6
\
--table_cache_numshardbits
=
4
\
--open_files
=
55000
\
--disable_seek_compaction
=
1
\
--use_tailing_iterator
=
1
\
--statistics
=
1
\
--histogram
=
1
\
--disable_data_sync
=
1
\
--disable_wal
=
1
\
--sync
=
0
\
--threads
=
16
>
${
STAT_FILE
}
.readrandomtailing
# measure readrandom with 100MB block cache
./db_bench
\
--benchmarks
=
readrandom
\
...
...
@@ -300,6 +321,7 @@ function send_benchmark_to_ods {
send_benchmark_to_ods overwrite overwrite
$STAT_FILE
.overwrite
send_benchmark_to_ods fillseq fillseq
$STAT_FILE
.fillseq
send_benchmark_to_ods readrandom readrandom
$STAT_FILE
.readrandom
send_benchmark_to_ods readrandom readrandom_tailing
$STAT_FILE
.readrandomtailing
send_benchmark_to_ods readrandom readrandom_smallblockcache
$STAT_FILE
.readrandomsmallblockcache
send_benchmark_to_ods readrandom readrandom_memtable_sst
$STAT_FILE
.readrandom_mem_sst
send_benchmark_to_ods readrandom readrandom_fillunique_random
$STAT_FILE
.readrandom_filluniquerandom
...
...
db/db_bench.cc
浏览文件 @
2b8c4463
...
...
@@ -447,6 +447,9 @@ static auto FLAGS_compaction_fadvice_e =
DEFINE_bool
(
use_multiget
,
false
,
"Use multiget to access a series of keys instead of get"
);
DEFINE_bool
(
use_tailing_iterator
,
false
,
"Use tailing iterator to access a series of keys instead of get"
);
DEFINE_int64
(
keys_per_multiget
,
90
,
"If use_multiget is true, determines number"
" of keys to group per call Arbitrary default is good because it"
" agrees with readwritepercent"
);
...
...
@@ -1729,6 +1732,21 @@ class Benchmark {
thread
->
stats
.
FinishedSingleOp
(
db_
);
keys_left
-=
num_keys
;
}
}
else
if
(
FLAGS_use_tailing_iterator
)
{
// use tailing iterator for gets
options
.
tailing
=
true
;
Iterator
*
iter
=
db_
->
NewIterator
(
options
);
while
(
!
duration
.
Done
(
1
))
{
const
long
long
k
=
thread
->
rand
.
Next
()
%
FLAGS_num
;
unique_ptr
<
char
[]
>
key
=
GenerateKeyFromInt
(
k
);
iter
->
Seek
(
key
.
get
());
if
(
iter
->
Valid
()
&&
iter
->
key
().
compare
(
Slice
(
key
.
get
()))
==
0
)
{
++
found
;
}
thread
->
stats
.
FinishedSingleOp
(
db_
);
}
delete
iter
;
}
else
{
// Regular case. Do one "get" at a time Get
Iterator
*
iter
=
db_
->
NewIterator
(
options
);
std
::
string
value
;
...
...
include/rocksdb/memtablerep.h
浏览文件 @
2b8c4463
...
...
@@ -21,7 +21,7 @@
// types built in:
// - SkipListRep: This is the default; it is backed by a skip list.
// - HashSkipListRep: The memtable rep that is best used for keys that are
// structured like "prefix:suffix" where iteration within
g
a prefix is
// structured like "prefix:suffix" where iteration within a prefix is
// common and iteration across different prefixes is rare. It is backed by
// a hash map where each bucket is a skip list.
// - VectorRep: This is backed by an unordered std::vector. On iteration, the
...
...
@@ -85,7 +85,7 @@ class MemTableRep {
// Initialize an iterator over the specified collection.
// The returned iterator is not valid.
// explicit Iterator(const MemTableRep* collection);
virtual
~
Iterator
()
{
};
virtual
~
Iterator
()
{
}
// Returns true iff the iterator is positioned at a valid node.
virtual
bool
Valid
()
const
=
0
;
...
...
@@ -143,7 +143,7 @@ class MemTableRep {
// new MemTableRep objects
class
MemTableRepFactory
{
public:
virtual
~
MemTableRepFactory
()
{
};
virtual
~
MemTableRepFactory
()
{
}
virtual
MemTableRep
*
CreateMemTableRep
(
MemTableRep
::
KeyComparator
&
,
Arena
*
)
=
0
;
virtual
const
char
*
Name
()
const
=
0
;
...
...
@@ -159,7 +159,8 @@ class MemTableRepFactory {
// bytes reserved for usage.
class
VectorRepFactory
:
public
MemTableRepFactory
{
const
size_t
count_
;
public:
public:
explicit
VectorRepFactory
(
size_t
count
=
0
)
:
count_
(
count
)
{
}
virtual
MemTableRep
*
CreateMemTableRep
(
MemTableRep
::
KeyComparator
&
,
Arena
*
)
override
;
...
...
@@ -170,9 +171,9 @@ public:
// This uses a skip list to store keys. It is the default.
class
SkipListFactory
:
public
MemTableRepFactory
{
public:
virtual
MemTableRep
*
CreateMemTableRep
(
MemTableRep
::
KeyComparator
&
,
Arena
*
)
override
;
public:
virtual
MemTableRep
*
CreateMemTableRep
(
MemTableRep
::
KeyComparator
&
,
Arena
*
)
override
;
virtual
const
char
*
Name
()
const
override
{
return
"SkipListFactory"
;
}
...
...
@@ -196,4 +197,4 @@ extern MemTableRepFactory* NewHashSkipListRepFactory(
extern
MemTableRepFactory
*
NewHashLinkListRepFactory
(
const
SliceTransform
*
transform
,
size_t
bucket_count
=
50000
);
}
}
// namespace rocksdb
port/port_posix.h
浏览文件 @
2b8c4463
...
...
@@ -349,7 +349,6 @@ inline bool BZip2_Compress(const CompressionOptions& opts, const char* input,
output
->
resize
(
output
->
size
()
-
_stream
.
avail_out
);
BZ2_bzCompressEnd
(
&
_stream
);
return
true
;
return
output
;
#endif
return
false
;
}
...
...
table/format.cc
浏览文件 @
2b8c4463
...
...
@@ -9,6 +9,8 @@
#include "table/format.h"
#include <string>
#include "port/port.h"
#include "rocksdb/env.h"
#include "table/block.h"
...
...
@@ -43,8 +45,8 @@ void Footer::EncodeTo(std::string* dst) const {
metaindex_handle_
.
EncodeTo
(
dst
);
index_handle_
.
EncodeTo
(
dst
);
dst
->
resize
(
2
*
BlockHandle
::
kMaxEncodedLength
);
// Padding
PutFixed32
(
dst
,
static_cast
<
uint32_t
>
(
kTableMagicNumber
&
0xffffffffu
));
PutFixed32
(
dst
,
static_cast
<
uint32_t
>
(
kTableMagicNumber
>>
32
));
PutFixed32
(
dst
,
static_cast
<
uint32_t
>
(
table_magic_number
()
&
0xffffffffu
));
PutFixed32
(
dst
,
static_cast
<
uint32_t
>
(
table_magic_number
()
>>
32
));
assert
(
dst
->
size
()
==
original_size
+
kEncodedLength
);
}
...
...
@@ -52,13 +54,21 @@ Status Footer::DecodeFrom(Slice* input) {
assert
(
input
!=
nullptr
);
assert
(
input
->
size
()
>=
kEncodedLength
);
const
char
*
magic_ptr
=
input
->
data
()
+
kEncodedLength
-
8
;
const
char
*
magic_ptr
=
input
->
data
()
+
kEncodedLength
-
kMagicNumberLengthByte
;
const
uint32_t
magic_lo
=
DecodeFixed32
(
magic_ptr
);
const
uint32_t
magic_hi
=
DecodeFixed32
(
magic_ptr
+
4
);
const
uint64_t
magic
=
((
static_cast
<
uint64_t
>
(
magic_hi
)
<<
32
)
|
(
static_cast
<
uint64_t
>
(
magic_lo
)));
if
(
magic
!=
kTableMagicNumber
)
{
return
Status
::
InvalidArgument
(
"not an sstable (bad magic number)"
);
if
(
HasInitializedTableMagicNumber
())
{
if
(
magic
!=
table_magic_number
())
{
char
buffer
[
80
];
snprintf
(
buffer
,
sizeof
(
buffer
)
-
1
,
"not an sstable (bad magic number --- %lx)"
,
magic
);
return
Status
::
InvalidArgument
(
buffer
);
}
}
else
{
set_table_magic_number
(
magic
);
}
Status
result
=
metaindex_handle_
.
DecodeFrom
(
input
);
...
...
@@ -221,7 +231,7 @@ Status UncompressBlockContents(const char* data, size_t n,
default:
return
Status
::
Corruption
(
"bad block type"
);
}
result
->
compression_type
=
kNoCompression
;
// not compressed any more
result
->
compression_type
=
kNoCompression
;
// not compressed any more
return
Status
::
OK
();
}
...
...
table/format.h
浏览文件 @
2b8c4463
...
...
@@ -21,6 +21,9 @@ class Block;
class
RandomAccessFile
;
struct
ReadOptions
;
// the length of the magic number in bytes.
const
int
kMagicNumberLengthByte
=
8
;
// BlockHandle is a pointer to the extent of a file that stores a data
// block or a meta block.
class
BlockHandle
{
...
...
@@ -63,12 +66,16 @@ class BlockHandle {
// end of every table file.
class
Footer
{
public:
// Constructs a footer without specifying its table magic number.
// In such case, the table magic number of such footer should be
// initialized via @ReadFooterFromFile().
Footer
()
:
Footer
(
kInvalidTableMagicNumber
)
{}
// @table_magic_number serves two purposes:
// 1. Identify different types of the tables.
// 2. Help us to identify if a given file is a valid sst.
Footer
(
uint64_t
table_magic_number
)
:
kTableMagicNumber
(
table_magic_number
)
{
}
explicit
Footer
(
uint64_t
table_magic_number
)
:
table_magic_number_
(
table_magic_number
)
{}
// The block handle for the metaindex block of the table
const
BlockHandle
&
metaindex_handle
()
const
{
return
metaindex_handle_
;
}
...
...
@@ -78,24 +85,52 @@ class Footer {
const
BlockHandle
&
index_handle
()
const
{
return
index_handle_
;
}
void
set_index_handle
(
const
BlockHandle
&
h
)
{
index_handle_
=
h
;
}
uint64_t
table_magic_number
()
const
{
return
table_magic_number_
;
}
void
EncodeTo
(
std
::
string
*
dst
)
const
;
// Set the current footer based on the input slice. If table_magic_number_
// is not set (i.e., HasInitializedTableMagicNumber() is false), then this
// function will also initialize table_magic_number_. Otherwise, this
// function will verify whether the magic number specified in the input
// slice matches table_magic_number_ and update the current footer only
// when the test passes.
Status
DecodeFrom
(
Slice
*
input
);
// Encoded length of a Footer. Note that the serialization of a
// Footer will always occupy exactly this many bytes. It consists
// of two block handles and a magic number.
enum
{
kEncodedLength
=
2
*
BlockHandle
::
kMaxEncodedLength
+
8
kEncodedLength
=
2
*
BlockHandle
::
kMaxEncodedLength
+
8
};
const
uint64_t
kInvalidTableMagicNumber
=
0
;
private:
// Stores @magic_number into table_magic_number_, but only when no valid
// magic number has been recorded yet. Returns true when the value was
// stored; returns false (leaving the existing value untouched) when the
// footer already carries an initialized magic number.
bool set_table_magic_number(uint64_t magic_number) {
  // The previous condition was inverted: it assigned only when a magic
  // number was already present, so an uninitialized footer could never be
  // given one.
  if (HasInitializedTableMagicNumber()) {
    return false;
  }
  table_magic_number_ = magic_number;
  return true;
}
// Reports whether this footer already carries a magic number, i.e. whether
// table_magic_number_ holds anything other than kInvalidTableMagicNumber.
bool HasInitializedTableMagicNumber() const {
  const bool still_unset = (table_magic_number_ == kInvalidTableMagicNumber);
  return !still_unset;
}
BlockHandle
metaindex_handle_
;
BlockHandle
index_handle_
;
const
uint64_t
kTableMagicNumber
;
uint64_t
table_magic_number_
;
};
// Read the footer from file
...
...
table/meta_blocks.cc
浏览文件 @
2b8c4463
...
...
@@ -2,12 +2,13 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include "table/meta_blocks.h"
#include <map>
#include <string>
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block.h"
#include "table/format.h"
#include "util/coding.h"
...
...
@@ -104,9 +105,8 @@ bool NotifyCollectTableCollectorsOnAdd(
Status
s
=
collector
->
Add
(
key
,
value
);
all_succeeded
=
all_succeeded
&&
s
.
ok
();
if
(
!
s
.
ok
())
{
LogPropertiesCollectionError
(
info_log
,
"Add"
,
/* method */
collector
->
Name
()
);
LogPropertiesCollectionError
(
info_log
,
"Add"
/* method */
,
collector
->
Name
());
}
}
return
all_succeeded
;
...
...
@@ -123,9 +123,8 @@ bool NotifyCollectTableCollectorsOnFinish(
all_succeeded
=
all_succeeded
&&
s
.
ok
();
if
(
!
s
.
ok
())
{
LogPropertiesCollectionError
(
info_log
,
"Finish"
,
/* method */
collector
->
Name
()
);
LogPropertiesCollectionError
(
info_log
,
"Finish"
/* method */
,
collector
->
Name
());
}
else
{
builder
->
Add
(
user_collected_properties
);
}
...
...
@@ -151,14 +150,8 @@ Status ReadProperties(
BlockContents
block_contents
;
ReadOptions
read_options
;
read_options
.
verify_checksums
=
false
;
Status
s
=
ReadBlockContents
(
file
,
read_options
,
handle
,
&
block_contents
,
env
,
false
);
Status
s
=
ReadBlockContents
(
file
,
read_options
,
handle
,
&
block_contents
,
env
,
false
);
if
(
!
s
.
ok
())
{
return
s
;
...
...
@@ -166,22 +159,20 @@ Status ReadProperties(
Block
properties_block
(
block_contents
);
std
::
unique_ptr
<
Iterator
>
iter
(
properties_block
.
NewIterator
(
BytewiseComparator
())
);
properties_block
.
NewIterator
(
BytewiseComparator
()));
// All pre-defined properties of type uint64_t
std
::
unordered_map
<
std
::
string
,
uint64_t
*>
predefined_uint64_properties
=
{
{
TablePropertiesNames
::
kDataSize
,
&
table_properties
->
data_size
},
{
TablePropertiesNames
::
kIndexSize
,
&
table_properties
->
index_size
},
{
TablePropertiesNames
::
kFilterSize
,
&
table_properties
->
filter_size
},
{
TablePropertiesNames
::
kRawKeySize
,
&
table_properties
->
raw_key_size
},
{
TablePropertiesNames
::
kRawValueSize
,
&
table_properties
->
raw_value_size
},
{
TablePropertiesNames
::
kNumDataBlocks
,
&
table_properties
->
num_data_blocks
},
{
TablePropertiesNames
::
kNumEntries
,
&
table_properties
->
num_entries
},
{
TablePropertiesNames
::
kFormatVersion
,
&
table_properties
->
format_version
},
{
TablePropertiesNames
::
kFixedKeyLen
,
&
table_properties
->
fixed_key_len
},
};
{
TablePropertiesNames
::
kDataSize
,
&
table_properties
->
data_size
},
{
TablePropertiesNames
::
kIndexSize
,
&
table_properties
->
index_size
},
{
TablePropertiesNames
::
kFilterSize
,
&
table_properties
->
filter_size
},
{
TablePropertiesNames
::
kRawKeySize
,
&
table_properties
->
raw_key_size
},
{
TablePropertiesNames
::
kRawValueSize
,
&
table_properties
->
raw_value_size
},
{
TablePropertiesNames
::
kNumDataBlocks
,
&
table_properties
->
num_data_blocks
},
{
TablePropertiesNames
::
kNumEntries
,
&
table_properties
->
num_entries
},
{
TablePropertiesNames
::
kFormatVersion
,
&
table_properties
->
format_version
},
{
TablePropertiesNames
::
kFixedKeyLen
,
&
table_properties
->
fixed_key_len
}};
std
::
string
last_key
;
for
(
iter
->
SeekToFirst
();
iter
->
Valid
();
iter
->
Next
())
{
...
...
@@ -192,10 +183,8 @@ Status ReadProperties(
auto
key
=
iter
->
key
().
ToString
();
// properties block is strictly sorted with no duplicate key.
assert
(
last_key
.
empty
()
||
BytewiseComparator
()
->
Compare
(
key
,
last_key
)
>
0
);
assert
(
last_key
.
empty
()
||
BytewiseComparator
()
->
Compare
(
key
,
last_key
)
>
0
);
last_key
=
key
;
auto
raw_val
=
iter
->
value
();
...
...
@@ -218,8 +207,7 @@ Status ReadProperties(
}
else
{
// handle user-collected properties
table_properties
->
user_collected_properties
.
insert
(
std
::
make_pair
(
key
,
raw_val
.
ToString
())
);
{
key
,
raw_val
.
ToString
()});
}
}
...
...
@@ -244,21 +232,14 @@ Status ReadTableProperties(
BlockContents
metaindex_contents
;
ReadOptions
read_options
;
read_options
.
verify_checksums
=
false
;
s
=
ReadBlockContents
(
file
,
read_options
,
metaindex_handle
,
&
metaindex_contents
,
env
,
false
);
s
=
ReadBlockContents
(
file
,
read_options
,
metaindex_handle
,
&
metaindex_contents
,
env
,
false
);
if
(
!
s
.
ok
())
{
return
s
;
}
Block
metaindex_block
(
metaindex_contents
);
std
::
unique_ptr
<
Iterator
>
meta_iter
(
metaindex_block
.
NewIterator
(
BytewiseComparator
())
);
metaindex_block
.
NewIterator
(
BytewiseComparator
()));
// -- Read property block
meta_iter
->
Seek
(
kPropertiesBlock
);
...
...
@@ -266,21 +247,39 @@ Status ReadTableProperties(
if
(
meta_iter
->
Valid
()
&&
meta_iter
->
key
()
==
kPropertiesBlock
&&
meta_iter
->
status
().
ok
())
{
s
=
ReadProperties
(
meta_iter
->
value
(),
file
,
env
,
info_log
,
properties
);
s
=
ReadProperties
(
meta_iter
->
value
(),
file
,
env
,
info_log
,
properties
);
}
else
{
s
=
Status
::
Corruption
(
"Unable to read the property block from the plain table"
);
"Unable to read the property block from the plain table"
);
}
return
s
;
}
// Opens the file at @file_path, reads its footer, and stores the magic
// number recorded in that footer into *table_magic_number.
//
// Returns the failing status when the file cannot be opened, when its size
// cannot be obtained, or when the footer cannot be read. Returns
// InvalidArgument when the file is shorter than an encoded footer.
// *table_magic_number is written only on success.
Status ReadTableMagicNumber(const std::string& file_path,
                            const Options& options,
                            const EnvOptions& env_options,
                            uint64_t* table_magic_number) {
  unique_ptr<RandomAccessFile> file;
  Status s = options.env->NewRandomAccessFile(file_path, &file, env_options);
  if (!s.ok()) {
    return s;
  }

  // Propagate a GetFileSize() failure instead of discarding it; otherwise
  // the length check below would run on a value that was never filled in.
  uint64_t file_size = 0;
  s = options.env->GetFileSize(file_path, &file_size);
  if (!s.ok()) {
    return s;
  }
  if (file_size < Footer::kEncodedLength) {
    return Status::InvalidArgument("file is too short to be an sstable");
  }

  Footer footer;
  s = ReadFooterFromFile(file.get(), file_size, &footer);
  if (!s.ok()) {
    return s;
  }
  *table_magic_number = footer.table_magic_number();
  return Status::OK();
}
}
// namespace rocksdb
table/meta_blocks.h
浏览文件 @
2b8c4463
...
...
@@ -8,6 +8,7 @@
#include <memory>
#include <string>
#include "db/builder.h"
#include "rocksdb/comparator.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
...
...
@@ -118,4 +119,10 @@ Status ReadTableProperties(
Logger
*
info_log
,
TableProperties
*
properties
);
// Read the magic number of the specified file directly. The magic number
// of a valid sst table is the last 8 bytes of the file.
Status
ReadTableMagicNumber
(
const
std
::
string
&
file_path
,
const
Options
&
options
,
const
EnvOptions
&
env_options
,
uint64_t
*
table_magic_number
);
}
// namespace rocksdb
table/table_properties.cc
浏览文件 @
2b8c4463
...
...
@@ -40,50 +40,31 @@ std::string TableProperties::ToString(
result
.
reserve
(
1024
);
// Basic Info
AppendProperty
(
result
,
"# data blocks"
,
num_data_blocks
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"# data blocks"
,
num_data_blocks
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"# entries"
,
num_entries
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw key size"
,
raw_key_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw average key size"
,
num_entries
!=
0
?
1.0
*
raw_key_size
/
num_entries
:
0.0
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw value size"
,
raw_value_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw average value size"
,
num_entries
!=
0
?
1.0
*
raw_value_size
/
num_entries
:
0.0
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw average key size"
,
num_entries
!=
0
?
1.0
*
raw_key_size
/
num_entries
:
0.0
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw value size"
,
raw_value_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"raw average value size"
,
num_entries
!=
0
?
1.0
*
raw_value_size
/
num_entries
:
0.0
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"data block size"
,
data_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"index block size"
,
index_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"filter block size"
,
filter_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"(estimated) table size"
,
data_size
+
index_size
+
filter_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"filter block size"
,
filter_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"(estimated) table size"
,
data_size
+
index_size
+
filter_size
,
prop_delim
,
kv_delim
);
AppendProperty
(
result
,
"filter policy name"
,
result
,
"filter policy name"
,
filter_policy_name
.
empty
()
?
std
::
string
(
"N/A"
)
:
filter_policy_name
,
prop_delim
,
kv_delim
);
prop_delim
,
kv_delim
);
return
result
;
}
...
...
table/table_test.cc
浏览文件 @
2b8c4463
...
...
@@ -6,6 +6,9 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <inttypes.h>
#include <stdio.h>
#include <algorithm>
#include <map>
#include <string>
...
...
@@ -25,6 +28,7 @@
#include "rocksdb/slice_transform.h"
#include "rocksdb/memtablerep.h"
#include "table/block.h"
#include "table/meta_blocks.h"
#include "table/block_based_table_builder.h"
#include "table/block_based_table_factory.h"
#include "table/block_based_table_reader.h"
...
...
@@ -946,10 +950,7 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) {
block_builder
.
Add
(
item
.
first
,
item
.
second
);
}
Slice
content
=
block_builder
.
Finish
();
ASSERT_EQ
(
content
.
size
()
+
kBlockTrailerSize
,
props
.
data_size
);
ASSERT_EQ
(
content
.
size
()
+
kBlockTrailerSize
,
props
.
data_size
);
}
TEST
(
BlockBasedTableTest
,
FilterPolicyNameProperties
)
{
...
...
@@ -958,9 +959,7 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
std
::
vector
<
std
::
string
>
keys
;
KVMap
kvmap
;
Options
options
;
std
::
unique_ptr
<
const
FilterPolicy
>
filter_policy
(
NewBloomFilterPolicy
(
10
)
);
std
::
unique_ptr
<
const
FilterPolicy
>
filter_policy
(
NewBloomFilterPolicy
(
10
));
options
.
filter_policy
=
filter_policy
.
get
();
c
.
Finish
(
options
,
GetPlainInternalComparator
(
options
.
comparator
),
&
keys
,
...
...
@@ -1032,10 +1031,8 @@ TEST(BlockBasedTableTest, NumBlockStat) {
KVMap
kvmap
;
c
.
Finish
(
options
,
GetPlainInternalComparator
(
options
.
comparator
),
&
ks
,
&
kvmap
);
ASSERT_EQ
(
kvmap
.
size
(),
c
.
table_reader
()
->
GetTableProperties
().
num_data_blocks
);
ASSERT_EQ
(
kvmap
.
size
(),
c
.
table_reader
()
->
GetTableProperties
().
num_data_blocks
);
}
class
BlockCacheProperties
{
...
...
@@ -1050,32 +1047,26 @@ class BlockCacheProperties {
}
// Check if the fetched props matches the expected ones.
void
AssertEqual
(
long
index_block_cache_miss
,
long
index_block_cache_hit
,
long
data_block_cache_miss
,
long
data_block_cache_hit
)
const
{
void
AssertEqual
(
int64_t
index_block_cache_miss
,
int64_t
index_block_cache_hit
,
int64_t
data_block_cache_miss
,
int64_t
data_block_cache_hit
)
const
{
ASSERT_EQ
(
index_block_cache_miss
,
this
->
index_block_cache_miss
);
ASSERT_EQ
(
index_block_cache_hit
,
this
->
index_block_cache_hit
);
ASSERT_EQ
(
data_block_cache_miss
,
this
->
data_block_cache_miss
);
ASSERT_EQ
(
data_block_cache_hit
,
this
->
data_block_cache_hit
);
ASSERT_EQ
(
index_block_cache_miss
+
data_block_cache_miss
,
this
->
block_cache_miss
);
ASSERT_EQ
(
index_block_cache_hit
+
data_block_cache_hit
,
this
->
block_cache_hit
);
ASSERT_EQ
(
index_block_cache_miss
+
data_block_cache_miss
,
this
->
block_cache_miss
);
ASSERT_EQ
(
index_block_cache_hit
+
data_block_cache_hit
,
this
->
block_cache_hit
);
}
private:
long
block_cache_miss
=
0
;
long
block_cache_hit
=
0
;
long
index_block_cache_miss
=
0
;
long
index_block_cache_hit
=
0
;
long
data_block_cache_miss
=
0
;
long
data_block_cache_hit
=
0
;
int64_t
block_cache_miss
=
0
;
int64_t
block_cache_hit
=
0
;
int64_t
index_block_cache_miss
=
0
;
int64_t
index_block_cache_hit
=
0
;
int64_t
data_block_cache_miss
=
0
;
int64_t
data_block_cache_hit
=
0
;
};
TEST
(
BlockBasedTableTest
,
BlockCacheTest
)
{
...
...
@@ -1105,12 +1096,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
{
BlockCacheProperties
props
(
options
.
statistics
.
get
());
// index will be added to block cache.
props
.
AssertEqual
(
1
,
// index block miss
0
,
0
,
0
);
props
.
AssertEqual
(
1
,
// index block miss
0
,
0
,
0
);
}
// Only index block will be accessed
...
...
@@ -1120,24 +1107,16 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
// NOTE: to help better highlight the "delta" of each ticker, I use
// <last_value> + <added_value> to indicate the increment of changed
// value; other numbers remain the same.
props
.
AssertEqual
(
1
,
0
+
1
,
// index block hit
0
,
0
);
props
.
AssertEqual
(
1
,
0
+
1
,
// index block hit
0
,
0
);
}
// Only data block will be accessed
{
iter
->
SeekToFirst
();
BlockCacheProperties
props
(
options
.
statistics
.
get
());
props
.
AssertEqual
(
1
,
1
,
0
+
1
,
// data block miss
0
);
props
.
AssertEqual
(
1
,
1
,
0
+
1
,
// data block miss
0
);
}
// Data block will be in cache
...
...
@@ -1145,12 +1124,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
iter
.
reset
(
c
.
NewIterator
());
iter
->
SeekToFirst
();
BlockCacheProperties
props
(
options
.
statistics
.
get
());
props
.
AssertEqual
(
1
,
1
+
1
,
// index block hit
1
,
0
+
1
// data block hit
);
props
.
AssertEqual
(
1
,
1
+
1
,
/* index block hit */
1
,
0
+
1
/* data block hit */
);
}
// release the iterator so that the block cache can reset correctly.
iter
.
reset
();
...
...
@@ -1176,12 +1151,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
c
.
Reopen
(
options
);
{
BlockCacheProperties
props
(
options
.
statistics
.
get
());
props
.
AssertEqual
(
1
,
// index block miss
0
,
0
,
0
);
props
.
AssertEqual
(
1
,
// index block miss
0
,
0
,
0
);
}
...
...
@@ -1191,12 +1162,9 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
// is only 1, index block will be purged after data block is inserted.
iter
.
reset
(
c
.
NewIterator
());
BlockCacheProperties
props
(
options
.
statistics
.
get
());
props
.
AssertEqual
(
1
+
1
,
// index block miss
0
,
0
,
// data block miss
0
);
props
.
AssertEqual
(
1
+
1
,
// index block miss
0
,
0
,
// data block miss
0
);
}
{
...
...
@@ -1204,12 +1172,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
// block's cache miss.
iter
->
SeekToFirst
();
BlockCacheProperties
props
(
options
.
statistics
.
get
());
props
.
AssertEqual
(
2
,
0
,
0
+
1
,
// data block miss
0
);
props
.
AssertEqual
(
2
,
0
,
0
+
1
,
// data block miss
0
);
}
}
...
...
@@ -1316,7 +1280,6 @@ TEST(GeneralTableTest, ApproximateOffsetOfPlain) {
ASSERT_TRUE
(
Between
(
c
.
ApproximateOffsetOf
(
"k06"
),
510000
,
511000
));
ASSERT_TRUE
(
Between
(
c
.
ApproximateOffsetOf
(
"k07"
),
510000
,
511000
));
ASSERT_TRUE
(
Between
(
c
.
ApproximateOffsetOf
(
"xyz"
),
610000
,
612000
));
}
static
void
DoCompressionTest
(
CompressionType
comp
)
{
...
...
@@ -1360,11 +1323,9 @@ TEST(GeneralTableTest, ApproximateOffsetOfCompressed) {
valid
++
;
}
for
(
int
i
=
0
;
i
<
valid
;
i
++
)
{
for
(
int
i
=
0
;
i
<
valid
;
i
++
)
{
DoCompressionTest
(
compression_state
[
i
]);
}
}
TEST
(
Harness
,
Randomized
)
{
...
...
@@ -1375,8 +1336,8 @@ TEST(Harness, Randomized) {
for
(
int
num_entries
=
0
;
num_entries
<
2000
;
num_entries
+=
(
num_entries
<
50
?
1
:
200
))
{
if
((
num_entries
%
10
)
==
0
)
{
fprintf
(
stderr
,
"case %d of %d: num_entries = %d
\n
"
,
(
i
+
1
),
int
(
args
.
size
()),
num_entries
);
fprintf
(
stderr
,
"case %d of %d: num_entries = %d
\n
"
,
(
i
+
1
),
static_cast
<
int
>
(
args
.
size
()),
num_entries
);
}
for
(
int
e
=
0
;
e
<
num_entries
;
e
++
)
{
std
::
string
v
;
...
...
tools/sst_dump.cc
浏览文件 @
2b8c4463
...
...
@@ -14,10 +14,14 @@
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block_based_table_factory.h"
#include "table/plain_table_factory.h"
#include "table/block.h"
#include "table/block_builder.h"
#include "table/meta_blocks.h"
#include "table/format.h"
#include "util/ldb_cmd.h"
#include "util/random.h"
...
...
@@ -44,6 +48,9 @@ class SstFileReader {
private:
Status
NewTableReader
(
const
std
::
string
&
file_path
);
Status
SetTableOptionsByMagicNumber
(
uint64_t
table_magic_number
,
RandomAccessFile
*
file
,
uint64_t
file_size
);
std
::
string
file_name_
;
uint64_t
read_num_
;
...
...
@@ -54,9 +61,9 @@ class SstFileReader {
Status
init_result_
;
unique_ptr
<
TableReader
>
table_reader_
;
unique_ptr
<
RandomAccessFile
>
file_
;
//
table_
options_ and internal_comparator_ will also be used in
// options_ and internal_comparator_ will also be used in
// ReadSequential internally (specifically, seek-related operations)
Options
table_
options_
;
Options
options_
;
InternalKeyComparator
internal_comparator_
;
};
...
...
@@ -70,21 +77,68 @@ SstFileReader::SstFileReader(const std::string& file_path,
init_result_
=
NewTableReader
(
file_name_
);
}
extern
uint64_t
kBlockBasedTableMagicNumber
;
extern
uint64_t
kPlainTableMagicNumber
;
Status
SstFileReader
::
NewTableReader
(
const
std
::
string
&
file_path
)
{
Status
s
=
table_options_
.
env
->
NewRandomAccessFile
(
file_path
,
&
file_
,
soptions_
);
uint64_t
magic_number
;
Status
s
=
ReadTableMagicNumber
(
file_path
,
options_
,
soptions_
,
&
magic_number
);
if
(
!
s
.
ok
())
{
return
s
;
}
if
(
magic_number
==
kPlainTableMagicNumber
)
{
soptions_
.
use_mmap_reads
=
true
;
}
options_
.
comparator
=
&
internal_comparator_
;
s
=
options_
.
env
->
NewRandomAccessFile
(
file_path
,
&
file_
,
soptions_
);
if
(
!
s
.
ok
())
{
return
s
;
}
uint64_t
file_size
;
table_options_
.
env
->
GetFileSize
(
file_path
,
&
file_size
);
unique_ptr
<
TableFactory
>
table_factory
;
s
=
table_options_
.
table_factory
->
NewTableReader
(
table_options_
,
soptions_
,
internal_comparator_
,
std
::
move
(
file_
),
file_size
,
&
table_reader_
);
options_
.
env
->
GetFileSize
(
file_path
,
&
file_size
);
s
=
SetTableOptionsByMagicNumber
(
magic_number
,
file_
.
get
(),
file_size
);
if
(
!
s
.
ok
())
{
return
s
;
}
s
=
options_
.
table_factory
->
NewTableReader
(
options_
,
soptions_
,
internal_comparator_
,
std
::
move
(
file_
),
file_size
,
&
table_reader_
);
return
s
;
}
// Configures options_ for the table format identified by
// @table_magic_number.
//
// The table properties are first read from @file (whose length is
// @file_size); a failure there is returned unchanged. Then:
//   - block-based magic number: installs a BlockBasedTableFactory.
//   - plain-table magic number: enables mmap reads, installs a
//     PlainTableFactory built from the stored fixed_key_len property, and
//     sets a no-op prefix extractor.
//   - any other value: returns InvalidArgument.
// For the two recognized formats the detected format is printed to stdout.
Status SstFileReader::SetTableOptionsByMagicNumber(
    uint64_t table_magic_number, RandomAccessFile* file, uint64_t file_size) {
  TableProperties table_properties;
  Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number,
                                          options_.env,
                                          options_.info_log.get(),
                                          &table_properties);
  if (!s.ok()) {
    return s;
  }

  if (table_magic_number == kBlockBasedTableMagicNumber) {
    options_.table_factory = std::make_shared<BlockBasedTableFactory>();
    fprintf(stdout, "Sst file format: block-based\n");
  } else if (table_magic_number == kPlainTableMagicNumber) {
    options_.allow_mmap_reads = true;
    options_.table_factory = std::make_shared<PlainTableFactory>(
        table_properties.fixed_key_len, 2, 0.8);
    options_.prefix_extractor = NewNoopTransform();
    fprintf(stdout, "Sst file format: plain table\n");
  } else {
    char error_msg_buffer[80];
    // uint64_t is not `unsigned long` on every platform, so "%lx" was a
    // format/argument mismatch; cast explicitly and print with "%llx".
    snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1,
             "Unsupported table magic number --- %llx)",
             static_cast<unsigned long long>(table_magic_number));
    return Status::InvalidArgument(error_msg_buffer);
  }

  return Status::OK();
}
Status
SstFileReader
::
ReadSequential
(
bool
print_kv
,
uint64_t
read_num
,
bool
has_from
,
...
...
util/env_posix.cc
浏览文件 @
2b8c4463
...
...
@@ -1047,16 +1047,16 @@ class PosixEnv : public Env {
unique_ptr
<
RandomRWFile
>*
result
,
const
EnvOptions
&
options
)
{
result
->
reset
();
// no support for mmap yet
if
(
options
.
use_mmap_writes
||
options
.
use_mmap_reads
)
{
return
Status
::
NotSupported
(
"No support for mmap read/write yet"
);
}
Status
s
;
const
int
fd
=
open
(
fname
.
c_str
(),
O_CREAT
|
O_RDWR
,
0644
);
if
(
fd
<
0
)
{
s
=
IOError
(
fname
,
errno
);
}
else
{
SetFD_CLOEXEC
(
fd
,
&
options
);
// no support for mmap yet
if
(
options
.
use_mmap_writes
||
options
.
use_mmap_reads
)
{
return
Status
::
NotSupported
(
"No support for mmap read/write yet"
);
}
result
->
reset
(
new
PosixRandomRWFile
(
fname
,
fd
,
options
));
}
return
s
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录