Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
aac7cc73
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
aac7cc73
编写于
4月 18, 2020
作者:
A
alexey-milovidov
提交者:
GitHub
4月 18, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #10126 from Jokser/replicated-merge-tree-s3
Support ReplicatedMergeTree over S3
上级
8cea4531
46a5c75b
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
157 additions
and
22 deletions
+157
-22
src/Storages/MergeTree/DataPartsExchange.cpp
src/Storages/MergeTree/DataPartsExchange.cpp
+15
-15
src/Storages/MergeTree/MergeTreeData.h
src/Storages/MergeTree/MergeTreeData.h
+2
-0
tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml
...tion/test_merge_tree_s3/configs/config.d/storage_conf.xml
+2
-2
tests/integration/test_merge_tree_s3/test.py
tests/integration/test_merge_tree_s3/test.py
+11
-5
tests/integration/test_replicated_merge_tree_s3/__init__.py
tests/integration/test_replicated_merge_tree_s3/__init__.py
+0
-0
tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml
...eplicated_merge_tree_s3/configs/config.d/storage_conf.xml
+21
-0
tests/integration/test_replicated_merge_tree_s3/test.py
tests/integration/test_replicated_merge_tree_s3/test.py
+106
-0
未找到文件。
src/Storages/MergeTree/DataPartsExchange.cpp
浏览文件 @
aac7cc73
...
...
@@ -255,23 +255,23 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart(
const
ReservationPtr
reservation
,
PooledReadWriteBufferFromHTTP
&
in
)
{
size_t
files
;
readBinary
(
files
,
in
);
auto
disk
=
reservation
->
getDisk
();
static
const
String
TMP_PREFIX
=
"tmp_fetch_"
;
String
tmp_prefix
=
tmp_prefix_
.
empty
()
?
TMP_PREFIX
:
tmp_prefix_
;
String
relative_part_path
=
String
(
to_detached
?
"detached/"
:
""
)
+
tmp_prefix
+
part_name
;
String
absolute_part_path
=
Poco
::
Path
(
data
.
getFullPathOnDisk
(
reservation
->
getDisk
())
+
relative_part_path
+
"/"
).
absolute
().
toString
();
Poco
::
File
part_file
(
absolute_part_path
);
String
part_relative_path
=
String
(
to_detached
?
"detached/"
:
""
)
+
tmp_prefix
+
part_name
;
String
part_download_path
=
data
.
getRelativeDataPath
()
+
part_relative_path
+
"/"
;
if
(
part_file
.
exists
(
))
throw
Exception
(
"Directory "
+
absolute_part_path
+
" already exists."
,
ErrorCodes
::
DIRECTORY_ALREADY_EXISTS
);
if
(
disk
->
exists
(
part_download_path
))
throw
Exception
(
"Directory "
+
fullPath
(
disk
,
part_download_path
)
+
" already exists."
,
ErrorCodes
::
DIRECTORY_ALREADY_EXISTS
);
CurrentMetrics
::
Increment
metric_increment
{
CurrentMetrics
::
ReplicatedFetch
};
part_file
.
createDirectory
(
);
disk
->
createDirectories
(
part_download_path
);
MergeTreeData
::
DataPart
::
Checksums
checksums
;
for
(
size_t
i
=
0
;
i
<
files
;
++
i
)
...
...
@@ -284,21 +284,21 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart(
/// File must be inside "absolute_part_path" directory.
/// Otherwise malicious ClickHouse replica may force us to write to arbitrary path.
String
absolute_file_path
=
Poco
::
Path
(
absolute_part
_path
+
file_name
).
absolute
().
toString
();
if
(
!
startsWith
(
absolute_file_path
,
absolute_part_path
))
throw
Exception
(
"File path ("
+
absolute_file_path
+
") doesn't appear to be inside part path ("
+
absolute_part
_path
+
")."
String
absolute_file_path
=
Poco
::
Path
(
part_download
_path
+
file_name
).
absolute
().
toString
();
if
(
!
startsWith
(
absolute_file_path
,
Poco
::
Path
(
part_download_path
).
absolute
().
toString
()
))
throw
Exception
(
"File path ("
+
absolute_file_path
+
") doesn't appear to be inside part path ("
+
part_download
_path
+
")."
" This may happen if we are trying to download part from malicious replica or logical error."
,
ErrorCodes
::
INSECURE_PATH
);
WriteBufferFromFile
file_out
(
absolute_file_path
);
HashingWriteBuffer
hashing_out
(
file_out
);
auto
file_out
=
disk
->
writeFile
(
part_download_path
+
file_name
);
HashingWriteBuffer
hashing_out
(
*
file_out
);
copyData
(
in
,
hashing_out
,
file_size
,
blocker
.
getCounter
());
if
(
blocker
.
isCancelled
())
{
/// NOTE The is_cancelled flag also makes sense to check every time you read over the network, performing a poll with a not very large timeout.
/// And now we check it only between read chunks (in the `copyData` function).
part_file
.
remove
(
true
);
disk
->
removeRecursive
(
part_download_path
);
throw
Exception
(
"Fetching of part was cancelled"
,
ErrorCodes
::
ABORTED
);
}
...
...
@@ -306,7 +306,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart(
readPODBinary
(
expected_hash
,
in
);
if
(
expected_hash
!=
hashing_out
.
getHash
())
throw
Exception
(
"Checksum mismatch for file "
+
absolute_part_path
+
file_name
+
" transferred from "
+
replica_path
,
throw
Exception
(
"Checksum mismatch for file "
+
fullPath
(
disk
,
part_download_path
+
file_name
)
+
" transferred from "
+
replica_path
,
ErrorCodes
::
CHECKSUM_DOESNT_MATCH
);
if
(
file_name
!=
"checksums.txt"
&&
...
...
@@ -316,7 +316,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart(
assertEOF
(
in
);
MergeTreeData
::
MutableDataPartPtr
new_data_part
=
data
.
createPart
(
part_name
,
reservation
->
getDisk
(),
relative_part
_path
);
MergeTreeData
::
MutableDataPartPtr
new_data_part
=
data
.
createPart
(
part_name
,
reservation
->
getDisk
(),
part_relative
_path
);
new_data_part
->
is_temp
=
true
;
new_data_part
->
modification_time
=
time
(
nullptr
);
new_data_part
->
loadColumnsChecksumsIndexes
(
true
,
false
);
...
...
src/Storages/MergeTree/MergeTreeData.h
浏览文件 @
aac7cc73
...
...
@@ -620,6 +620,8 @@ public:
return
storage_settings
.
get
();
}
/// Returns (by value) the `relative_data_path` member of this object.
String getRelativeDataPath() const
{
    return relative_data_path;
}
/// Get table path on disk
String
getFullPathOnDisk
(
const
DiskPtr
&
disk
)
const
;
...
...
tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml
浏览文件 @
aac7cc73
...
...
@@ -13,7 +13,7 @@
</hdd>
</disks>
<policies>
<
default
>
<
s3
>
<volumes>
<main>
<disk>
s3
</disk>
...
...
@@ -22,7 +22,7 @@
<disk>
hdd
</disk>
</external>
</volumes>
</
default
>
</
s3
>
</policies>
</storage_configuration>
</yandex>
tests/integration/test_merge_tree_s3/test.py
浏览文件 @
aac7cc73
...
...
@@ -67,7 +67,9 @@ def create_table(cluster, table_name, additional_settings=None):
PARTITION BY dt
ORDER BY (dt, id)
SETTINGS
old_parts_lifetime=0, index_granularity=512
storage_policy='s3',
old_parts_lifetime=0,
index_granularity=512
"""
.
format
(
table_name
)
if
additional_settings
:
...
...
@@ -84,7 +86,12 @@ def drop_table(cluster):
minio
=
cluster
.
minio_client
node
.
query
(
"DROP TABLE IF EXISTS s3_test"
)
assert
len
(
list
(
minio
.
list_objects
(
cluster
.
minio_bucket
,
'data/'
)))
==
0
try
:
assert
len
(
list
(
minio
.
list_objects
(
cluster
.
minio_bucket
,
'data/'
)))
==
0
finally
:
# Remove extra objects to prevent tests cascade failing
for
obj
in
list
(
minio
.
list_objects
(
cluster
.
minio_bucket
,
'data/'
)):
minio
.
remove_object
(
cluster
.
minio_bucket
,
obj
.
object_name
)
@
pytest
.
mark
.
parametrize
(
...
...
@@ -210,7 +217,7 @@ def test_attach_detach_partition(cluster):
assert
len
(
list
(
minio
.
list_objects
(
cluster
.
minio_bucket
,
'data/'
)))
==
FILES_OVERHEAD
+
FILES_OVERHEAD_PER_PART_WIDE
node
.
query
(
"ALTER TABLE s3_test DETACH PARTITION '2020-01-04'"
)
node
.
query
(
"
SET allow_drop_detached=1; ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'"
)
node
.
query
(
"
ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'"
,
settings
=
{
"allow_drop_detached"
:
1
}
)
assert
node
.
query
(
"SELECT count(*) FROM s3_test FORMAT Values"
)
==
"(0)"
assert
len
(
list
(
minio
.
list_objects
(
cluster
.
minio_bucket
,
'data/'
)))
==
FILES_OVERHEAD
...
...
@@ -245,8 +252,7 @@ def test_table_manipulations(cluster):
assert
len
(
list
(
minio
.
list_objects
(
cluster
.
minio_bucket
,
'data/'
)))
==
FILES_OVERHEAD
+
FILES_OVERHEAD_PER_PART_WIDE
*
2
node
.
query
(
"RENAME TABLE s3_renamed TO s3_test"
)
# TODO: Doesn't work with min_max index.
#assert node.query("SET check_query_single_value_result='false'; CHECK TABLE s3_test FORMAT Values") == "(1)"
assert
node
.
query
(
"CHECK TABLE s3_test FORMAT Values"
)
==
"(1)"
node
.
query
(
"DETACH TABLE s3_test"
)
node
.
query
(
"ATTACH TABLE s3_test"
)
...
...
tests/integration/test_replicated_merge_tree_s3/__init__.py
0 → 100644
浏览文件 @
aac7cc73
tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml
0 → 100644
浏览文件 @
aac7cc73
<yandex>
<storage_configuration>
<!--
  Storage layout declared by this file:
    * one disk named "s3" of type s3, pointing at http://minio1:9001/root/data/
      with a hard-coded access key / secret key pair;
    * one storage policy named "s3" with a single volume "main" that uses
      the "s3" disk.
  Tables opt in with SETTINGS storage_policy='s3'.
-->
<disks>
<s3>
<type>
s3
</type>
<endpoint>
http://minio1:9001/root/data/
</endpoint>
<access_key_id>
minio
</access_key_id>
<secret_access_key>
minio123
</secret_access_key>
</s3>
</disks>
<policies>
<s3>
<volumes>
<main>
<disk>
s3
</disk>
</main>
</volumes>
</s3>
</policies>
</storage_configuration>
</yandex>
tests/integration/test_replicated_merge_tree_s3/test.py
0 → 100644
浏览文件 @
aac7cc73
import
logging
import
random
import
string
import
time
import
pytest
from
helpers.cluster
import
ClickHouseCluster
# Root logger: accept INFO and above, and attach a default-constructed
# StreamHandler so the fixture's progress messages are emitted.
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())
def prepare_s3_bucket(cluster):
    """Give the test run a freshly created S3 bucket.

    If the bucket named by ``cluster.minio_bucket`` already exists it is
    removed first; the bucket is then created through
    ``cluster.minio_client``.  No access policy is configured here.

    :param cluster: object exposing ``minio_client`` and ``minio_bucket``.
    """
    client = cluster.minio_client
    bucket = cluster.minio_bucket

    already_there = client.bucket_exists(bucket)
    if already_there:
        client.remove_bucket(bucket)

    client.make_bucket(bucket)
@pytest.fixture(scope="module")
def cluster():
    """Module-scoped fixture yielding a started three-node ClickHouse cluster.

    Registers ``node1``..``node3`` (all with ZooKeeper and the ``configs``
    directory; ``node1`` additionally requests MinIO), starts the cluster,
    recreates the S3 bucket via ``prepare_s3_bucket`` and yields the cluster
    object.  ``cluster.shutdown()`` always runs afterwards, including when
    registration, start-up or bucket preparation fails.
    """
    # Build the cluster object before entering ``try``: if the constructor
    # itself raises there is nothing to shut down, and the ``finally`` clause
    # must not touch an unbound local (that would replace the real error with
    # an UnboundLocalError).
    cluster = ClickHouseCluster(__file__)
    try:
        cluster.add_instance("node1", config_dir="configs", macros={'cluster': 'test1'}, with_minio=True, with_zookeeper=True)
        cluster.add_instance("node2", config_dir="configs", macros={'cluster': 'test1'}, with_zookeeper=True)
        cluster.add_instance("node3", config_dir="configs", macros={'cluster': 'test1'}, with_zookeeper=True)

        logging.info("Starting cluster...")
        cluster.start()
        logging.info("Cluster started")

        prepare_s3_bucket(cluster)
        logging.info("S3 bucket created")

        yield cluster
    finally:
        cluster.shutdown()
# Object-count constants used when asserting how many objects end up in the
# S3 bucket.
# NOTE(review): FILES_OVERHEAD looks like a fixed per-table (per-replica)
# object count - confirm against the storage layout.
FILES_OVERHEAD = 1
# Data and mark files
FILES_OVERHEAD_PER_COLUMN = 2
# Three columns' worth of files plus 2 + 6 further objects per part.
# NOTE(review): the meaning of the 2 and the 6 is not visible here - confirm.
FILES_OVERHEAD_PER_PART = 3 * FILES_OVERHEAD_PER_COLUMN + 2 + 6
def random_string(length):
    """Return a string of ``length`` characters.

    Each character is picked with ``random.choice`` from
    ``string.ascii_letters``; ``length == 0`` yields the empty string.
    """
    alphabet = string.ascii_letters
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
def generate_values(date_str, count, sign=1):
    """Build the VALUES payload for an INSERT into the test table.

    Produces ``count`` rows of the form ``(date_str, sign * (i + 1),
    random_string(10))`` for ``i`` in ``range(count)``, sorts them by the
    numeric field, and renders them as comma-joined ``('date',id,'text')``
    tuples.  Returns an empty string when ``count`` is 0.
    """
    rows = []
    for idx in range(count):
        rows.append([date_str, sign * (idx + 1), random_string(10)])

    # Order by the numeric id (second field).
    rows.sort(key=lambda row: row[1])

    rendered = []
    for date_value, number, text in rows:
        rendered.append("('{}',{},'{}')".format(date_value, number, text))
    return ",".join(rendered)
def create_table(cluster):
    """Create the replicated ``s3_test`` table on every instance of ``cluster``.

    The same CREATE TABLE statement is sent to each value of
    ``cluster.instances``.  ``{cluster}`` and ``{instance}`` are passed
    through literally (no Python formatting is applied to the statement).
    """
    ddl = """
CREATE TABLE s3_test (
dt Date,
id Int64,
data String,
INDEX min_max (id) TYPE minmax GRANULARITY 3
) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/s3', '{instance}')
PARTITION BY dt
ORDER BY (dt, id)
SETTINGS storage_policy='s3'
"""

    for replica in cluster.instances.values():
        replica.query(ddl)
@pytest.fixture(autouse=True)
def drop_table(cluster):
    """Autouse teardown: runs after each test body.

    Drops ``s3_test`` (if present) on every cluster instance, then deletes
    every object listed under the ``data/`` prefix of the test bucket.
    """
    yield

    for instance in cluster.instances.values():
        instance.query("DROP TABLE IF EXISTS s3_test")

    client = cluster.minio_client

    # Remove extra objects to prevent tests cascade failing
    leftovers = list(client.list_objects(cluster.minio_bucket, 'data/'))
    for leftover in leftovers:
        client.remove_object(cluster.minio_bucket, leftover.object_name)
def test_insert_select_replicated(cluster):
    """Insert on each replica with quorum 3, then read everything back everywhere.

    Each of node1..node3 inserts 4096 generated rows for its own date
    ("2020-01-01".."2020-01-03") using ``insert_quorum=3``.  Every node must
    then return the concatenation of all inserted rows (ordered by dt, id)
    under ``select_sequential_consistency=1``, and the bucket must contain
    the expected number of objects under ``data/``.
    """
    create_table(cluster)

    inserted_chunks = []
    for node_idx in range(1, 4):
        suffix = str(node_idx)
        node = cluster.instances["node" + suffix]
        values = generate_values("2020-01-0" + suffix, 4096)
        insert_query = "INSERT INTO s3_test VALUES {}".format(values)
        node.query(insert_query, settings={"insert_quorum": 3})
        inserted_chunks.append(values)

    # Chunks are joined with a single comma, matching the Values output format.
    all_values = ",".join(inserted_chunks)

    for node_idx in range(1, 4):
        node = cluster.instances["node" + str(node_idx)]
        selected = node.query("SELECT * FROM s3_test order by dt, id FORMAT Values",
                              settings={"select_sequential_consistency": 1})
        assert selected == all_values

    minio = cluster.minio_client
    stored_objects = list(minio.list_objects(cluster.minio_bucket, 'data/'))
    # NOTE(review): presumably 3 replicas, each holding 3 parts - confirm.
    assert len(stored_objects) == 3 * (FILES_OVERHEAD + FILES_OVERHEAD_PER_PART * 3)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录