Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
2c124ea3
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2c124ea3
编写于
1月 28, 2015
作者:
A
Alexey Milovidov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
dbms: StorageJoin: development [#METR-2944].
上级
684b2e70
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
109 addition
and
206 deletion
+109
-206
dbms/include/DB/Storages/StorageJoin.h
dbms/include/DB/Storages/StorageJoin.h
+10
-19
dbms/include/DB/Storages/StorageSet.h
dbms/include/DB/Storages/StorageSet.h
+50
-22
dbms/src/Storages/StorageFactory.cpp
dbms/src/Storages/StorageFactory.cpp
+1
-0
dbms/src/Storages/StorageJoin.cpp
dbms/src/Storages/StorageJoin.cpp
+5
-121
dbms/src/Storages/StorageSet.cpp
dbms/src/Storages/StorageSet.cpp
+43
-44
未找到文件。
dbms/include/DB/Storages/StorageJoin.h
浏览文件 @
2c124ea3
#pragma once
#include <DB/Storages/
IStorage
.h>
#include <DB/Storages/
StorageSet
.h>
#include <DB/Interpreters/Join.h>
...
...
@@ -11,10 +11,8 @@ namespace DB
* При вставке в таблицу, данные будут вставлены в состояние,
* а также записаны в файл-бэкап, для восстановления после перезапуска.
* Чтение из таблицы напрямую невозможно - возможно лишь указание в правой части JOIN.
*
* NOTE: В основном, повторяет StorageSet. Можно обобщить.
*/
class
StorageJoin
:
public
IStorag
e
class
StorageJoin
:
public
StorageSetOrJoinBas
e
{
public:
static
StoragePtr
create
(
...
...
@@ -28,28 +26,22 @@ public:
const
ColumnDefaults
&
column_defaults_
)
{
return
(
new
StorageJoin
{
path_
,
name_
,
columns_
,
path_
,
name_
,
key_names_
,
kind_
,
strictness_
,
columns_
,
materialized_columns_
,
alias_columns_
,
column_defaults_
})
->
thisPtr
();
}
String
getName
()
const
override
{
return
"Join"
;
}
String
getTableName
()
const
override
{
return
name
;
}
const
NamesAndTypesList
&
getColumnsListImpl
()
const
override
{
return
*
columns
;
}
BlockOutputStreamPtr
write
(
ASTPtr
query
)
override
;
void
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
override
;
/// Получить доступ к внутренностям.
JoinPtr
&
getJoin
()
{
return
join
;
}
private:
String
path
;
String
name
;
NamesAndTypesListPtr
columns
;
const
Names
&
key_names
;
ASTJoin
::
Kind
kind
;
/// LEFT | INNER
ASTJoin
::
Strictness
strictness
;
/// ANY | ALL
UInt64
increment
=
0
;
/// Для имён файлов бэкапа.
JoinPtr
join
;
StorageJoin
(
...
...
@@ -62,9 +54,8 @@ private:
const
NamesAndTypesList
&
alias_columns_
,
const
ColumnDefaults
&
column_defaults_
);
/// Восстановление из бэкапа.
void
restore
();
void
restoreFromFile
(
const
String
&
file_path
,
const
DataTypeFactory
&
data_type_factory
);
void
insertBlock
(
const
Block
&
block
)
override
{
join
->
insertFromBlock
(
block
);
}
size_t
getSize
()
const
override
{
return
join
->
getTotalRowCount
();
};
};
}
dbms/include/DB/Storages/StorageSet.h
浏览文件 @
2c124ea3
#pragma once
#include <DB/IO/WriteBufferFromFile.h>
#include <DB/IO/CompressedWriteBuffer.h>
#include <DB/DataStreams/NativeBlockOutputStream.h>
#include <DB/Storages/IStorage.h>
#include <DB/Interpreters/Set.h>
...
...
@@ -10,25 +14,62 @@ namespace DB
/** Общая часть StorageSet и StorageJoin.
*/
class
StorageSet
And
JoinBase
:
public
IStorage
class
StorageSet
Or
JoinBase
:
public
IStorage
{
friend
class
SetOrJoinBlockOutputStream
;
public:
String
getTableName
()
const
override
{
return
name
;
}
const
NamesAndTypesList
&
getColumnsListImpl
()
const
override
{
return
*
columns
;
}
void
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
override
;
BlockOutputStreamPtr
write
(
ASTPtr
query
)
override
;
protected:
StorageSetOrJoinBase
(
const
String
&
path_
,
const
String
&
name_
,
NamesAndTypesListPtr
columns_
,
const
NamesAndTypesList
&
materialized_columns_
,
const
NamesAndTypesList
&
alias_columns_
,
const
ColumnDefaults
&
column_defaults_
);
String
path
;
String
name
;
NamesAndTypesListPtr
columns
;
UInt64
increment
=
0
;
/// Для имён файлов бэкапа.
String
getTableName
()
const
override
{
return
name
;
}
const
NamesAndTypesList
&
getColumnsListImpl
()
const
override
{
return
*
columns
;
}
void
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
override
;
/// Восстановление из бэкапа.
void
restore
();
private:
void
restoreFromFile
(
const
String
&
file_path
,
const
DataTypeFactory
&
data_type_factory
);
/// Вставить блок в состояние.
virtual
void
insertBlock
(
const
Block
&
block
)
=
0
;
virtual
size_t
getSize
()
const
=
0
;
};
class
SetOrJoinBlockOutputStream
:
public
IBlockOutputStream
{
public:
SetOrJoinBlockOutputStream
(
StorageSetOrJoinBase
&
table_
,
const
String
&
backup_path_
,
const
String
&
backup_tmp_path_
,
const
String
&
backup_file_name_
);
void
write
(
const
Block
&
block
)
override
;
void
writeSuffix
()
override
;
private:
StorageSetOrJoinBase
&
table
;
String
backup_path
;
String
backup_tmp_path
;
String
backup_file_name
;
WriteBufferFromFile
backup_buf
;
CompressedWriteBuffer
compressed_backup_buf
;
NativeBlockOutputStream
backup_stream
;
};
...
...
@@ -37,7 +78,7 @@ protected:
* а также записаны в файл-бэкап, для восстановления после перезапуска.
* Чтение из таблицы напрямую невозможно - возможно лишь указание в правой части оператора IN.
*/
class
StorageSet
:
public
IStorag
e
class
StorageSet
:
public
StorageSetOrJoinBas
e
{
public:
static
StoragePtr
create
(
...
...
@@ -54,23 +95,11 @@ public:
}
String
getName
()
const
override
{
return
"Set"
;
}
String
getTableName
()
const
override
{
return
name
;
}
const
NamesAndTypesList
&
getColumnsListImpl
()
const
override
{
return
*
columns
;
}
BlockOutputStreamPtr
write
(
ASTPtr
query
)
override
;
void
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
override
;
/// Получить доступ к внутренностям.
SetPtr
&
getSet
()
{
return
set
;
}
private:
String
path
;
String
name
;
NamesAndTypesListPtr
columns
;
UInt64
increment
=
0
;
/// Для имён файлов бэкапа.
SetPtr
set
{
new
Set
{
Limits
{}}
};
StorageSet
(
...
...
@@ -81,9 +110,8 @@ private:
const
NamesAndTypesList
&
alias_columns_
,
const
ColumnDefaults
&
column_defaults_
);
/// Восстановление из бэкапа.
void
restore
();
void
restoreFromFile
(
const
String
&
file_path
,
const
DataTypeFactory
&
data_type_factory
);
void
insertBlock
(
const
Block
&
block
)
override
{
set
->
insertFromBlock
(
block
);
}
size_t
getSize
()
const
override
{
return
set
->
getTotalRowCount
();
};
};
}
dbms/src/Storages/StorageFactory.cpp
浏览文件 @
2c124ea3
...
...
@@ -26,6 +26,7 @@
#include <DB/Storages/StorageChunkMerger.h>
#include <DB/Storages/StorageReplicatedMergeTree.h>
#include <DB/Storages/StorageSet.h>
#include <DB/Storages/StorageJoin.h>
namespace
DB
...
...
dbms/src/Storages/StorageJoin.cpp
浏览文件 @
2c124ea3
#include <DB/Storages/StorageJoin.h>
#include <DB/IO/WriteBufferFromFile.h>
#include <DB/IO/ReadBufferFromFile.h>
#include <DB/IO/CompressedWriteBuffer.h>
#include <DB/IO/CompressedReadBuffer.h>
#include <DB/DataStreams/NativeBlockOutputStream.h>
#include <DB/DataStreams/NativeBlockInputStream.h>
#include <DB/Common/escapeForFileName.h>
namespace
DB
{
class
JoinBlockOutputStream
:
public
IBlockOutputStream
{
public:
JoinBlockOutputStream
(
JoinPtr
&
join_
,
const
String
&
backup_path_
,
const
String
&
backup_tmp_path_
,
const
String
&
backup_file_name_
)
:
join
(
join_
),
backup_path
(
backup_path_
),
backup_tmp_path
(
backup_tmp_path_
),
backup_file_name
(
backup_file_name_
),
backup_buf
(
backup_tmp_path
+
backup_file_name
),
compressed_backup_buf
(
backup_buf
),
backup_stream
(
compressed_backup_buf
)
{
}
void
write
(
const
Block
&
block
)
override
{
join
->
insertFromBlock
(
block
);
backup_stream
.
write
(
block
);
}
void
writeSuffix
()
override
{
backup_stream
.
flush
();
compressed_backup_buf
.
next
();
backup_buf
.
next
();
Poco
::
File
(
backup_tmp_path
+
backup_file_name
).
renameTo
(
backup_path
+
backup_file_name
);
}
private:
JoinPtr
join
;
String
backup_path
;
String
backup_tmp_path
;
String
backup_file_name
;
WriteBufferFromFile
backup_buf
;
CompressedWriteBuffer
compressed_backup_buf
;
NativeBlockOutputStream
backup_stream
;
};
BlockOutputStreamPtr
StorageJoin
::
write
(
ASTPtr
query
)
{
++
increment
;
return
new
JoinBlockOutputStream
(
join
,
path
,
path
+
"tmp/"
,
toString
(
increment
)
+
".bin"
);
}
StorageJoin
::
StorageJoin
(
const
String
&
path_
,
const
String
&
name_
,
const
Names
&
key_names_
,
ASTJoin
::
Kind
kind_
,
ASTJoin
::
Strictness
strictness_
,
NamesAndTypesListPtr
columns_
,
const
NamesAndTypesList
&
materialized_columns_
,
const
NamesAndTypesList
&
alias_columns_
,
const
ColumnDefaults
&
column_defaults_
)
:
IStorage
{
materialized_columns_
,
alias_columns_
,
column_defaults_
},
path
(
path_
+
escapeForFileName
(
name_
)
+
'/'
),
name
(
name_
),
columns
(
column
s_
)
:
StorageSetOrJoinBase
{
path_
,
name_
,
columns_
,
materialized_columns_
,
alias_columns_
,
column_defaults_
},
key_names
(
key_names_
),
kind
(
kind_
),
strictness
(
strictnes
s_
)
{
join
=
new
Join
(
key_names
,
key_names
,
Limits
(),
kind
,
strictness
);
restore
();
}
void
StorageJoin
::
restore
()
{
Poco
::
File
tmp_dir
(
path
+
"tmp/"
);
if
(
!
tmp_dir
.
exists
())
{
tmp_dir
.
createDirectories
();
return
;
}
constexpr
auto
file_suffix
=
".bin"
;
constexpr
auto
file_suffix_size
=
strlen
(
file_suffix
);
DataTypeFactory
data_type_factory
;
Poco
::
DirectoryIterator
dir_end
;
for
(
Poco
::
DirectoryIterator
dir_it
(
path
);
dir_end
!=
dir_it
;
++
dir_it
)
{
const
auto
&
name
=
dir_it
.
name
();
if
(
dir_it
->
isFile
()
&&
name
.
size
()
>
file_suffix_size
&&
0
==
name
.
compare
(
name
.
size
()
-
file_suffix_size
,
file_suffix_size
,
file_suffix
)
&&
dir_it
->
getSize
()
>
0
)
{
/// Вычисляем максимальный номер имеющихся файлов с бэкапом, чтобы добавлять следующие файлы с большими номерами.
UInt64
file_num
=
parse
<
UInt64
>
(
name
.
substr
(
0
,
name
.
size
()
-
file_suffix_size
));
if
(
file_num
>
increment
)
increment
=
file_num
;
restoreFromFile
(
dir_it
->
path
(),
data_type_factory
);
}
}
}
void
StorageJoin
::
restoreFromFile
(
const
String
&
file_path
,
const
DataTypeFactory
&
data_type_factory
)
{
ReadBufferFromFile
backup_buf
(
file_path
);
CompressedReadBuffer
compressed_backup_buf
(
backup_buf
);
NativeBlockInputStream
backup_stream
(
compressed_backup_buf
,
data_type_factory
);
backup_stream
.
readPrefix
();
while
(
Block
block
=
backup_stream
.
read
())
join
->
insertFromBlock
(
block
);
backup_stream
.
readSuffix
();
/// TODO Добавить скорость, сжатые байты, объём данных в памяти, коэффициент сжатия... Обобщить всё логгирование статистики в проекте.
LOG_INFO
(
&
Logger
::
get
(
"StorageJoin"
),
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
"Loaded from backup file "
<<
file_path
<<
". "
<<
backup_stream
.
getInfo
().
rows
<<
" rows, "
<<
backup_stream
.
getInfo
().
bytes
/
1048576.0
<<
" MiB. "
<<
"Join has "
<<
join
->
getTotalRowCount
()
<<
" unique rows."
);
}
void
StorageJoin
::
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
{
/// Переименовываем директорию с данными.
String
new_path
=
new_path_to_db
+
escapeForFileName
(
new_table_name
);
Poco
::
File
(
path
).
renameTo
(
new_path
);
path
=
new_path
+
"/"
;
name
=
new_table_name
;
}
}
dbms/src/Storages/StorageSet.cpp
浏览文件 @
2c124ea3
#include <DB/Storages/StorageSet.h>
#include <DB/IO/WriteBufferFromFile.h>
#include <DB/IO/ReadBufferFromFile.h>
#include <DB/IO/CompressedWriteBuffer.h>
#include <DB/IO/CompressedReadBuffer.h>
#include <DB/DataStreams/NativeBlockOutputStream.h>
#include <DB/DataStreams/NativeBlockInputStream.h>
#include <DB/Common/escapeForFileName.h>
...
...
@@ -12,53 +9,42 @@ namespace DB
{
class
SetBlockOutputStream
:
public
IBlockOutputStream
SetOrJoinBlockOutputStream
::
SetOrJoinBlockOutputStream
(
StorageSetOrJoinBase
&
table_
,
const
String
&
backup_path_
,
const
String
&
backup_tmp_path_
,
const
String
&
backup_file_name_
)
:
table
(
table_
),
backup_path
(
backup_path_
),
backup_tmp_path
(
backup_tmp_path_
),
backup_file_name
(
backup_file_name_
),
backup_buf
(
backup_tmp_path
+
backup_file_name
),
compressed_backup_buf
(
backup_buf
),
backup_stream
(
compressed_backup_buf
)
{
public:
SetBlockOutputStream
(
SetPtr
&
set_
,
const
String
&
backup_path_
,
const
String
&
backup_tmp_path_
,
const
String
&
backup_file_name_
)
:
set
(
set_
),
backup_path
(
backup_path_
),
backup_tmp_path
(
backup_tmp_path_
),
backup_file_name
(
backup_file_name_
),
backup_buf
(
backup_tmp_path
+
backup_file_name
),
compressed_backup_buf
(
backup_buf
),
backup_stream
(
compressed_backup_buf
)
{
}
}
void
write
(
const
Block
&
block
)
override
{
set
->
insertFrom
Block
(
block
);
backup_stream
.
write
(
block
);
}
void
SetOrJoinBlockOutputStream
::
write
(
const
Block
&
block
)
{
table
.
insert
Block
(
block
);
backup_stream
.
write
(
block
);
}
void
writeSuffix
()
override
{
backup_stream
.
flush
();
compressed_backup_buf
.
next
();
backup_buf
.
next
();
void
SetOrJoinBlockOutputStream
::
writeSuffix
()
{
backup_stream
.
flush
();
compressed_backup_buf
.
next
();
backup_buf
.
next
();
Poco
::
File
(
backup_tmp_path
+
backup_file_name
).
renameTo
(
backup_path
+
backup_file_name
);
}
Poco
::
File
(
backup_tmp_path
+
backup_file_name
).
renameTo
(
backup_path
+
backup_file_name
);
}
private:
SetPtr
set
;
String
backup_path
;
String
backup_tmp_path
;
String
backup_file_name
;
WriteBufferFromFile
backup_buf
;
CompressedWriteBuffer
compressed_backup_buf
;
NativeBlockOutputStream
backup_stream
;
};
BlockOutputStreamPtr
StorageSet
::
write
(
ASTPtr
query
)
BlockOutputStreamPtr
StorageSet
OrJoinBase
::
write
(
ASTPtr
query
)
{
++
increment
;
return
new
Set
BlockOutputStream
(
set
,
path
,
path
+
"tmp/"
,
toString
(
increment
)
+
".bin"
);
return
new
Set
OrJoinBlockOutputStream
(
*
this
,
path
,
path
+
"tmp/"
,
toString
(
increment
)
+
".bin"
);
}
StorageSet
::
StorageSet
(
StorageSet
OrJoinBase
::
StorageSetOrJoinBase
(
const
String
&
path_
,
const
String
&
name_
,
NamesAndTypesListPtr
columns_
,
...
...
@@ -67,12 +53,25 @@ StorageSet::StorageSet(
const
ColumnDefaults
&
column_defaults_
)
:
IStorage
{
materialized_columns_
,
alias_columns_
,
column_defaults_
},
path
(
path_
+
escapeForFileName
(
name_
)
+
'/'
),
name
(
name_
),
columns
(
columns_
)
{
}
StorageSet
::
StorageSet
(
const
String
&
path_
,
const
String
&
name_
,
NamesAndTypesListPtr
columns_
,
const
NamesAndTypesList
&
materialized_columns_
,
const
NamesAndTypesList
&
alias_columns_
,
const
ColumnDefaults
&
column_defaults_
)
:
StorageSetOrJoinBase
{
path_
,
name_
,
columns_
,
materialized_columns_
,
alias_columns_
,
column_defaults_
}
{
restore
();
}
void
StorageSet
::
restore
()
void
StorageSet
OrJoinBase
::
restore
()
{
Poco
::
File
tmp_dir
(
path
+
"tmp/"
);
if
(
!
tmp_dir
.
exists
())
...
...
@@ -107,7 +106,7 @@ void StorageSet::restore()
}
void
StorageSet
::
restoreFromFile
(
const
String
&
file_path
,
const
DataTypeFactory
&
data_type_factory
)
void
StorageSet
OrJoinBase
::
restoreFromFile
(
const
String
&
file_path
,
const
DataTypeFactory
&
data_type_factory
)
{
ReadBufferFromFile
backup_buf
(
file_path
);
CompressedReadBuffer
compressed_backup_buf
(
backup_buf
);
...
...
@@ -115,19 +114,19 @@ void StorageSet::restoreFromFile(const String & file_path, const DataTypeFactory
backup_stream
.
readPrefix
();
while
(
Block
block
=
backup_stream
.
read
())
set
->
insertFrom
Block
(
block
);
insert
Block
(
block
);
backup_stream
.
readSuffix
();
/// TODO Добавить скорость, сжатые байты, объём данных в памяти, коэффициент сжатия... Обобщить всё логгирование статистики в проекте.
LOG_INFO
(
&
Logger
::
get
(
"StorageSet"
),
std
::
fixed
<<
std
::
setprecision
(
2
)
LOG_INFO
(
&
Logger
::
get
(
"StorageSet
OrJoinBase
"
),
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
"Loaded from backup file "
<<
file_path
<<
". "
<<
backup_stream
.
getInfo
().
rows
<<
" rows, "
<<
backup_stream
.
getInfo
().
bytes
/
1048576.0
<<
" MiB. "
<<
"S
et has "
<<
set
->
getTotalRowCount
()
<<
" unique rows."
);
<<
"S
tate has "
<<
getSize
()
<<
" unique rows."
);
}
void
StorageSet
::
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
void
StorageSet
OrJoinBase
::
rename
(
const
String
&
new_path_to_db
,
const
String
&
new_database_name
,
const
String
&
new_table_name
)
{
/// Переименовываем директорию с данными.
String
new_path
=
new_path_to_db
+
escapeForFileName
(
new_table_name
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录