Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
76153ed8
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
76153ed8
编写于
2月 17, 2020
作者:
A
Alexey Milovidov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
clickhouse-obfuscator: allow to generate more data than was in source
上级
c67dbc51
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
71 addition
and
12 deletion
+71
-12
dbms/programs/obfuscator/Obfuscator.cpp
dbms/programs/obfuscator/Obfuscator.cpp
+71
-12
未找到文件。
dbms/programs/obfuscator/Obfuscator.cpp
浏览文件 @
76153ed8
...
...
@@ -108,6 +108,9 @@ public:
/// Call generate: pass source data column to obtain a column with anonymized data as a result.
virtual
ColumnPtr
generate
(
const
IColumn
&
column
);
/// Deterministically change seed to some other value. This can be used to generate more values than were in source.
virtual
void
updateSeed
();
virtual
~
IModel
()
{}
};
...
...
@@ -175,7 +178,7 @@ static UInt64 transform(UInt64 x, UInt64 seed)
class
UnsignedIntegerModel
:
public
IModel
{
private:
const
UInt64
seed
;
UInt64
seed
;
public:
UnsignedIntegerModel
(
UInt64
seed_
)
:
seed
(
seed_
)
{}
...
...
@@ -195,6 +198,11 @@ public:
return
res
;
}
void
updateSeed
()
override
{
seed
=
hash
(
seed
);
}
};
...
...
@@ -211,7 +219,7 @@ static Int64 transformSigned(Int64 x, UInt64 seed)
class
SignedIntegerModel
:
public
IModel
{
private:
const
UInt64
seed
;
UInt64
seed
;
public:
SignedIntegerModel
(
UInt64
seed_
)
:
seed
(
seed_
)
{}
...
...
@@ -231,6 +239,11 @@ public:
return
res
;
}
void
updateSeed
()
override
{
seed
=
hash
(
seed
);
}
};
...
...
@@ -253,7 +266,7 @@ template <typename Float>
class
FloatModel
:
public
IModel
{
private:
const
UInt64
seed
;
UInt64
seed
;
Float
src_prev_value
=
0
;
Float
res_prev_value
=
0
;
...
...
@@ -280,6 +293,11 @@ public:
return
res_column
;
}
void
updateSeed
()
override
{
seed
=
hash
(
seed
);
}
};
...
...
@@ -294,6 +312,10 @@ public:
{
return
column
.
cloneResized
(
column
.
size
());
}
void
updateSeed
()
override
{
}
};
...
...
@@ -347,7 +369,7 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
class
FixedStringModel
:
public
IModel
{
private:
const
UInt64
seed
;
UInt64
seed
;
public:
FixedStringModel
(
UInt64
seed_
)
:
seed
(
seed_
)
{}
...
...
@@ -373,6 +395,11 @@ public:
return
res_column
;
}
void
updateSeed
()
override
{
seed
=
hash
(
seed
);
}
};
...
...
@@ -380,7 +407,7 @@ public:
class
DateTimeModel
:
public
IModel
{
private:
const
UInt64
seed
;
UInt64
seed
;
UInt32
src_prev_value
=
0
;
UInt32
res_prev_value
=
0
;
...
...
@@ -418,6 +445,11 @@ public:
return
res_column
;
}
void
updateSeed
()
override
{
seed
=
hash
(
seed
);
}
};
...
...
@@ -790,6 +822,11 @@ public:
return
res_column
;
}
void
updateSeed
()
override
{
seed
=
hash
(
seed
);
}
};
...
...
@@ -823,6 +860,11 @@ public:
return
ColumnArray
::
create
((
*
std
::
move
(
new_nested_column
)).
mutate
(),
(
*
std
::
move
(
column_array
.
getOffsetsPtr
())).
mutate
());
}
void
updateSeed
()
override
{
nested_model
->
updateSeed
();
}
};
...
...
@@ -856,6 +898,11 @@ public:
return
ColumnNullable
::
create
((
*
std
::
move
(
new_nested_column
)).
mutate
(),
(
*
std
::
move
(
column_nullable
.
getNullMapColumnPtr
())).
mutate
());
}
void
updateSeed
()
override
{
nested_model
->
updateSeed
();
}
};
...
...
@@ -939,6 +986,12 @@ public:
res
[
i
]
=
models
[
i
]
->
generate
(
*
columns
[
i
]);
return
res
;
}
void
updateSeed
()
{
for
(
auto
&
model
:
models
)
model
->
updateSeed
();
}
};
}
...
...
@@ -993,7 +1046,7 @@ try
std
::
string
input_format
=
options
[
"input-format"
].
as
<
std
::
string
>
();
std
::
string
output_format
=
options
[
"output-format"
].
as
<
std
::
string
>
();
std
::
optional
<
UInt64
>
limit
;
UInt64
limit
=
0
;
if
(
options
.
count
(
"limit"
))
limit
=
options
[
"limit"
].
as
<
UInt64
>
();
...
...
@@ -1045,27 +1098,32 @@ try
UInt64
max_block_size
=
8192
;
/// Train step
UInt64
source_rows
=
0
;
{
if
(
!
silent
)
std
::
cerr
<<
"Training models
\n
"
;
BlockInputStreamPtr
input
=
context
.
getInputFormat
(
input_format
,
file_in
,
header
,
max_block_size
);
UInt64
processed_rows
=
0
;
input
->
readPrefix
();
while
(
Block
block
=
input
->
read
())
{
obfuscator
.
train
(
block
.
getColumns
());
processed
_rows
+=
block
.
rows
();
source
_rows
+=
block
.
rows
();
if
(
!
silent
)
std
::
cerr
<<
"Processed "
<<
processed
_rows
<<
" rows
\n
"
;
std
::
cerr
<<
"Processed "
<<
source
_rows
<<
" rows
\n
"
;
}
input
->
readSuffix
();
}
obfuscator
.
finalize
();
if
(
!
limit
)
limit
=
source_rows
;
/// Generation step
UInt64
processed_rows
=
0
;
while
(
processed_rows
<
limit
)
{
if
(
!
silent
)
std
::
cerr
<<
"Generating data
\n
"
;
...
...
@@ -1075,10 +1133,9 @@ try
BlockInputStreamPtr
input
=
context
.
getInputFormat
(
input_format
,
file_in
,
header
,
max_block_size
);
BlockOutputStreamPtr
output
=
context
.
getOutputFormat
(
output_format
,
file_out
,
header
);
if
(
limit
)
input
=
std
::
make_shared
<
LimitBlockInputStream
>
(
input
,
*
limit
,
0
);
if
(
processed_rows
+
source_rows
>
limit
)
input
=
std
::
make_shared
<
LimitBlockInputStream
>
(
input
,
limit
-
processed_rows
,
0
);
UInt64
processed_rows
=
0
;
input
->
readPrefix
();
output
->
writePrefix
();
while
(
Block
block
=
input
->
read
())
...
...
@@ -1091,6 +1148,8 @@ try
}
output
->
writeSuffix
();
input
->
readSuffix
();
obfuscator
.
updateSeed
();
}
return
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录