Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
604de2bf
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
604de2bf
编写于
6月 21, 2017
作者:
A
Alexey Milovidov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Better [#DEVTOOLS-3381].
上级
a736ef61
变更
27
隐藏空白更改
内联
并排
Showing 27 changed files with 92 additions and 121 deletions
+92
-121
contrib/libcityhash/include/city.h
contrib/libcityhash/include/city.h
+11
-3
contrib/libcityhash/include/citycrc.h
contrib/libcityhash/include/citycrc.h
+1
-1
contrib/libcityhash/src/city.cc
contrib/libcityhash/src/city.cc
+14
-13
dbms/src/AggregateFunctions/AggregateFunctionUniq.h
dbms/src/AggregateFunctions/AggregateFunctionUniq.h
+2
-2
dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
+1
-1
dbms/src/AggregateFunctions/UniqVariadicHash.h
dbms/src/AggregateFunctions/UniqVariadicHash.h
+4
-4
dbms/src/Common/UInt128.h
dbms/src/Common/UInt128.h
+2
-2
dbms/src/Common/tests/hashes_test.cpp
dbms/src/Common/tests/hashes_test.cpp
+5
-5
dbms/src/Core/StringRef.h
dbms/src/Core/StringRef.h
+2
-2
dbms/src/Functions/FunctionsHashing.h
dbms/src/Functions/FunctionsHashing.h
+7
-7
dbms/src/IO/CompressedReadBufferBase.cpp
dbms/src/IO/CompressedReadBufferBase.cpp
+2
-2
dbms/src/IO/CompressedWriteBuffer.cpp
dbms/src/IO/CompressedWriteBuffer.cpp
+1
-1
dbms/src/IO/HashingWriteBuffer.cpp
dbms/src/IO/HashingWriteBuffer.cpp
+0
-33
dbms/src/IO/HashingWriteBuffer.h
dbms/src/IO/HashingWriteBuffer.h
+4
-8
dbms/src/IO/ReadHelpers.h
dbms/src/IO/ReadHelpers.h
+8
-9
dbms/src/IO/WriteHelpers.h
dbms/src/IO/WriteHelpers.h
+4
-5
dbms/src/IO/tests/hashing_buffer.h
dbms/src/IO/tests/hashing_buffer.h
+4
-4
dbms/src/IO/tests/hashing_read_buffer.cpp
dbms/src/IO/tests/hashing_read_buffer.cpp
+1
-2
dbms/src/IO/tests/hashing_write_buffer.cpp
dbms/src/IO/tests/hashing_write_buffer.cpp
+1
-1
dbms/src/Interpreters/tests/hash_map_string.cpp
dbms/src/Interpreters/tests/hash_map_string.cpp
+1
-1
dbms/src/Interpreters/tests/hash_map_string_2.cpp
dbms/src/Interpreters/tests/hash_map_string_2.cpp
+1
-1
dbms/src/Interpreters/tests/hash_map_string_3.cpp
dbms/src/Interpreters/tests/hash_map_string_3.cpp
+1
-1
dbms/src/Storages/MergeTree/DataPartsExchange.cpp
dbms/src/Storages/MergeTree/DataPartsExchange.cpp
+3
-3
dbms/src/Storages/MergeTree/MergeTreeData.cpp
dbms/src/Storages/MergeTree/MergeTreeData.cpp
+1
-1
dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
+5
-5
dbms/src/Storages/MergeTree/MergeTreeDataPart.h
dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+3
-1
dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
+3
-3
未找到文件。
contrib/libcityhash/include/city.h
浏览文件 @
604de2bf
...
...
@@ -47,14 +47,22 @@
#include <stdint.h>
#include <utility>
/** This is a version of CityHash that predates v1.0.3 algorithm change.
* Why do we need exactly this version?
* Although hash values of CityHash are not recommended for storing persistently anywhere,
* it has already been used this way in ClickHouse:
* - for calculation of checksums of compressed chunks and for data parts;
* - this version of CityHash is exposed in cityHash64 function in ClickHouse SQL language;
* - and already used by many users for data ordering, sampling and sharding.
*/
namespace
CityHash64_v1_0_2
{
typedef
uint8_t
uint8
;
typedef
uint32_t
uint32
;
typedef
uint64_t
uint64
;
typedef
std
::
pair
<
uint64
,
uint64
>
uint128
;
/// This is a version of CityHash that predates v1.0.3 algorithm change.
namespace
DB
{
inline
uint64
Uint128Low64
(
const
uint128
&
x
)
{
return
x
.
first
;
}
inline
uint64
Uint128High64
(
const
uint128
&
x
)
{
return
x
.
second
;
}
...
...
contrib/libcityhash/include/citycrc.h
浏览文件 @
604de2bf
...
...
@@ -30,7 +30,7 @@
#include <city.h>
namespace
DB
namespace
CityHash64_v1_0_2
{
// Hash function for a byte array.
...
...
contrib/libcityhash/src/city.cc
浏览文件 @
604de2bf
...
...
@@ -35,17 +35,6 @@
using
namespace
std
;
static
uint64
UNALIGNED_LOAD64
(
const
char
*
p
)
{
uint64
result
;
memcpy
(
&
result
,
p
,
sizeof
(
result
));
return
result
;
}
static
uint32
UNALIGNED_LOAD32
(
const
char
*
p
)
{
uint32
result
;
memcpy
(
&
result
,
p
,
sizeof
(
result
));
return
result
;
}
#if !defined(WORDS_BIGENDIAN)
...
...
@@ -82,9 +71,21 @@ static uint32 UNALIGNED_LOAD32(const char *p) {
#endif
#endif
namespace
DB
namespace
CityHash64_v1_0_2
{
static
uint64
UNALIGNED_LOAD64
(
const
char
*
p
)
{
uint64
result
;
memcpy
(
&
result
,
p
,
sizeof
(
result
));
return
result
;
}
static
uint32
UNALIGNED_LOAD32
(
const
char
*
p
)
{
uint32
result
;
memcpy
(
&
result
,
p
,
sizeof
(
result
));
return
result
;
}
static
uint64
Fetch64
(
const
char
*
p
)
{
return
uint64_in_expected_order
(
UNALIGNED_LOAD64
(
p
));
}
...
...
@@ -362,7 +363,7 @@ uint128 CityHash128(const char *s, size_t len) {
#include <citycrc.h>
#include <nmmintrin.h>
namespace
DB
namespace
CityHash64_v1_0_2
{
// Requires len >= 240.
...
...
dbms/src/AggregateFunctions/AggregateFunctionUniq.h
浏览文件 @
604de2bf
...
...
@@ -266,7 +266,7 @@ struct OneAdder<T, Data, typename std::enable_if<
typename
std
::
enable_if
<
std
::
is_same
<
T2
,
String
>::
value
>::
type
*
=
nullptr
)
{
StringRef
value
=
column
.
getDataAt
(
row_num
);
data
.
set
.
insert
(
CityHash64
(
value
.
data
,
value
.
size
));
data
.
set
.
insert
(
CityHash64
_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
));
}
};
...
...
@@ -290,7 +290,7 @@ struct OneAdder<T, Data, typename std::enable_if<
typename
std
::
enable_if
<
std
::
is_same
<
T2
,
String
>::
value
>::
type
*
=
nullptr
)
{
StringRef
value
=
column
.
getDataAt
(
row_num
);
data
.
set
.
insert
(
CityHash64
(
value
.
data
,
value
.
size
));
data
.
set
.
insert
(
CityHash64
_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
));
}
};
...
...
dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h
浏览文件 @
604de2bf
...
...
@@ -107,7 +107,7 @@ struct AggregateFunctionUniqUpToData<String> : AggregateFunctionUniqUpToData<UIn
{
/// Keep in mind that calculations are approximate.
StringRef
value
=
column
.
getDataAt
(
row_num
);
insert
(
CityHash64
(
value
.
data
,
value
.
size
),
threshold
);
insert
(
CityHash64
_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
),
threshold
);
}
};
...
...
dbms/src/AggregateFunctions/UniqVariadicHash.h
浏览文件 @
604de2bf
...
...
@@ -39,14 +39,14 @@ struct UniqVariadicHash<false, false>
{
StringRef
value
=
(
*
column
)
->
getDataAt
(
row_num
);
hash
=
CityHash64
(
value
.
data
,
value
.
size
);
hash
=
CityHash64
_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
);
++
column
;
}
while
(
column
<
columns_end
)
{
StringRef
value
=
(
*
column
)
->
getDataAt
(
row_num
);
hash
=
Hash128to64
(
uint128
(
CityHash64
(
value
.
data
,
value
.
size
),
hash
));
hash
=
CityHash64_v1_0_2
::
Hash128to64
(
CityHash64_v1_0_2
::
uint128
(
CityHash64_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
),
hash
));
++
column
;
}
...
...
@@ -68,14 +68,14 @@ struct UniqVariadicHash<false, true>
{
StringRef
value
=
column
->
get
()
->
getDataAt
(
row_num
);
hash
=
CityHash64
(
value
.
data
,
value
.
size
);
hash
=
CityHash64
_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
);
++
column
;
}
while
(
column
<
columns_end
)
{
StringRef
value
=
column
->
get
()
->
getDataAt
(
row_num
);
hash
=
Hash128to64
(
uint128
(
CityHash64
(
value
.
data
,
value
.
size
),
hash
));
hash
=
CityHash64_v1_0_2
::
Hash128to64
(
CityHash64_v1_0_2
::
uint128
(
CityHash64_v1_0_2
::
CityHash64
(
value
.
data
,
value
.
size
),
hash
));
++
column
;
}
...
...
dbms/src/Common/UInt128.h
浏览文件 @
604de2bf
...
...
@@ -42,7 +42,7 @@ struct UInt128Hash
{
size_t
operator
()(
UInt128
x
)
const
{
return
Hash128to64
({
x
.
first
,
x
.
second
});
return
CityHash64_v1_0_2
::
Hash128to64
({
x
.
first
,
x
.
second
});
}
};
...
...
@@ -122,7 +122,7 @@ struct UInt256Hash
size_t
operator
()(
UInt256
x
)
const
{
/// NOTE suboptimal
return
Hash128to64
({
Hash128to64
({
x
.
a
,
x
.
b
}),
Hash128to64
({
x
.
c
,
x
.
d
})});
return
CityHash64_v1_0_2
::
Hash128to64
({
CityHash64_v1_0_2
::
Hash128to64
({
x
.
a
,
x
.
b
}),
CityHash64_v1_0_2
::
Hash128to64
({
x
.
c
,
x
.
d
})});
}
};
...
...
dbms/src/Common/tests/hashes_test.cpp
浏览文件 @
604de2bf
...
...
@@ -48,12 +48,12 @@ int main(int argc, char ** argv)
for
(
size_t
i
=
0
;
i
<
rows
;
++
i
)
{
*
reinterpret_cast
<
UInt64
*>
(
&
hashes
[
i
*
16
])
=
DB
::
CityHash64
(
strings
[
i
].
data
(),
strings
[
i
].
size
());
*
reinterpret_cast
<
UInt64
*>
(
&
hashes
[
i
*
16
])
=
CityHash64_v1_0_2
::
CityHash64
(
strings
[
i
].
data
(),
strings
[
i
].
size
());
}
watch
.
stop
();
UInt64
check
=
DB
::
CityHash64
(
&
hashes
[
0
],
hashes
.
size
());
UInt64
check
=
CityHash64_v1_0_2
::
CityHash64
(
&
hashes
[
0
],
hashes
.
size
());
std
::
cerr
<<
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
"CityHash64 (check = "
<<
check
<<
")"
...
...
@@ -78,7 +78,7 @@ int main(int argc, char ** argv)
watch.stop();
UInt64 check =
DB
::CityHash64(&hashes[0], hashes.size());
UInt64 check =
CityHash64_v1_0_2
::CityHash64(&hashes[0], hashes.size());
std::cerr << std::fixed << std::setprecision(2)
<< "SipHash (check = " << check << ")"
...
...
@@ -99,7 +99,7 @@ int main(int argc, char ** argv)
watch
.
stop
();
UInt64
check
=
DB
::
CityHash64
(
&
hashes
[
0
],
hashes
.
size
());
UInt64
check
=
CityHash64_v1_0_2
::
CityHash64
(
&
hashes
[
0
],
hashes
.
size
());
std
::
cerr
<<
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
"SipHash, stream (check = "
<<
check
<<
")"
...
...
@@ -121,7 +121,7 @@ int main(int argc, char ** argv)
watch
.
stop
();
UInt64
check
=
DB
::
CityHash64
(
&
hashes
[
0
],
hashes
.
size
());
UInt64
check
=
CityHash64_v1_0_2
::
CityHash64
(
&
hashes
[
0
],
hashes
.
size
());
std
::
cerr
<<
std
::
fixed
<<
std
::
setprecision
(
2
)
<<
"MD5 (check = "
<<
check
<<
")"
...
...
dbms/src/Core/StringRef.h
浏览文件 @
604de2bf
...
...
@@ -165,7 +165,7 @@ struct StringRefHash64
{
size_t
operator
()
(
StringRef
x
)
const
{
return
DB
::
CityHash64
(
x
.
data
,
x
.
size
);
return
CityHash64_v1_0_2
::
CityHash64
(
x
.
data
,
x
.
size
);
}
};
...
...
@@ -177,7 +177,7 @@ struct StringRefHash64
inline
UInt64
hashLen16
(
UInt64
u
,
UInt64
v
)
{
return
DB
::
Hash128to64
(
uint128
(
u
,
v
));
return
CityHash64_v1_0_2
::
Hash128to64
(
CityHash64_v1_0_2
::
uint128
(
u
,
v
));
}
inline
UInt64
shiftMix
(
UInt64
val
)
...
...
dbms/src/Functions/FunctionsHashing.h
浏览文件 @
604de2bf
...
...
@@ -638,9 +638,9 @@ struct URLHashImpl
{
/// do not take last slash, '?' or '#' character into account
if
(
size
>
0
&&
(
data
[
size
-
1
]
==
'/'
||
data
[
size
-
1
]
==
'?'
||
data
[
size
-
1
]
==
'#'
))
return
CityHash64
(
data
,
size
-
1
);
return
CityHash64
_v1_0_2
::
CityHash64
(
data
,
size
-
1
);
return
CityHash64
(
data
,
size
);
return
CityHash64
_v1_0_2
::
CityHash64
(
data
,
size
);
}
};
...
...
@@ -844,10 +844,10 @@ struct NameIntHash64 { static constexpr auto name = "intHash64"; };
struct
ImplCityHash64
{
static
constexpr
auto
name
=
"cityHash64"
;
using
uint128_t
=
uint128
;
using
uint128_t
=
CityHash64_v1_0_2
::
uint128
;
static
auto
Hash128to64
(
const
uint128_t
&
x
)
{
return
DB
::
Hash128to64
(
x
);
}
static
auto
Hash64
(
const
char
*
const
s
,
const
std
::
size_t
len
)
{
return
CityHash64
(
s
,
len
);
}
static
auto
Hash128to64
(
const
uint128_t
&
x
)
{
return
CityHash64_v1_0_2
::
Hash128to64
(
x
);
}
static
auto
Hash64
(
const
char
*
const
s
,
const
std
::
size_t
len
)
{
return
CityHash64
_v1_0_2
::
CityHash64
(
s
,
len
);
}
};
struct
ImplFarmHash64
...
...
@@ -862,9 +862,9 @@ struct ImplFarmHash64
struct
ImplMetroHash64
{
static
constexpr
auto
name
=
"metroHash64"
;
using
uint128_t
=
uint128
;
using
uint128_t
=
CityHash64_v1_0_2
::
uint128
;
static
auto
Hash128to64
(
const
uint128_t
&
x
)
{
return
DB
::
Hash128to64
(
x
);
}
static
auto
Hash128to64
(
const
uint128_t
&
x
)
{
return
CityHash64_v1_0_2
::
Hash128to64
(
x
);
}
static
auto
Hash64
(
const
char
*
const
s
,
const
std
::
size_t
len
)
{
union
{
...
...
dbms/src/IO/CompressedReadBufferBase.cpp
浏览文件 @
604de2bf
...
...
@@ -42,7 +42,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
if
(
compressed_in
->
eof
())
return
0
;
uint128
checksum
;
CityHash64_v1_0_2
::
uint128
checksum
;
compressed_in
->
readStrict
(
reinterpret_cast
<
char
*>
(
&
checksum
),
sizeof
(
checksum
));
own_compressed_buffer
.
resize
(
COMPRESSED_BLOCK_HEADER_SIZE
);
...
...
@@ -80,7 +80,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
compressed_in
->
readStrict
(
&
compressed_buffer
[
COMPRESSED_BLOCK_HEADER_SIZE
],
size_compressed
-
COMPRESSED_BLOCK_HEADER_SIZE
);
}
if
(
!
disable_checksum
&&
checksum
!=
CityHash128
(
&
compressed_buffer
[
0
],
size_compressed
))
if
(
!
disable_checksum
&&
checksum
!=
CityHash
64_v1_0_2
::
CityHash
128
(
&
compressed_buffer
[
0
],
size_compressed
))
throw
Exception
(
"Checksum doesn't match: corrupted data."
,
ErrorCodes
::
CHECKSUM_DOESNT_MATCH
);
return
size_compressed
+
sizeof
(
checksum
);
...
...
dbms/src/IO/CompressedWriteBuffer.cpp
浏览文件 @
604de2bf
...
...
@@ -102,7 +102,7 @@ void CompressedWriteBuffer::nextImpl()
throw
Exception
(
"Unknown compression method"
,
ErrorCodes
::
UNKNOWN_COMPRESSION_METHOD
);
}
uint128
checksum
=
CityHash128
(
compressed_buffer_ptr
,
compressed_size
);
CityHash64_v1_0_2
::
uint128
checksum
=
CityHash64_v1_0_2
::
CityHash128
(
compressed_buffer_ptr
,
compressed_size
);
out
.
write
(
reinterpret_cast
<
const
char
*>
(
&
checksum
),
sizeof
(
checksum
));
out
.
write
(
compressed_buffer_ptr
,
compressed_size
);
...
...
dbms/src/IO/HashingWriteBuffer.cpp
浏览文件 @
604de2bf
...
...
@@ -52,36 +52,3 @@ template class IHashingBuffer<DB::ReadBuffer>;
template
class
IHashingBuffer
<
DB
::
WriteBuffer
>;
}
/// UInt64 is 39 characters in 10th number system
static
const
size_t
UINT64_DECIMAL_SIZE
=
39
;
std
::
string
uint128ToString
(
uint128
data
)
{
std
::
stringstream
ss
;
ss
<<
std
::
setw
(
UINT64_DECIMAL_SIZE
)
<<
std
::
setfill
(
'0'
)
<<
data
.
first
<<
std
::
setw
(
UINT64_DECIMAL_SIZE
)
<<
std
::
setfill
(
'0'
)
<<
data
.
second
;
return
ss
.
str
();
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
uint128
&
data
)
{
os
<<
uint128ToString
(
data
);
return
os
;
}
std
::
istream
&
operator
>>
(
std
::
istream
&
is
,
uint128
&
data
)
{
std
::
vector
<
char
>
buffer
(
UINT64_DECIMAL_SIZE
);
is
.
read
(
buffer
.
data
(),
UINT64_DECIMAL_SIZE
);
data
.
first
=
DB
::
parse
<
UInt64
>
(
buffer
.
data
(),
UINT64_DECIMAL_SIZE
);
if
(
!
is
)
throw
DB
::
Exception
(
std
::
string
(
"Fail to parse uint128 from "
)
+
buffer
.
data
());
is
.
read
(
buffer
.
data
(),
UINT64_DECIMAL_SIZE
);
data
.
first
=
DB
::
parse
<
UInt64
>
(
buffer
.
data
(),
UINT64_DECIMAL_SIZE
);
if
(
!
is
)
throw
DB
::
Exception
(
std
::
string
(
"Fail to parse uint128 from "
)
+
buffer
.
data
());
return
is
;
}
dbms/src/IO/HashingWriteBuffer.h
浏览文件 @
604de2bf
...
...
@@ -15,6 +15,8 @@ template <class Buffer>
class
IHashingBuffer
:
public
BufferWithOwnMemory
<
Buffer
>
{
public:
using
uint128
=
CityHash64_v1_0_2
::
uint128
;
IHashingBuffer
<
Buffer
>
(
size_t
block_size_
=
DBMS_DEFAULT_HASHING_BLOCK_SIZE
)
:
BufferWithOwnMemory
<
Buffer
>
(
block_size_
),
block_pos
(
0
),
block_size
(
block_size_
),
state
(
0
,
0
)
{
...
...
@@ -23,14 +25,14 @@ public:
uint128
getHash
()
{
if
(
block_pos
)
return
CityHash128WithSeed
(
&
BufferWithOwnMemory
<
Buffer
>::
memory
[
0
],
block_pos
,
state
);
return
CityHash
64_v1_0_2
::
CityHash
128WithSeed
(
&
BufferWithOwnMemory
<
Buffer
>::
memory
[
0
],
block_pos
,
state
);
else
return
state
;
}
void
append
(
DB
::
BufferBase
::
Position
data
)
{
state
=
CityHash128WithSeed
(
data
,
block_size
,
state
);
state
=
CityHash
64_v1_0_2
::
CityHash
128WithSeed
(
data
,
block_size
,
state
);
}
/// computation of the hash depends on the partitioning of blocks
...
...
@@ -82,9 +84,3 @@ public:
}
};
}
std
::
string
uint128ToString
(
uint128
data
);
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
uint128
&
data
);
std
::
istream
&
operator
>>
(
std
::
istream
&
is
,
uint128
&
data
);
dbms/src/IO/ReadHelpers.h
浏览文件 @
604de2bf
...
...
@@ -657,9 +657,8 @@ template <typename T>
inline
typename
std
::
enable_if
<
std
::
is_arithmetic
<
T
>::
value
,
void
>::
type
readBinary
(
T
&
x
,
ReadBuffer
&
buf
)
{
readPODBinary
(
x
,
buf
);
}
inline
void
readBinary
(
String
&
x
,
ReadBuffer
&
buf
)
{
readStringBinary
(
x
,
buf
);
}
inline
void
readBinary
(
uint128
&
x
,
ReadBuffer
&
buf
)
{
readPODBinary
(
x
,
buf
);
}
inline
void
readBinary
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
readPODBinary
(
x
,
buf
);
}
inline
void
readBinary
(
String
&
x
,
ReadBuffer
&
buf
)
{
readStringBinary
(
x
,
buf
);
}
inline
void
readBinary
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
readPODBinary
(
x
,
buf
);
}
inline
void
readBinary
(
LocalDateTime
&
x
,
ReadBuffer
&
buf
)
{
readPODBinary
(
x
,
buf
);
}
...
...
@@ -672,9 +671,9 @@ template <typename T>
inline
typename
std
::
enable_if
<
std
::
is_floating_point
<
T
>::
value
,
void
>::
type
readText
(
T
&
x
,
ReadBuffer
&
buf
)
{
readFloatText
(
x
,
buf
);
}
inline
void
readText
(
bool
&
x
,
ReadBuffer
&
buf
)
{
readBoolText
(
x
,
buf
);
}
inline
void
readText
(
String
&
x
,
ReadBuffer
&
buf
)
{
readEscapedString
(
x
,
buf
);
}
inline
void
readText
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
readDateText
(
x
,
buf
);
}
inline
void
readText
(
bool
&
x
,
ReadBuffer
&
buf
)
{
readBoolText
(
x
,
buf
);
}
inline
void
readText
(
String
&
x
,
ReadBuffer
&
buf
)
{
readEscapedString
(
x
,
buf
);
}
inline
void
readText
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
readDateText
(
x
,
buf
);
}
inline
void
readText
(
LocalDateTime
&
x
,
ReadBuffer
&
buf
)
{
readDateTimeText
(
x
,
buf
);
}
...
...
@@ -684,7 +683,7 @@ template <typename T>
inline
typename
std
::
enable_if
<
std
::
is_arithmetic
<
T
>::
value
,
void
>::
type
readQuoted
(
T
&
x
,
ReadBuffer
&
buf
)
{
readText
(
x
,
buf
);
}
inline
void
readQuoted
(
String
&
x
,
ReadBuffer
&
buf
)
{
readQuotedString
(
x
,
buf
);
}
inline
void
readQuoted
(
String
&
x
,
ReadBuffer
&
buf
)
{
readQuotedString
(
x
,
buf
);
}
inline
void
readQuoted
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
...
...
@@ -706,7 +705,7 @@ template <typename T>
inline
typename
std
::
enable_if
<
std
::
is_arithmetic
<
T
>::
value
,
void
>::
type
readDoubleQuoted
(
T
&
x
,
ReadBuffer
&
buf
)
{
readText
(
x
,
buf
);
}
inline
void
readDoubleQuoted
(
String
&
x
,
ReadBuffer
&
buf
)
{
readDoubleQuotedString
(
x
,
buf
);
}
inline
void
readDoubleQuoted
(
String
&
x
,
ReadBuffer
&
buf
)
{
readDoubleQuotedString
(
x
,
buf
);
}
inline
void
readDoubleQuoted
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
...
...
@@ -746,7 +745,7 @@ inline typename std::enable_if<std::is_arithmetic<T>::value, void>::type
readCSV
(
T
&
x
,
ReadBuffer
&
buf
)
{
readCSVSimple
(
x
,
buf
);
}
inline
void
readCSV
(
String
&
x
,
ReadBuffer
&
buf
,
const
char
delimiter
=
','
)
{
readCSVString
(
x
,
buf
,
delimiter
);
}
inline
void
readCSV
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
readCSVSimple
(
x
,
buf
);
}
inline
void
readCSV
(
LocalDate
&
x
,
ReadBuffer
&
buf
)
{
readCSVSimple
(
x
,
buf
);
}
inline
void
readCSV
(
LocalDateTime
&
x
,
ReadBuffer
&
buf
)
{
readCSVSimple
(
x
,
buf
);
}
...
...
dbms/src/IO/WriteHelpers.h
浏览文件 @
604de2bf
...
...
@@ -581,11 +581,10 @@ template <typename T>
inline
typename
std
::
enable_if
<
std
::
is_arithmetic
<
T
>::
value
,
void
>::
type
writeBinary
(
const
T
&
x
,
WriteBuffer
&
buf
)
{
writePODBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
String
&
x
,
WriteBuffer
&
buf
)
{
writeStringBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
StringRef
&
x
,
WriteBuffer
&
buf
)
{
writeStringBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
uint128
&
x
,
WriteBuffer
&
buf
)
{
writePODBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
LocalDate
&
x
,
WriteBuffer
&
buf
)
{
writePODBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
LocalDateTime
&
x
,
WriteBuffer
&
buf
)
{
writePODBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
String
&
x
,
WriteBuffer
&
buf
)
{
writeStringBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
StringRef
&
x
,
WriteBuffer
&
buf
)
{
writeStringBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
LocalDate
&
x
,
WriteBuffer
&
buf
)
{
writePODBinary
(
x
,
buf
);
}
inline
void
writeBinary
(
const
LocalDateTime
&
x
,
WriteBuffer
&
buf
)
{
writePODBinary
(
x
,
buf
);
}
/// Methods for outputting the value in text form for a tab-separated format.
...
...
dbms/src/IO/tests/hashing_buffer.h
浏览文件 @
604de2bf
...
...
@@ -4,19 +4,19 @@
#define FAIL(msg) { std::cout << msg; exit(1); }
uint128
referenceHash
(
const
char
*
data
,
size_t
len
)
CityHash64_v1_0_2
::
uint128
referenceHash
(
const
char
*
data
,
size_t
len
)
{
const
size_t
block_size
=
DBMS_DEFAULT_HASHING_BLOCK_SIZE
;
uint128
state
(
0
,
0
);
CityHash64_v1_0_2
::
uint128
state
(
0
,
0
);
size_t
pos
;
for
(
pos
=
0
;
pos
+
block_size
<=
len
;
pos
+=
block_size
)
{
state
=
DB
::
CityHash128WithSeed
(
data
+
pos
,
block_size
,
state
);
state
=
CityHash64_v1_0_2
::
CityHash128WithSeed
(
data
+
pos
,
block_size
,
state
);
}
if
(
pos
<
len
)
state
=
DB
::
CityHash128WithSeed
(
data
+
pos
,
len
-
pos
,
state
);
state
=
CityHash64_v1_0_2
::
CityHash128WithSeed
(
data
+
pos
,
len
-
pos
,
state
);
return
state
;
}
dbms/src/IO/tests/hashing_read_buffer.cpp
浏览文件 @
604de2bf
...
...
@@ -12,7 +12,7 @@ void test(size_t data_size)
for
(
size_t
i
=
0
;
i
<
data_size
;
++
i
)
data
[
i
]
=
rand
()
&
255
;
uint128
reference
=
referenceHash
(
data
,
data_size
);
CityHash64_v1_0_2
::
uint128
reference
=
referenceHash
(
data
,
data_size
);
std
::
vector
<
size_t
>
block_sizes
=
{
56
,
128
,
513
,
2048
,
3055
,
4097
,
4096
};
for
(
size_t
read_buffer_block_size
:
block_sizes
)
...
...
@@ -52,7 +52,6 @@ void test(size_t data_size)
if
(
buf
.
getHash
()
!=
reference
)
{
//std::cout << uint128ToString(buf.getHash()) << " " << uint128ToString(reference) << std::endl;
FAIL
(
"failed on data size "
<<
data_size
<<
" reading by blocks of size "
<<
read_buffer_block_size
);
}
if
(
buf
.
getHash
()
!=
out
.
getHash
())
...
...
dbms/src/IO/tests/hashing_write_buffer.cpp
浏览文件 @
604de2bf
...
...
@@ -11,7 +11,7 @@ void test(size_t data_size)
for
(
size_t
i
=
0
;
i
<
data_size
;
++
i
)
data
[
i
]
=
rand
()
&
255
;
uint128
reference
=
referenceHash
(
data
,
data_size
);
CityHash64_v1_0_2
::
uint128
reference
=
referenceHash
(
data
,
data_size
);
DB
::
WriteBufferFromFile
sink
(
"/dev/null"
,
1
<<
16
);
...
...
dbms/src/Interpreters/tests/hash_map_string.cpp
浏览文件 @
604de2bf
...
...
@@ -76,7 +76,7 @@ struct DefaultHash<CompactStringRef>
{
size_t
operator
()
(
CompactStringRef
x
)
const
{
return
DB
::
CityHash64
(
x
.
data
(),
x
.
size
);
return
CityHash64_v1_0_2
::
CityHash64
(
x
.
data
(),
x
.
size
);
}
};
...
...
dbms/src/Interpreters/tests/hash_map_string_2.cpp
浏览文件 @
604de2bf
...
...
@@ -65,7 +65,7 @@ struct DefaultHash<STRUCT> \
{ \
size_t operator() (STRUCT x) const \
{ \
return
DB
::CityHash64(x.data, x.size); \
return
CityHash64_v1_0_2
::CityHash64(x.data, x.size); \
} \
};
...
...
dbms/src/Interpreters/tests/hash_map_string_3.cpp
浏览文件 @
604de2bf
...
...
@@ -68,7 +68,7 @@ struct DefaultHash<STRUCT> \
{ \
size_t operator() (STRUCT x) const \
{ \
return
DB
::CityHash64(x.data, x.size); \
return
CityHash64_v1_0_2
::CityHash64(x.data, x.size); \
} \
};
...
...
dbms/src/Storages/MergeTree/DataPartsExchange.cpp
浏览文件 @
604de2bf
...
...
@@ -126,7 +126,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
if
(
hashing_out
.
count
()
!=
size
)
throw
Exception
(
"Unexpected size of file "
+
path
,
ErrorCodes
::
BAD_SIZE_OF_FILE_IN_DATA_PART
);
writeBinary
(
hashing_out
.
getHash
(),
out
);
write
POD
Binary
(
hashing_out
.
getHash
(),
out
);
if
(
file_name
!=
"checksums.txt"
&&
file_name
!=
"columns.txt"
)
...
...
@@ -250,8 +250,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
throw
Exception
(
"Fetching of part was cancelled"
,
ErrorCodes
::
ABORTED
);
}
uint128
expected_hash
;
readBinary
(
expected_hash
,
in
);
MergeTreeDataPartChecksum
::
uint128
expected_hash
;
read
POD
Binary
(
expected_hash
,
in
);
if
(
expected_hash
!=
hashing_out
.
getHash
())
throw
Exception
(
"Checksum mismatch for file "
+
absolute_part_path
+
file_name
+
" transferred from "
+
replica_path
);
...
...
dbms/src/Storages/MergeTree/MergeTreeData.cpp
浏览文件 @
604de2bf
...
...
@@ -959,7 +959,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
/// Update primary key if needed.
size_t
new_primary_key_file_size
{};
uint128
new_primary_key_hash
{};
MergeTreeDataPartChecksum
::
uint128
new_primary_key_hash
{};
if
(
new_primary_key
.
get
()
!=
primary_expr_ast
.
get
())
{
...
...
dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
浏览文件 @
604de2bf
...
...
@@ -175,13 +175,13 @@ bool MergeTreeDataPartChecksums::read_v3(ReadBuffer & in)
readBinary
(
name
,
in
);
readVarUInt
(
sum
.
file_size
,
in
);
readBinary
(
sum
.
file_hash
,
in
);
read
POD
Binary
(
sum
.
file_hash
,
in
);
readBinary
(
sum
.
is_compressed
,
in
);
if
(
sum
.
is_compressed
)
{
readVarUInt
(
sum
.
uncompressed_size
,
in
);
readBinary
(
sum
.
uncompressed_hash
,
in
);
read
POD
Binary
(
sum
.
uncompressed_hash
,
in
);
}
files
.
emplace
(
std
::
move
(
name
),
sum
);
...
...
@@ -210,18 +210,18 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const
writeBinary
(
name
,
out
);
writeVarUInt
(
sum
.
file_size
,
out
);
writeBinary
(
sum
.
file_hash
,
out
);
write
POD
Binary
(
sum
.
file_hash
,
out
);
writeBinary
(
sum
.
is_compressed
,
out
);
if
(
sum
.
is_compressed
)
{
writeVarUInt
(
sum
.
uncompressed_size
,
out
);
writeBinary
(
sum
.
uncompressed_hash
,
out
);
write
POD
Binary
(
sum
.
uncompressed_hash
,
out
);
}
}
}
void
MergeTreeDataPartChecksums
::
addFile
(
const
String
&
file_name
,
size_t
file_size
,
uint128
file_hash
)
void
MergeTreeDataPartChecksums
::
addFile
(
const
String
&
file_name
,
size_t
file_size
,
MergeTreeDataPartChecksum
::
uint128
file_hash
)
{
files
[
file_name
]
=
Checksum
(
file_size
,
file_hash
);
}
...
...
dbms/src/Storages/MergeTree/MergeTreeDataPart.h
浏览文件 @
604de2bf
...
...
@@ -16,6 +16,8 @@ namespace DB
/// Checksum of one file.
struct
MergeTreeDataPartChecksum
{
using
uint128
=
CityHash64_v1_0_2
::
uint128
;
size_t
file_size
{};
uint128
file_hash
{};
...
...
@@ -44,7 +46,7 @@ struct MergeTreeDataPartChecksums
using
FileChecksums
=
std
::
map
<
String
,
Checksum
>
;
FileChecksums
files
;
void
addFile
(
const
String
&
file_name
,
size_t
file_size
,
uint128
file_hash
);
void
addFile
(
const
String
&
file_name
,
size_t
file_size
,
Checksum
::
uint128
file_hash
);
void
add
(
MergeTreeDataPartChecksums
&&
rhs_checksums
);
...
...
dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp
浏览文件 @
604de2bf
...
...
@@ -91,8 +91,8 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
throw
Exception
{
"Fetching of part was cancelled"
,
ErrorCodes
::
ABORTED
};
}
uint128
expected_hash
;
readBinary
(
expected_hash
,
body
);
MergeTreeDataPartChecksum
::
uint128
expected_hash
;
read
POD
Binary
(
expected_hash
,
body
);
if
(
expected_hash
!=
hashing_out
.
getHash
())
throw
Exception
{
"Checksum mismatch for file "
+
absolute_part_path
+
file_name
+
" transferred from "
+
replica_path
};
...
...
@@ -182,7 +182,7 @@ bool Client::send(const std::string & part_name, size_t shard_no,
if
(
hashing_out
.
count
()
!=
size
)
throw
Exception
{
"Unexpected size of file "
+
path
,
ErrorCodes
::
BAD_SIZE_OF_FILE_IN_DATA_PART
};
writeBinary
(
hashing_out
.
getHash
(),
out
);
write
POD
Binary
(
hashing_out
.
getHash
(),
out
);
if
(
file_name
!=
"checksums.txt"
&&
file_name
!=
"columns.txt"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录