Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
45882dc0
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
45882dc0
编写于
1月 16, 2019
作者:
D
Danila Kutenin
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'danlark1-master'
上级
4f29102e
12bcff48
变更
3
展开全部
隐藏空白更改
内联
并排
Showing
3 changed file
with
201 addition
and
195 deletion
+201
-195
dbms/src/Common/Volnitsky.h
dbms/src/Common/Volnitsky.h
+175
-149
dbms/src/Functions/FunctionsStringSearch.cpp
dbms/src/Functions/FunctionsStringSearch.cpp
+21
-36
dbms/src/Functions/FunctionsStringSearch.h
dbms/src/Functions/FunctionsStringSearch.h
+5
-10
未找到文件。
dbms/src/Common/Volnitsky.h
浏览文件 @
45882dc0
此差异已折叠。
点击以展开。
dbms/src/Functions/FunctionsStringSearch.cpp
浏览文件 @
45882dc0
...
...
@@ -53,7 +53,7 @@ struct PositionCaseSensitiveASCII
return
SearcherInSmallHaystack
(
needle_data
,
needle_size
);
}
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
>
&
needles
)
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
Ref
>
&
needles
)
{
return
MultiSearcherInBigHaystack
(
needles
);
}
...
...
@@ -83,7 +83,7 @@ struct PositionCaseInsensitiveASCII
return
SearcherInSmallHaystack
(
needle_data
,
needle_size
);
}
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
>
&
needles
)
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
Ref
>
&
needles
)
{
return
MultiSearcherInBigHaystack
(
needles
);
}
...
...
@@ -109,7 +109,7 @@ struct PositionCaseSensitiveUTF8
return
SearcherInSmallHaystack
(
needle_data
,
needle_size
);
}
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
>
&
needles
)
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
Ref
>
&
needles
)
{
return
MultiSearcherInBigHaystack
(
needles
);
}
...
...
@@ -142,7 +142,7 @@ struct PositionCaseInsensitiveUTF8
return
SearcherInSmallHaystack
(
needle_data
,
needle_size
);
}
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
>
&
needles
)
static
MultiSearcherInBigHaystack
createMultiSearcherInBigHaystack
(
const
std
::
vector
<
String
Ref
>
&
needles
)
{
return
MultiSearcherInBigHaystack
(
needles
);
}
...
...
@@ -267,7 +267,7 @@ struct PositionImpl
}
}
/// Find many substrings in
one line
.
/// Find many substrings in
single string
.
static
void
constant_vector
(
const
String
&
haystack
,
const
ColumnString
::
Chars
&
needle_data
,
...
...
@@ -316,31 +316,18 @@ struct MultiPositionImpl
{
using
ResultType
=
UInt64
;
static
void
multi_constant_vector
(
static
void
vector_constant
(
const
ColumnString
::
Chars
&
haystack_data
,
const
ColumnString
::
Offsets
&
haystack_offsets
,
const
std
::
vector
<
String
>
&
needles
,
const
std
::
vector
<
String
Ref
>
&
needles
,
PaddedPODArray
<
UInt64
>
&
res
)
{
const
size_t
needles_size
=
needles
.
size
();
const
size_t
haystack_offsets_size
=
haystack_offsets
.
size
();
size_t
k
=
0
;
const
auto
result
=
Impl
::
createMultiSearcherInBigHaystack
(
needles
).
search_all
(
haystack_data
,
haystack_offsets
);
for
(
size_t
j
=
0
;
j
<
haystack_offsets_size
;
++
j
)
auto
resCallback
=
[](
const
UInt8
*
start
,
const
UInt8
*
end
)
->
UInt64
{
for
(
size_t
i
=
0
;
i
<
needles_size
;
++
i
)
{
const
char
*
ptr
=
result
[
k
];
if
(
ptr
)
{
const
char
*
start
=
reinterpret_cast
<
const
char
*>
(
&
haystack_data
[
j
==
0
?
0
:
haystack_offsets
[
j
-
1
]]);
res
[
k
]
=
1
+
Impl
::
countChars
(
start
,
ptr
);
}
else
res
[
k
]
=
0
;
++
k
;
}
}
return
1
+
Impl
::
countChars
(
reinterpret_cast
<
const
char
*>
(
start
),
reinterpret_cast
<
const
char
*>
(
end
));
};
Impl
::
createMultiSearcherInBigHaystack
(
needles
).
searchAll
(
haystack_data
,
haystack_offsets
,
resCallback
,
res
);
}
};
...
...
@@ -349,14 +336,13 @@ struct MultiSearchImpl
{
using
ResultType
=
UInt64
;
static
void
multi_constant_vector
(
static
void
vector_constant
(
const
ColumnString
::
Chars
&
haystack_data
,
const
ColumnString
::
Offsets
&
haystack_offsets
,
const
std
::
vector
<
String
>
&
needles
,
const
std
::
vector
<
String
Ref
>
&
needles
,
PaddedPODArray
<
UInt64
>
&
res
)
{
const
auto
result
=
Impl
::
createMultiSearcherInBigHaystack
(
needles
).
search
(
haystack_data
,
haystack_offsets
);
std
::
copy
(
result
.
begin
(),
result
.
end
(),
res
.
begin
());
Impl
::
createMultiSearcherInBigHaystack
(
needles
).
search
(
haystack_data
,
haystack_offsets
,
res
);
}
};
...
...
@@ -365,14 +351,13 @@ struct FirstMatchImpl
{
using
ResultType
=
UInt64
;
static
void
multi_constant_vector
(
static
void
vector_constant
(
const
ColumnString
::
Chars
&
haystack_data
,
const
ColumnString
::
Offsets
&
haystack_offsets
,
const
std
::
vector
<
String
>
&
needles
,
const
std
::
vector
<
String
Ref
>
&
needles
,
PaddedPODArray
<
UInt64
>
&
res
)
{
const
auto
result
=
Impl
::
createMultiSearcherInBigHaystack
(
needles
).
search_index
(
haystack_data
,
haystack_offsets
);
std
::
copy
(
result
.
begin
(),
result
.
end
(),
res
.
begin
());
Impl
::
createMultiSearcherInBigHaystack
(
needles
).
searchIndex
(
haystack_data
,
haystack_offsets
,
res
);
}
};
...
...
@@ -543,7 +528,7 @@ struct MatchImpl
size_t
str_size
=
(
i
!=
0
?
offsets
[
i
]
-
offsets
[
i
-
1
]
:
offsets
[
0
])
-
1
;
/** Even in the case of `required_substring_is_prefix` use UNANCHORED check for regexp,
* so that it can match when `required_substring` occurs into the
line
several times,
* so that it can match when `required_substring` occurs into the
string
several times,
* and at the first occurrence, the regexp is not a match.
*/
...
...
@@ -875,7 +860,7 @@ struct ReplaceStringImpl
if
(
i
==
offsets
.
size
())
break
;
/// Is it true that this
line
no longer needs to perform transformations.
/// Is it true that this
string
no longer needs to perform transformations.
bool
can_finish_current_string
=
false
;
/// We check that the entry does not go through the boundaries of strings.
...
...
@@ -964,7 +949,7 @@ struct ReplaceStringImpl
memcpy
(
&
res_data
[
res_offset
],
pos
,
match
-
pos
);
res_offset
+=
(
match
-
pos
);
/// Is it true that this
line
no longer needs to perform conversions.
/// Is it true that this
string
no longer needs to perform conversions.
bool
can_finish_current_string
=
false
;
/// We check that the entry does not pass through the boundaries of strings.
...
...
dbms/src/Functions/FunctionsStringSearch.h
浏览文件 @
45882dc0
...
...
@@ -12,7 +12,6 @@
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>
#include <common/StringRef.h>
#include <ext/range.h>
namespace
DB
{
...
...
@@ -241,11 +240,9 @@ public:
Array
src_arr
=
col_const_arr
->
getValue
<
Array
>
();
std
::
vector
<
String
>
refs
;
std
::
vector
<
String
Ref
>
refs
;
for
(
const
auto
&
el
:
src_arr
)
{
refs
.
push_back
(
el
.
get
<
String
>
());
}
refs
.
emplace_back
(
el
.
get
<
String
>
());
const
size_t
column_haystack_size
=
column_haystack
->
size
();
...
...
@@ -258,7 +255,7 @@ public:
vec_res
.
resize
(
column_haystack_size
*
refs
.
size
());
if
(
col_haystack_vector
)
Impl
::
multi_constant_vector
(
col_haystack_vector
->
getChars
(),
col_haystack_vector
->
getOffsets
(),
refs
,
vec_res
);
Impl
::
vector_constant
(
col_haystack_vector
->
getChars
(),
col_haystack_vector
->
getOffsets
(),
refs
,
vec_res
);
else
throw
Exception
(
"Illegal column "
+
block
.
getByPosition
(
arguments
[
0
]).
column
->
getName
(),
ErrorCodes
::
ILLEGAL_COLUMN
);
...
...
@@ -266,9 +263,7 @@ public:
size_t
accum
=
refs_size
;
for
(
size_t
i
=
0
;
i
<
column_haystack_size
;
++
i
,
accum
+=
refs_size
)
{
offsets_res
[
i
]
=
accum
;
}
block
.
getByPosition
(
result
).
column
=
ColumnArray
::
create
(
std
::
move
(
col_res
),
std
::
move
(
col_offsets
));
}
...
...
@@ -324,7 +319,7 @@ public:
Array
src_arr
=
col_const_arr
->
getValue
<
Array
>
();
std
::
vector
<
String
>
refs
;
std
::
vector
<
String
Ref
>
refs
;
refs
.
reserve
(
src_arr
.
size
());
for
(
const
auto
&
el
:
src_arr
)
...
...
@@ -339,7 +334,7 @@ public:
vec_res
.
resize
(
column_haystack_size
);
if
(
col_haystack_vector
)
Impl
::
multi_constant_vector
(
col_haystack_vector
->
getChars
(),
col_haystack_vector
->
getOffsets
(),
refs
,
vec_res
);
Impl
::
vector_constant
(
col_haystack_vector
->
getChars
(),
col_haystack_vector
->
getOffsets
(),
refs
,
vec_res
);
else
throw
Exception
(
"Illegal column "
+
block
.
getByPosition
(
arguments
[
0
]).
column
->
getName
(),
ErrorCodes
::
ILLEGAL_COLUMN
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录