Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
daae0564
M
milvus
项目概览
milvus
/
milvus
11 个月 前同步成功
通知
260
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
daae0564
编写于
8月 25, 2021
作者:
C
congqixia
提交者:
GitHub
8月 25, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Construct bloom filter when inserting (#7268)
Signed-off-by:
N
Congqi Xia
<
congqi.xia@zilliz.com
>
上级
2c3779cf
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
136 additions
and
0 deletions
+136
-0
internal/datanode/flow_graph_insert_buffer_node.go
internal/datanode/flow_graph_insert_buffer_node.go
+2
-0
internal/datanode/segment_replica.go
internal/datanode/segment_replica.go
+57
-0
internal/datanode/segment_replica_test.go
internal/datanode/segment_replica_test.go
+77
-0
未找到文件。
internal/datanode/flow_graph_insert_buffer_node.go
浏览文件 @
daae0564
...
...
@@ -483,6 +483,8 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
// store current endPositions as Segment->EndPostion
ibNode
.
replica
.
updateSegmentEndPosition
(
currentSegID
,
iMsg
.
endPositions
[
0
])
// update segment pk filter
ibNode
.
replica
.
updateSegmentPKRange
(
currentSegID
,
msg
.
GetRowIDs
())
}
if
len
(
iMsg
.
insertMessages
)
>
0
{
...
...
internal/datanode/segment_replica.go
浏览文件 @
daae0564
...
...
@@ -13,12 +13,15 @@ package datanode
import
(
"context"
"encoding/binary"
"fmt"
"math"
"sync"
"sync/atomic"
"go.uber.org/zap"
"github.com/bits-and-blooms/bloom/v3"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
...
...
@@ -26,6 +29,12 @@ import (
"github.com/milvus-io/milvus/internal/types"
)
// Sizing parameters for the per-segment primary-key bloom filter.
const (
	// TODO silverxia maybe need set from config
	// bloomFilterSize is the estimated element count the pk bloom filter
	// is sized for (passed to bloom.NewWithEstimates).
	bloomFilterSize uint = 100000
	// maxBloomFalsePositive is the target false-positive rate for the
	// pk bloom filter.
	maxBloomFalsePositive float64 = 0.005
)
type
Replica
interface
{
getCollectionID
()
UniqueID
getCollectionSchema
(
collectionID
UniqueID
,
ts
Timestamp
)
(
*
schemapb
.
CollectionSchema
,
error
)
...
...
@@ -37,6 +46,7 @@ type Replica interface {
listSegmentsCheckPoints
()
map
[
UniqueID
]
segmentCheckPoint
updateSegmentEndPosition
(
segID
UniqueID
,
endPos
*
internalpb
.
MsgPosition
)
updateSegmentCheckPoint
(
segID
UniqueID
)
updateSegmentPKRange
(
segID
UniqueID
,
rowIDs
[]
int64
)
hasSegment
(
segID
UniqueID
,
countFlushed
bool
)
bool
updateStatistics
(
segID
UniqueID
,
numRows
int64
)
error
...
...
@@ -58,6 +68,11 @@ type Segment struct {
checkPoint
segmentCheckPoint
startPos
*
internalpb
.
MsgPosition
// TODO readonly
endPos
*
internalpb
.
MsgPosition
pkFilter
*
bloom
.
BloomFilter
// bloom filter of pk inside a segment
// TODO silverxia, needs to change to interface to support `string` type PK
minPK
int64
// minimal pk value, shortcut for checking whether a pk is inside this segment
maxPK
int64
// maximal pk value, same above
}
// SegmentReplica is the data replication of persistent data in datanode.
...
...
@@ -74,6 +89,20 @@ type SegmentReplica struct {
metaService
*
metaService
}
func
(
s
*
Segment
)
updatePKRange
(
rowIDs
[]
int64
)
{
buf
:=
make
([]
byte
,
8
)
for
_
,
rowID
:=
range
rowIDs
{
binary
.
BigEndian
.
PutUint64
(
buf
,
uint64
(
rowID
))
s
.
pkFilter
.
Add
(
buf
)
if
rowID
>
s
.
maxPK
{
s
.
maxPK
=
rowID
}
if
rowID
<
s
.
minPK
{
s
.
minPK
=
rowID
}
}
}
var
_
Replica
=
&
SegmentReplica
{}
func
newReplica
(
rc
types
.
RootCoord
,
collID
UniqueID
)
Replica
{
...
...
@@ -176,6 +205,10 @@ func (replica *SegmentReplica) addNewSegment(segID, collID, partitionID UniqueID
checkPoint
:
segmentCheckPoint
{
0
,
*
startPos
},
startPos
:
startPos
,
endPos
:
endPos
,
pkFilter
:
bloom
.
NewWithEstimates
(
bloomFilterSize
,
maxBloomFalsePositive
),
minPK
:
math
.
MaxInt64
,
// use max value, represents no value
maxPK
:
math
.
MinInt64
,
// use min value represents no value
}
seg
.
isNew
.
Store
(
true
)
...
...
@@ -211,6 +244,11 @@ func (replica *SegmentReplica) addNormalSegment(segID, collID, partitionID Uniqu
checkPoint
:
*
cp
,
endPos
:
&
cp
.
pos
,
//TODO silverxia, normal segments bloom filter and pk range should be loaded from serialized files
pkFilter
:
bloom
.
NewWithEstimates
(
bloomFilterSize
,
maxBloomFalsePositive
),
minPK
:
math
.
MaxInt64
,
// use max value, represents no value
maxPK
:
math
.
MinInt64
,
// use min value represents no value
}
seg
.
isNew
.
Store
(
false
)
...
...
@@ -278,6 +316,25 @@ func (replica *SegmentReplica) updateSegmentEndPosition(segID UniqueID, endPos *
log
.
Warn
(
"No match segment"
,
zap
.
Int64
(
"ID"
,
segID
))
}
func
(
replica
*
SegmentReplica
)
updateSegmentPKRange
(
segID
UniqueID
,
rowIDs
[]
int64
)
{
replica
.
segMu
.
Lock
()
defer
replica
.
segMu
.
Unlock
()
seg
,
ok
:=
replica
.
newSegments
[
segID
]
if
ok
{
seg
.
updatePKRange
(
rowIDs
)
return
}
seg
,
ok
=
replica
.
normalSegments
[
segID
]
if
ok
{
seg
.
updatePKRange
(
rowIDs
)
return
}
log
.
Warn
(
"No match segment to update PK range"
,
zap
.
Int64
(
"ID"
,
segID
))
}
// removeSegment removes the segment identified by segID from the replica.
// NOTE(review): currently a stub — it always returns nil without deleting
// anything from newSegments/normalSegments; confirm whether removal is
// intended to be implemented here or handled elsewhere.
func (replica *SegmentReplica) removeSegment(segID UniqueID) error {
	return nil
}
...
...
internal/datanode/segment_replica_test.go
浏览文件 @
daae0564
...
...
@@ -12,8 +12,12 @@
package
datanode
import
(
"encoding/binary"
"math"
"math/rand"
"testing"
"github.com/bits-and-blooms/bloom/v3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
...
...
@@ -40,6 +44,13 @@ func TestSegmentReplica(t *testing.T) {
rc
:=
&
RootCoordFactory
{}
collID
:=
UniqueID
(
1
)
t
.
Run
(
"Test coll mot match"
,
func
(
t
*
testing
.
T
)
{
replica
:=
newSegmentReplica
(
rc
,
collID
)
err
:=
replica
.
addNewSegment
(
1
,
collID
+
1
,
0
,
""
,
nil
,
nil
)
assert
.
NotNil
(
t
,
err
)
})
t
.
Run
(
"Test segmentFlushed"
,
func
(
t
*
testing
.
T
)
{
testReplica
:=
&
SegmentReplica
{
newSegments
:
make
(
map
[
UniqueID
]
*
Segment
),
...
...
@@ -191,3 +202,69 @@ func TestSegmentReplica(t *testing.T) {
assert
.
Equal
(
t
,
int64
(
20
),
replica
.
normalSegments
[
UniqueID
(
1
)]
.
checkPoint
.
numRows
)
})
}
func
TestSegmentUpdatePKRange
(
t
*
testing
.
T
)
{
seg
:=
&
Segment
{
pkFilter
:
bloom
.
NewWithEstimates
(
100000
,
0.005
),
maxPK
:
math
.
MinInt64
,
minPK
:
math
.
MaxInt64
,
}
cases
:=
make
([]
int64
,
0
,
100
)
for
i
:=
0
;
i
<
100
;
i
++
{
cases
=
append
(
cases
,
rand
.
Int63
())
}
buf
:=
make
([]
byte
,
8
)
for
_
,
c
:=
range
cases
{
seg
.
updatePKRange
([]
int64
{
c
})
assert
.
LessOrEqual
(
t
,
seg
.
minPK
,
c
)
assert
.
GreaterOrEqual
(
t
,
seg
.
maxPK
,
c
)
binary
.
BigEndian
.
PutUint64
(
buf
,
uint64
(
c
))
assert
.
True
(
t
,
seg
.
pkFilter
.
Test
(
buf
))
}
}
func
TestReplicaUpdatePKRange
(
t
*
testing
.
T
)
{
rc
:=
&
RootCoordFactory
{}
collID
:=
UniqueID
(
1
)
partID
:=
UniqueID
(
2
)
chanName
:=
"insert-02"
startPos
:=
&
internalpb
.
MsgPosition
{
ChannelName
:
chanName
,
Timestamp
:
Timestamp
(
100
)}
endPos
:=
&
internalpb
.
MsgPosition
{
ChannelName
:
chanName
,
Timestamp
:
Timestamp
(
200
)}
cpPos
:=
&
internalpb
.
MsgPosition
{
ChannelName
:
chanName
,
Timestamp
:
Timestamp
(
10
)}
cp
:=
&
segmentCheckPoint
{
int64
(
10
),
*
cpPos
}
replica
:=
newSegmentReplica
(
rc
,
collID
)
err
:=
replica
.
addNewSegment
(
1
,
collID
,
partID
,
chanName
,
startPos
,
endPos
)
assert
.
Nil
(
t
,
err
)
err
=
replica
.
addNormalSegment
(
2
,
collID
,
partID
,
chanName
,
100
,
cp
)
assert
.
Nil
(
t
,
err
)
segNew
:=
replica
.
newSegments
[
1
]
segNormal
:=
replica
.
normalSegments
[
2
]
cases
:=
make
([]
int64
,
0
,
100
)
for
i
:=
0
;
i
<
100
;
i
++
{
cases
=
append
(
cases
,
rand
.
Int63
())
}
buf
:=
make
([]
byte
,
8
)
for
_
,
c
:=
range
cases
{
replica
.
updateSegmentPKRange
(
1
,
[]
int64
{
c
})
// new segment
replica
.
updateSegmentPKRange
(
2
,
[]
int64
{
c
})
// normal segment
replica
.
updateSegmentPKRange
(
3
,
[]
int64
{
c
})
// non-exist segment
assert
.
LessOrEqual
(
t
,
segNew
.
minPK
,
c
)
assert
.
GreaterOrEqual
(
t
,
segNew
.
maxPK
,
c
)
assert
.
LessOrEqual
(
t
,
segNormal
.
minPK
,
c
)
assert
.
GreaterOrEqual
(
t
,
segNormal
.
maxPK
,
c
)
binary
.
BigEndian
.
PutUint64
(
buf
,
uint64
(
c
))
assert
.
True
(
t
,
segNew
.
pkFilter
.
Test
(
buf
))
assert
.
True
(
t
,
segNormal
.
pkFilter
.
Test
(
buf
))
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录