Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
b3eb2b1d
M
milvus
项目概览
milvus
/
milvus
11 个月 前同步成功
通知
261
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
b3eb2b1d
编写于
5月 12, 2022
作者:
L
Letian Jiang
提交者:
GitHub
5月 12, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support deltaLog loading on growing segment (#16903)
Signed-off-by:
N
Letian Jiang
<
letian.jiang@zilliz.com
>
上级
a8b81e21
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
90 addition
and
30 deletion
+90
-30
internal/core/src/segcore/SegmentGrowingImpl.cpp
internal/core/src/segcore/SegmentGrowingImpl.cpp
+22
-0
internal/core/src/segcore/SegmentGrowingImpl.h
internal/core/src/segcore/SegmentGrowingImpl.h
+3
-0
internal/core/src/segcore/SegmentInterface.h
internal/core/src/segcore/SegmentInterface.h
+4
-0
internal/core/src/segcore/SegmentSealed.h
internal/core/src/segcore/SegmentSealed.h
+0
-2
internal/core/src/segcore/segment_c.cpp
internal/core/src/segcore/segment_c.cpp
+2
-3
internal/querynode/mock_test.go
internal/querynode/mock_test.go
+6
-4
internal/querynode/segment.go
internal/querynode/segment.go
+0
-4
internal/querynode/segment_loader.go
internal/querynode/segment_loader.go
+3
-4
internal/querynode/segment_loader_test.go
internal/querynode/segment_loader_test.go
+50
-13
未找到文件。
internal/core/src/segcore/SegmentGrowingImpl.cpp
浏览文件 @
b3eb2b1d
...
...
@@ -194,6 +194,28 @@ SegmentGrowingImpl::GetMemoryUsageInBytes() const {
return
total_bytes
;
}
void
SegmentGrowingImpl
::
LoadDeletedRecord
(
const
LoadDeletedRecordInfo
&
info
)
{
AssertInfo
(
info
.
row_count
>
0
,
"The row count of deleted record is 0"
);
AssertInfo
(
info
.
primary_keys
,
"Deleted primary keys is null"
);
AssertInfo
(
info
.
timestamps
,
"Deleted timestamps is null"
);
// step 1: get pks and timestamps
auto
field_id
=
schema_
->
get_primary_field_id
().
value_or
(
FieldId
(
INVALID_FIELD_ID
));
AssertInfo
(
field_id
.
get
()
!=
INVALID_FIELD_ID
,
"Primary key has invalid field id"
);
auto
&
field_meta
=
schema_
->
operator
[](
field_id
);
int64_t
size
=
info
.
row_count
;
std
::
vector
<
PkType
>
pks
(
size
);
ParsePksFromIDs
(
pks
,
field_meta
.
get_data_type
(),
*
info
.
primary_keys
);
auto
timestamps
=
reinterpret_cast
<
const
Timestamp
*>
(
info
.
timestamps
);
// step 2: fill pks and timestamps
deleted_record_
.
pks_
.
set_data_raw
(
0
,
pks
.
data
(),
size
);
deleted_record_
.
timestamps_
.
set_data_raw
(
0
,
timestamps
,
size
);
deleted_record_
.
ack_responder_
.
AddSegment
(
0
,
size
);
deleted_record_
.
reserved
.
fetch_add
(
size
);
deleted_record_
.
record_size_
=
size
;
}
SpanBase
SegmentGrowingImpl
::
chunk_data_impl
(
FieldId
field_id
,
int64_t
chunk_id
)
const
{
auto
vec
=
get_insert_record
().
get_field_data_base
(
field_id
);
...
...
internal/core/src/segcore/SegmentGrowingImpl.h
浏览文件 @
b3eb2b1d
...
...
@@ -59,6 +59,9 @@ class SegmentGrowingImpl : public SegmentGrowing {
int64_t
GetMemoryUsageInBytes
()
const
override
;
void
LoadDeletedRecord
(
const
LoadDeletedRecordInfo
&
info
)
override
;
std
::
string
debug
()
const
override
;
...
...
internal/core/src/segcore/SegmentInterface.h
浏览文件 @
b3eb2b1d
...
...
@@ -24,6 +24,7 @@
#include "common/Span.h"
#include "common/SystemProperty.h"
#include "common/Types.h"
#include "common/LoadInfo.h"
#include "common/BitsetView.h"
#include "common/QueryResult.h"
#include "knowhere/index/vector_index/VecIndex.h"
...
...
@@ -66,6 +67,9 @@ class SegmentInterface {
virtual
Status
Delete
(
int64_t
reserved_offset
,
int64_t
size
,
const
IdArray
*
pks
,
const
Timestamp
*
timestamps
)
=
0
;
virtual
void
LoadDeletedRecord
(
const
LoadDeletedRecordInfo
&
info
)
=
0
;
};
// internal API for DSL calculation
...
...
internal/core/src/segcore/SegmentSealed.h
浏览文件 @
b3eb2b1d
...
...
@@ -29,8 +29,6 @@ class SegmentSealed : public SegmentInternalInterface {
virtual
void
LoadFieldData
(
const
LoadFieldDataInfo
&
info
)
=
0
;
virtual
void
LoadDeletedRecord
(
const
LoadDeletedRecordInfo
&
info
)
=
0
;
virtual
void
DropIndex
(
const
FieldId
field_id
)
=
0
;
virtual
void
DropFieldData
(
const
FieldId
field_id
)
=
0
;
...
...
internal/core/src/segcore/segment_c.cpp
浏览文件 @
b3eb2b1d
...
...
@@ -210,8 +210,7 @@ CStatus
LoadDeletedRecord
(
CSegmentInterface
c_segment
,
CLoadDeletedRecordInfo
deleted_record_info
)
{
try
{
auto
segment_interface
=
reinterpret_cast
<
milvus
::
segcore
::
SegmentInterface
*>
(
c_segment
);
auto
segment
=
dynamic_cast
<
milvus
::
segcore
::
SegmentSealed
*>
(
segment_interface
);
AssertInfo
(
segment
!=
nullptr
,
"segment conversion failed"
);
AssertInfo
(
segment_interface
!=
nullptr
,
"segment conversion failed"
);
auto
proto
=
std
::
string
(
deleted_record_info
.
primary_keys
);
Assert
(
!
proto
.
empty
());
auto
pks
=
std
::
make_unique
<
milvus
::
proto
::
schema
::
IDs
>
();
...
...
@@ -219,7 +218,7 @@ LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_re
AssertInfo
(
suc
,
"unmarshal field data string failed"
);
auto
load_info
=
LoadDeletedRecordInfo
{
deleted_record_info
.
timestamps
,
pks
.
get
(),
deleted_record_info
.
row_count
};
segment
->
LoadDeletedRecord
(
load_info
);
segment
_interface
->
LoadDeletedRecord
(
load_info
);
return
milvus
::
SuccessCStatus
();
}
catch
(
std
::
exception
&
e
)
{
return
milvus
::
FailureCStatus
(
UnexpectedError
,
e
.
what
());
...
...
internal/querynode/mock_test.go
浏览文件 @
b3eb2b1d
...
...
@@ -1062,13 +1062,15 @@ func saveDeltaLog(collectionID UniqueID,
kvs
:=
make
(
map
[
string
][]
byte
,
1
)
// write insert binlog
// write delta log
pkFieldID
:=
UniqueID
(
106
)
fieldBinlog
:=
make
([]
*
datapb
.
FieldBinlog
,
0
)
log
.
Debug
(
"[query node unittest] save delta log"
,
zap
.
Int64
(
"fieldID"
,
999
))
key
:=
JoinIDPath
(
collectionID
,
partitionID
,
segmentID
,
999
)
log
.
Debug
(
"[query node unittest] save delta log"
,
zap
.
Int64
(
"fieldID"
,
pkFieldID
))
key
:=
JoinIDPath
(
collectionID
,
partitionID
,
segmentID
,
pkFieldID
)
key
+=
"delta"
// append suffix 'delta' to avoid conflicts against binlog
kvs
[
key
]
=
blob
.
Value
[
:
]
fieldBinlog
=
append
(
fieldBinlog
,
&
datapb
.
FieldBinlog
{
FieldID
:
999
,
FieldID
:
pkFieldID
,
Binlogs
:
[]
*
datapb
.
Binlog
{{
LogPath
:
key
}},
})
log
.
Debug
(
"[query node unittest] save delta log file to MinIO/S3"
)
...
...
internal/querynode/segment.go
浏览文件 @
b3eb2b1d
...
...
@@ -774,10 +774,6 @@ func (s *Segment) segmentLoadDeletedRecord(primaryKeys []primaryKey, timestamps
if
s
.
segmentPtr
==
nil
{
return
errors
.
New
(
"null seg core pointer"
)
}
if
s
.
segmentType
!=
segmentTypeSealed
{
errMsg
:=
fmt
.
Sprintln
(
"segmentLoadFieldData failed, illegal segment type "
,
s
.
segmentType
,
"segmentID = "
,
s
.
ID
())
return
errors
.
New
(
errMsg
)
}
if
len
(
primaryKeys
)
<=
0
{
return
fmt
.
Errorf
(
"empty pks to delete"
)
...
...
internal/querynode/segment_loader.go
浏览文件 @
b3eb2b1d
...
...
@@ -25,6 +25,9 @@ import (
"strconv"
"sync"
"github.com/panjf2000/ants/v2"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/common"
etcdkv
"github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/log"
...
...
@@ -41,8 +44,6 @@ import (
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/metricsinfo"
"github.com/milvus-io/milvus/internal/util/timerecord"
"github.com/panjf2000/ants/v2"
"go.uber.org/zap"
)
// segmentLoader is only responsible for loading the field data from binlog
...
...
@@ -97,10 +98,8 @@ func (loader *segmentLoader) loadSegment(req *querypb.LoadSegmentsRequest, segme
switch
segmentType
{
case
segmentTypeGrowing
:
metaReplica
=
loader
.
streamingReplica
case
segmentTypeSealed
:
metaReplica
=
loader
.
historicalReplica
default
:
err
:=
fmt
.
Errorf
(
"illegal segment type when load segment, collectionID = %d"
,
req
.
CollectionID
)
log
.
Error
(
"load segment failed, illegal segment type"
,
...
...
internal/querynode/segment_loader_test.go
浏览文件 @
b3eb2b1d
...
...
@@ -430,7 +430,7 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
deltaLogs
,
err
:=
saveDeltaLog
(
defaultCollectionID
,
defaultPartitionID
,
defaultSegmentID
)
assert
.
NoError
(
t
,
err
)
t
.
Run
(
"test load
growing and
sealed segments"
,
func
(
t
*
testing
.
T
)
{
t
.
Run
(
"test load sealed segments"
,
func
(
t
*
testing
.
T
)
{
node
,
err
:=
genSimpleQueryNode
(
ctx
)
assert
.
NoError
(
t
,
err
)
...
...
@@ -451,7 +451,6 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
PartitionID
:
defaultPartitionID
,
CollectionID
:
defaultCollectionID
,
BinlogPaths
:
fieldBinlog
,
Deltalogs
:
deltaLogs
,
},
},
}
...
...
@@ -459,6 +458,10 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
err
=
loader
.
loadSegment
(
req1
,
segmentTypeSealed
)
assert
.
NoError
(
t
,
err
)
segment1
,
err
:=
loader
.
historicalReplica
.
getSegmentByID
(
segmentID1
)
assert
.
NoError
(
t
,
err
)
assert
.
Equal
(
t
,
segment1
.
getRowCount
(),
int64
(
100
))
segmentID2
:=
UniqueID
(
101
)
req2
:=
&
querypb
.
LoadSegmentsRequest
{
Base
:
&
commonpb
.
MsgBase
{
...
...
@@ -473,25 +476,54 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
PartitionID
:
defaultPartitionID
,
CollectionID
:
defaultCollectionID
,
BinlogPaths
:
fieldBinlog
,
Deltalogs
:
deltaLogs
,
},
},
}
err
=
loader
.
loadSegment
(
req2
,
segmentType
Growing
)
err
=
loader
.
loadSegment
(
req2
,
segmentType
Sealed
)
assert
.
NoError
(
t
,
err
)
segment
1
,
err
:=
loader
.
historicalReplica
.
getSegmentByID
(
segmentID1
)
segment
2
,
err
:=
loader
.
historicalReplica
.
getSegmentByID
(
segmentID2
)
assert
.
NoError
(
t
,
err
)
// Note: getRowCount currently does not return accurate counts. The deleted rows are also counted.
assert
.
Equal
(
t
,
segment2
.
getRowCount
(),
int64
(
100
))
// accurate counts should be 98
})
segment2
,
err
:=
loader
.
streamingReplica
.
getSegmentByID
(
segmentID2
)
t
.
Run
(
"test load growing segments"
,
func
(
t
*
testing
.
T
)
{
node
,
err
:=
genSimpleQueryNode
(
ctx
)
assert
.
NoError
(
t
,
err
)
loader
:=
node
.
loader
assert
.
NotNil
(
t
,
loader
)
segmentID1
:=
UniqueID
(
100
)
req1
:=
&
querypb
.
LoadSegmentsRequest
{
Base
:
&
commonpb
.
MsgBase
{
MsgType
:
commonpb
.
MsgType_WatchQueryChannels
,
MsgID
:
rand
.
Int63
(),
},
DstNodeID
:
0
,
Schema
:
schema
,
Infos
:
[]
*
querypb
.
SegmentLoadInfo
{
{
SegmentID
:
segmentID1
,
PartitionID
:
defaultPartitionID
,
CollectionID
:
defaultCollectionID
,
BinlogPaths
:
fieldBinlog
,
},
},
}
err
=
loader
.
loadSegment
(
req1
,
segmentTypeGrowing
)
assert
.
NoError
(
t
,
err
)
assert
.
Equal
(
t
,
segment1
.
getRowCount
(),
segment2
.
getRowCount
())
segment1
,
err
:=
loader
.
streamingReplica
.
getSegmentByID
(
segmentID1
)
assert
.
NoError
(
t
,
err
)
assert
.
Equal
(
t
,
segment1
.
getRowCount
(),
int64
(
100
))
// Loading growing segments with delta log, expect to fail (this is a bug).
// See: https://github.com/milvus-io/milvus/issues/16821
segmentID3
:=
UniqueID
(
102
)
req3
:=
&
querypb
.
LoadSegmentsRequest
{
segmentID2
:=
UniqueID
(
101
)
req2
:=
&
querypb
.
LoadSegmentsRequest
{
Base
:
&
commonpb
.
MsgBase
{
MsgType
:
commonpb
.
MsgType_WatchQueryChannels
,
MsgID
:
rand
.
Int63
(),
...
...
@@ -500,7 +532,7 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
Schema
:
schema
,
Infos
:
[]
*
querypb
.
SegmentLoadInfo
{
{
SegmentID
:
segmentID
3
,
SegmentID
:
segmentID
2
,
PartitionID
:
defaultPartitionID
,
CollectionID
:
defaultCollectionID
,
BinlogPaths
:
fieldBinlog
,
...
...
@@ -509,8 +541,13 @@ func TestSegmentLoader_testLoadGrowingAndSealed(t *testing.T) {
},
}
err
=
loader
.
loadSegment
(
req3
,
segmentTypeGrowing
)
assert
.
Error
(
t
,
err
)
err
=
loader
.
loadSegment
(
req2
,
segmentTypeGrowing
)
assert
.
NoError
(
t
,
err
)
segment2
,
err
:=
loader
.
streamingReplica
.
getSegmentByID
(
segmentID2
)
assert
.
NoError
(
t
,
err
)
// Note: getRowCount currently does not return accurate counts. The deleted rows are also counted.
assert
.
Equal
(
t
,
segment2
.
getRowCount
(),
int64
(
100
))
// accurate counts should be 98
})
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录