Unverified commit 1a3dca9b, authored by yihao.dai and committed by GitHub

Fix dynamic partitions loading (#24112)

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Parent 7da5a31b
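In brief, the QueryCoord-side change reorders the load jobs (create replicas first, then call loadPartitions, then update the next target) and reworks the loadPartitions helper so that it no longer receives a schema argument: it now resolves the collection schema and index metadata through the broker and forwards both to every QueryNode, which is what lets dynamically created partitions be loaded with the correct index meta. Below is a condensed sketch of that helper, assembled from the hunks in this diff; the Milvus-internal imports (querypb, commonpb, merr, and the querycoordv2 meta and session packages) are assumed from the surrounding code, and error wrapping and logging are trimmed.

// Condensed sketch of the reworked loadPartitions in the querycoordv2 job package
// (not a drop-in file): the broker supplies schema and index info, and the request
// fans out to every node of every replica of the collection.
func loadPartitions(ctx context.Context, meta *meta.Meta, cluster session.Cluster,
	broker meta.Broker, collection int64, partitions ...int64) error {
	schema, err := broker.GetCollectionSchema(ctx, collection)
	if err != nil {
		return err
	}
	indexes, err := broker.DescribeIndex(ctx, collection)
	if err != nil {
		return err
	}
	loadReq := &querypb.LoadPartitionsRequest{
		Base:          &commonpb.MsgBase{MsgType: commonpb.MsgType_LoadPartitions},
		CollectionID:  collection,
		PartitionIDs:  partitions,
		Schema:        schema,
		IndexInfoList: indexes,
	}
	for _, replica := range meta.ReplicaManager.GetByCollection(collection) {
		for _, node := range replica.GetNodes() {
			status, err := cluster.LoadPartitions(ctx, node, loadReq)
			if err != nil {
				return err
			}
			if !merr.Ok(status) {
				return merr.Error(status)
			}
		}
	}
	return nil
}

On the QueryNode side, the diff moves the construction of the segcore CollectionIndexMeta (built from IndexInfoList) out of WatchDmChannels and into LoadPartitions, so the index meta is set when partitions are loaded rather than when channels are watched.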
......@@ -146,6 +146,7 @@ message LoadPartitionsRequest {
bool refresh = 8;
// resource group names
repeated string resource_groups = 9;
repeated index.IndexInfo index_info_list = 10;
}
message ReleasePartitionsRequest {
......@@ -199,7 +200,6 @@ message SyncNewCreatedPartitionRequest {
common.MsgBase base = 1;
int64 collectionID = 2;
int64 partitionID = 3;
schema.CollectionSchema schema = 4;
}
//-----------------query node grpc request and response proto----------------
......@@ -215,16 +215,15 @@ message WatchDmChannelsRequest {
int64 collectionID = 3;
repeated int64 partitionIDs = 4;
repeated data.VchannelInfo infos = 5;
schema.CollectionSchema schema = 6;
schema.CollectionSchema schema = 6; // keep it for compatibility of rolling upgrade from 2.2.x to 2.3
repeated data.SegmentInfo exclude_infos = 7;
LoadMetaInfo load_meta = 8;
LoadMetaInfo load_meta = 8; // keep it for compatibility of rolling upgrade from 2.2.x to 2.3
int64 replicaID = 9;
map<int64, data.SegmentInfo> segment_infos = 10;
// Deprecated
// for node down load balance, need to remove offline node in time after every watchDmChannel finish.
int64 offlineNodeID = 11;
int64 version = 12;
repeated index.IndexInfo index_info_list = 13;
}
message UnsubDmChannelRequest {
......@@ -275,10 +274,10 @@ message LoadSegmentsRequest {
common.MsgBase base = 1;
int64 dst_nodeID = 2;
repeated SegmentLoadInfo infos = 3;
schema.CollectionSchema schema = 4;
schema.CollectionSchema schema = 4; // keep it for compatibility of rolling upgrade from 2.2.x to 2.3
int64 source_nodeID = 5;
int64 collectionID = 6;
LoadMetaInfo load_meta = 7;
LoadMetaInfo load_meta = 7; // keep it for compatibility of rolling upgrade from 2.2.x to 2.3
int64 replicaID = 8;
repeated msg.MsgPosition delta_positions = 9; // keep it for compatibility of rolling upgrade from 2.2.x to 2.3
int64 version = 10;
......
......@@ -42,10 +42,10 @@ type LoadCollectionJob struct {
dist *meta.DistributionManager
meta *meta.Meta
broker meta.Broker
cluster session.Cluster
targetMgr *meta.TargetManager
targetObserver *observers.TargetObserver
broker meta.Broker
nodeMgr *session.NodeManager
}
......@@ -54,10 +54,10 @@ func NewLoadCollectionJob(
req *querypb.LoadCollectionRequest,
dist *meta.DistributionManager,
meta *meta.Meta,
broker meta.Broker,
cluster session.Cluster,
targetMgr *meta.TargetManager,
targetObserver *observers.TargetObserver,
broker meta.Broker,
nodeMgr *session.NodeManager,
) *LoadCollectionJob {
return &LoadCollectionJob{
......@@ -66,10 +66,10 @@ func NewLoadCollectionJob(
undo: NewUndoList(ctx, meta, cluster, targetMgr, targetObserver),
dist: dist,
meta: meta,
broker: broker,
cluster: cluster,
targetMgr: targetMgr,
targetObserver: targetObserver,
broker: broker,
nodeMgr: nodeMgr,
}
}
......@@ -131,22 +131,6 @@ func (job *LoadCollectionJob) Execute() error {
job.undo.LackPartitions = lackPartitionIDs
log.Info("find partitions to load", zap.Int64s("partitions", lackPartitionIDs))
// 2. loadPartitions on QueryNodes
err = loadPartitions(job.ctx, job.meta, job.cluster, nil, false, req.GetCollectionID(), lackPartitionIDs...)
if err != nil {
return err
}
job.undo.PartitionsLoaded = true
// 3. update next target
_, err = job.targetObserver.UpdateNextTarget(req.GetCollectionID(), partitionIDs...)
if err != nil {
msg := "failed to update next target"
log.Error(msg, zap.Error(err))
return utils.WrapError(msg, err)
}
job.undo.TargetUpdated = true
colExisted := job.meta.CollectionManager.Exist(req.GetCollectionID())
if !colExisted {
// Clear stale replicas, https://github.com/milvus-io/milvus/issues/20444
......@@ -158,7 +142,7 @@ func (job *LoadCollectionJob) Execute() error {
}
}
// 4. create replica if not exist
// 2. create replica if not exist
replicas := job.meta.ReplicaManager.GetByCollection(req.GetCollectionID())
if len(replicas) == 0 {
replicas, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber())
......@@ -174,6 +158,21 @@ func (job *LoadCollectionJob) Execute() error {
job.undo.NewReplicaCreated = true
}
// 3. loadPartitions on QueryNodes
err = loadPartitions(job.ctx, job.meta, job.cluster, job.broker, req.GetCollectionID(), lackPartitionIDs...)
if err != nil {
return err
}
// 4. update next target
_, err = job.targetObserver.UpdateNextTarget(req.GetCollectionID(), partitionIDs...)
if err != nil {
msg := "failed to update next target"
log.Error(msg, zap.Error(err))
return utils.WrapError(msg, err)
}
job.undo.TargetUpdated = true
// 5. put collection/partitions meta
partitions := lo.Map(lackPartitionIDs, func(partID int64, _ int) *meta.Partition {
return &meta.Partition{
......@@ -221,10 +220,10 @@ type LoadPartitionJob struct {
dist *meta.DistributionManager
meta *meta.Meta
broker meta.Broker
cluster session.Cluster
targetMgr *meta.TargetManager
targetObserver *observers.TargetObserver
broker meta.Broker
nodeMgr *session.NodeManager
}
......@@ -233,10 +232,10 @@ func NewLoadPartitionJob(
req *querypb.LoadPartitionsRequest,
dist *meta.DistributionManager,
meta *meta.Meta,
broker meta.Broker,
cluster session.Cluster,
targetMgr *meta.TargetManager,
targetObserver *observers.TargetObserver,
broker meta.Broker,
nodeMgr *session.NodeManager,
) *LoadPartitionJob {
return &LoadPartitionJob{
......@@ -245,10 +244,10 @@ func NewLoadPartitionJob(
undo: NewUndoList(ctx, meta, cluster, targetMgr, targetObserver),
dist: dist,
meta: meta,
broker: broker,
cluster: cluster,
targetMgr: targetMgr,
targetObserver: targetObserver,
broker: broker,
nodeMgr: nodeMgr,
}
}
......@@ -305,22 +304,7 @@ func (job *LoadPartitionJob) Execute() error {
job.undo.LackPartitions = lackPartitionIDs
log.Info("find partitions to load", zap.Int64s("partitions", lackPartitionIDs))
// 2. loadPartitions on QueryNodes
err := loadPartitions(job.ctx, job.meta, job.cluster, nil, false, req.GetCollectionID(), lackPartitionIDs...)
if err != nil {
return err
}
job.undo.PartitionsLoaded = true
// 3. update next target
_, err = job.targetObserver.UpdateNextTarget(req.GetCollectionID(), append(loadedPartitionIDs, lackPartitionIDs...)...)
if err != nil {
msg := "failed to update next target"
log.Error(msg, zap.Error(err))
return utils.WrapError(msg, err)
}
job.undo.TargetUpdated = true
var err error
if !job.meta.CollectionManager.Exist(req.GetCollectionID()) {
// Clear stale replicas, https://github.com/milvus-io/milvus/issues/20444
err = job.meta.ReplicaManager.RemoveCollection(req.GetCollectionID())
......@@ -331,7 +315,7 @@ func (job *LoadPartitionJob) Execute() error {
}
}
// 4. create replica if not exist
// 2. create replica if not exist
replicas := job.meta.ReplicaManager.GetByCollection(req.GetCollectionID())
if len(replicas) == 0 {
replicas, err = utils.SpawnReplicasWithRG(job.meta, req.GetCollectionID(), req.GetResourceGroups(), req.GetReplicaNumber())
......@@ -347,6 +331,21 @@ func (job *LoadPartitionJob) Execute() error {
job.undo.NewReplicaCreated = true
}
// 3. loadPartitions on QueryNodes
err = loadPartitions(job.ctx, job.meta, job.cluster, job.broker, req.GetCollectionID(), lackPartitionIDs...)
if err != nil {
return err
}
// 4. update next target
_, err = job.targetObserver.UpdateNextTarget(req.GetCollectionID(), append(loadedPartitionIDs, lackPartitionIDs...)...)
if err != nil {
msg := "failed to update next target"
log.Error(msg, zap.Error(err))
return utils.WrapError(msg, err)
}
job.undo.TargetUpdated = true
// 5. put collection/partitions meta
partitions := lo.Map(lackPartitionIDs, func(partID int64, _ int) *meta.Partition {
return &meta.Partition{
......
......@@ -36,6 +36,8 @@ type ReleaseCollectionJob struct {
req *querypb.ReleaseCollectionRequest
dist *meta.DistributionManager
meta *meta.Meta
broker meta.Broker
cluster session.Cluster
targetMgr *meta.TargetManager
targetObserver *observers.TargetObserver
}
......@@ -44,6 +46,8 @@ func NewReleaseCollectionJob(ctx context.Context,
req *querypb.ReleaseCollectionRequest,
dist *meta.DistributionManager,
meta *meta.Meta,
broker meta.Broker,
cluster session.Cluster,
targetMgr *meta.TargetManager,
targetObserver *observers.TargetObserver,
) *ReleaseCollectionJob {
......@@ -52,6 +56,8 @@ func NewReleaseCollectionJob(ctx context.Context,
req: req,
dist: dist,
meta: meta,
broker: broker,
cluster: cluster,
targetMgr: targetMgr,
targetObserver: targetObserver,
}
......@@ -66,7 +72,11 @@ func (job *ReleaseCollectionJob) Execute() error {
return nil
}
lenPartitions := len(job.meta.CollectionManager.GetPartitionsByCollection(req.GetCollectionID()))
loadedPartitions := job.meta.CollectionManager.GetPartitionsByCollection(req.GetCollectionID())
toRelease := lo.Map(loadedPartitions, func(partition *meta.Partition, _ int) int64 {
return partition.GetPartitionID()
})
releasePartitions(job.ctx, job.meta, job.cluster, req.GetCollectionID(), toRelease...)
err := job.meta.CollectionManager.RemoveCollection(req.GetCollectionID())
if err != nil {
......@@ -85,7 +95,7 @@ func (job *ReleaseCollectionJob) Execute() error {
job.targetObserver.ReleaseCollection(req.GetCollectionID())
waitCollectionReleased(job.dist, req.GetCollectionID())
metrics.QueryCoordNumCollections.WithLabelValues().Dec()
metrics.QueryCoordNumPartitions.WithLabelValues().Sub(float64(lenPartitions))
metrics.QueryCoordNumPartitions.WithLabelValues().Sub(float64(len(toRelease)))
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.TotalLabel).Inc()
metrics.QueryCoordReleaseCount.WithLabelValues(metrics.SuccessLabel).Inc()
return nil
......@@ -98,6 +108,7 @@ type ReleasePartitionJob struct {
req *querypb.ReleasePartitionsRequest
dist *meta.DistributionManager
meta *meta.Meta
broker meta.Broker
cluster session.Cluster
targetMgr *meta.TargetManager
targetObserver *observers.TargetObserver
......@@ -107,6 +118,7 @@ func NewReleasePartitionJob(ctx context.Context,
req *querypb.ReleasePartitionsRequest,
dist *meta.DistributionManager,
meta *meta.Meta,
broker meta.Broker,
cluster session.Cluster,
targetMgr *meta.TargetManager,
targetObserver *observers.TargetObserver,
......@@ -116,6 +128,7 @@ func NewReleasePartitionJob(ctx context.Context,
req: req,
dist: dist,
meta: meta,
broker: broker,
cluster: cluster,
targetMgr: targetMgr,
targetObserver: targetObserver,
......@@ -143,6 +156,7 @@ func (job *ReleasePartitionJob) Execute() error {
log.Warn("releasing partition(s) not loaded")
return nil
}
releasePartitions(job.ctx, job.meta, job.cluster, req.GetCollectionID(), toRelease...)
// If all partitions are released and LoadType is LoadPartition, clear all
if len(toRelease) == len(loadedPartitions) &&
......@@ -163,14 +177,8 @@ func (job *ReleasePartitionJob) Execute() error {
metrics.QueryCoordNumCollections.WithLabelValues().Dec()
waitCollectionReleased(job.dist, req.GetCollectionID())
} else {
err := releasePartitions(job.ctx, job.meta, job.cluster, false, req.GetCollectionID(), toRelease...)
if err != nil {
loadPartitions(job.ctx, job.meta, job.cluster, nil, true, req.GetCollectionID(), toRelease...)
return err
}
err = job.meta.CollectionManager.RemovePartition(toRelease...)
err := job.meta.CollectionManager.RemovePartition(toRelease...)
if err != nil {
loadPartitions(job.ctx, job.meta, job.cluster, nil, true, req.GetCollectionID(), toRelease...)
msg := "failed to release partitions from store"
log.Warn(msg, zap.Error(err))
return utils.WrapError(msg, err)
......
......@@ -20,7 +20,6 @@ import (
"context"
"time"
"github.com/cockroachdb/errors"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/querypb"
......@@ -35,6 +34,7 @@ type SyncNewCreatedPartitionJob struct {
req *querypb.SyncNewCreatedPartitionRequest
meta *meta.Meta
cluster session.Cluster
broker meta.Broker
}
func NewSyncNewCreatedPartitionJob(
......@@ -42,12 +42,14 @@ func NewSyncNewCreatedPartitionJob(
req *querypb.SyncNewCreatedPartitionRequest,
meta *meta.Meta,
cluster session.Cluster,
broker meta.Broker,
) *SyncNewCreatedPartitionJob {
return &SyncNewCreatedPartitionJob{
BaseJob: NewBaseJob(ctx, req.Base.GetMsgID(), req.GetCollectionID()),
req: req,
meta: meta,
cluster: cluster,
broker: broker,
}
}
......@@ -72,7 +74,7 @@ func (job *SyncNewCreatedPartitionJob) Execute() error {
zap.Int64("partitionID", req.GetPartitionID()),
)
err := loadPartitions(job.ctx, job.meta, job.cluster, req.GetSchema(), false, req.GetCollectionID(), req.GetPartitionID())
err := loadPartitions(job.ctx, job.meta, job.cluster, job.broker, req.GetCollectionID(), req.GetPartitionID())
if err != nil {
return err
}
......@@ -95,9 +97,3 @@ func (job *SyncNewCreatedPartitionJob) Execute() error {
return nil
}
func (job *SyncNewCreatedPartitionJob) PostExecute() {
if job.Error() != nil && !errors.Is(job.Error(), ErrPartitionNotInTarget) {
releasePartitions(job.ctx, job.meta, job.cluster, true, job.req.GetCollectionID(), job.req.GetPartitionID())
}
}
......@@ -18,6 +18,7 @@ package job
import (
"context"
"fmt"
"testing"
"github.com/cockroachdb/errors"
......@@ -120,6 +121,11 @@ func (suite *JobSuite) SetupSuite() {
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, collection).Return(vChannels, segmentBinlogs, nil)
}
suite.broker.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything).
Return(nil, nil)
suite.broker.EXPECT().DescribeIndex(mock.Anything, mock.Anything).
Return(nil, nil)
suite.cluster = session.NewMockCluster(suite.T())
suite.cluster.EXPECT().
LoadPartitions(mock.Anything, mock.Anything, mock.Anything).
......@@ -203,10 +209,10 @@ func (suite *JobSuite) TestLoadCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -230,10 +236,10 @@ func (suite *JobSuite) TestLoadCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -255,10 +261,10 @@ func (suite *JobSuite) TestLoadCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -282,10 +288,10 @@ func (suite *JobSuite) TestLoadCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -308,10 +314,10 @@ func (suite *JobSuite) TestLoadCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -329,10 +335,10 @@ func (suite *JobSuite) TestLoadCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -358,10 +364,10 @@ func (suite *JobSuite) TestLoadCollectionWithReplicas() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -390,10 +396,10 @@ func (suite *JobSuite) TestLoadCollectionWithDiffIndex() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -420,10 +426,10 @@ func (suite *JobSuite) TestLoadCollectionWithDiffIndex() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -451,10 +457,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -481,10 +487,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -508,10 +514,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -535,10 +541,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -561,10 +567,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -577,7 +583,6 @@ func (suite *JobSuite) TestLoadPartition() {
suite.meta.ResourceManager.AddResourceGroup("rg3")
// test load 3 replica in 1 rg, should pass rg check
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, int64(999)).Return(nil, nil, nil)
req := &querypb.LoadPartitionsRequest{
CollectionID: 999,
PartitionIDs: []int64{888},
......@@ -589,10 +594,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -600,7 +605,6 @@ func (suite *JobSuite) TestLoadPartition() {
suite.Contains(err.Error(), meta.ErrNodeNotEnough.Error())
// test load 3 replica in 3 rg, should pass rg check
suite.broker.EXPECT().GetRecoveryInfoV2(mock.Anything, int64(999)).Return(nil, nil, nil)
req = &querypb.LoadPartitionsRequest{
CollectionID: 999,
PartitionIDs: []int64{888},
......@@ -612,10 +616,10 @@ func (suite *JobSuite) TestLoadPartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -639,10 +643,10 @@ func (suite *JobSuite) TestDynamicLoad() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
return job
......@@ -657,10 +661,10 @@ func (suite *JobSuite) TestDynamicLoad() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
return job
......@@ -756,10 +760,10 @@ func (suite *JobSuite) TestLoadPartitionWithReplicas() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -789,10 +793,10 @@ func (suite *JobSuite) TestLoadPartitionWithDiffIndex() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -821,10 +825,10 @@ func (suite *JobSuite) TestLoadPartitionWithDiffIndex() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -848,6 +852,8 @@ func (suite *JobSuite) TestReleaseCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
)
......@@ -867,6 +873,8 @@ func (suite *JobSuite) TestReleaseCollection() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
)
......@@ -893,6 +901,7 @@ func (suite *JobSuite) TestReleasePartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
......@@ -914,6 +923,7 @@ func (suite *JobSuite) TestReleasePartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
......@@ -937,6 +947,7 @@ func (suite *JobSuite) TestReleasePartition() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
......@@ -968,6 +979,7 @@ func (suite *JobSuite) TestDynamicRelease() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
......@@ -983,6 +995,8 @@ func (suite *JobSuite) TestDynamicRelease() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
)
......@@ -1076,10 +1090,10 @@ func (suite *JobSuite) TestLoadCollectionStoreFailed() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -1120,10 +1134,10 @@ func (suite *JobSuite) TestLoadPartitionStoreFailed() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -1147,10 +1161,10 @@ func (suite *JobSuite) TestLoadCreateReplicaFailed() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -1159,6 +1173,159 @@ func (suite *JobSuite) TestLoadCreateReplicaFailed() {
}
}
func (suite *JobSuite) TestCallLoadPartitionFailed() {
// call LoadPartitions failed at get index info
getIndexErr := fmt.Errorf("mock get index error")
suite.broker.ExpectedCalls = lo.Filter(suite.broker.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "DescribeIndex"
})
for _, collection := range suite.collections {
suite.broker.EXPECT().DescribeIndex(mock.Anything, collection).Return(nil, getIndexErr)
loadCollectionReq := &querypb.LoadCollectionRequest{
CollectionID: collection,
}
loadCollectionJob := NewLoadCollectionJob(
context.Background(),
loadCollectionReq,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.nodeMgr,
)
suite.scheduler.Add(loadCollectionJob)
err := loadCollectionJob.Wait()
suite.T().Logf("%s", err)
suite.ErrorIs(err, getIndexErr)
loadPartitionReq := &querypb.LoadPartitionsRequest{
CollectionID: collection,
PartitionIDs: suite.partitions[collection],
}
loadPartitionJob := NewLoadPartitionJob(
context.Background(),
loadPartitionReq,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.nodeMgr,
)
suite.scheduler.Add(loadPartitionJob)
err = loadPartitionJob.Wait()
suite.ErrorIs(err, getIndexErr)
}
// call LoadPartitions failed at get schema
getSchemaErr := fmt.Errorf("mock get schema error")
suite.broker.ExpectedCalls = lo.Filter(suite.broker.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "GetCollectionSchema"
})
for _, collection := range suite.collections {
suite.broker.EXPECT().GetCollectionSchema(mock.Anything, collection).Return(nil, getSchemaErr)
loadCollectionReq := &querypb.LoadCollectionRequest{
CollectionID: collection,
}
loadCollectionJob := NewLoadCollectionJob(
context.Background(),
loadCollectionReq,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.nodeMgr,
)
suite.scheduler.Add(loadCollectionJob)
err := loadCollectionJob.Wait()
suite.ErrorIs(err, getSchemaErr)
loadPartitionReq := &querypb.LoadPartitionsRequest{
CollectionID: collection,
PartitionIDs: suite.partitions[collection],
}
loadPartitionJob := NewLoadPartitionJob(
context.Background(),
loadPartitionReq,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.nodeMgr,
)
suite.scheduler.Add(loadPartitionJob)
err = loadPartitionJob.Wait()
suite.ErrorIs(err, getSchemaErr)
}
suite.broker.ExpectedCalls = lo.Filter(suite.broker.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "DescribeIndex" && call.Method != "GetCollectionSchema"
})
suite.broker.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything).Return(nil, nil)
suite.broker.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(nil, nil)
}
func (suite *JobSuite) TestCallReleasePartitionFailed() {
ctx := context.Background()
suite.loadAll()
releasePartitionErr := fmt.Errorf("mock release partitions error")
suite.cluster.ExpectedCalls = lo.Filter(suite.cluster.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "ReleasePartitions"
})
suite.cluster.EXPECT().ReleasePartitions(mock.Anything, mock.Anything, mock.Anything).
Return(nil, releasePartitionErr)
for _, collection := range suite.collections {
releaseCollectionReq := &querypb.ReleaseCollectionRequest{
CollectionID: collection,
}
releaseCollectionJob := NewReleaseCollectionJob(
ctx,
releaseCollectionReq,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
)
suite.scheduler.Add(releaseCollectionJob)
err := releaseCollectionJob.Wait()
suite.NoError(err)
releasePartitionReq := &querypb.ReleasePartitionsRequest{
CollectionID: collection,
PartitionIDs: suite.partitions[collection],
}
releasePartitionJob := NewReleasePartitionJob(
ctx,
releasePartitionReq,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
)
suite.scheduler.Add(releasePartitionJob)
err = releasePartitionJob.Wait()
suite.NoError(err)
}
suite.cluster.ExpectedCalls = lo.Filter(suite.cluster.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "ReleasePartitions"
})
suite.cluster.EXPECT().ReleasePartitions(mock.Anything, mock.Anything, mock.Anything).
Return(utils.WrapStatus(commonpb.ErrorCode_Success, ""), nil)
}
func (suite *JobSuite) TestSyncNewCreatedPartition() {
newPartition := int64(999)
......@@ -1173,6 +1340,7 @@ func (suite *JobSuite) TestSyncNewCreatedPartition() {
req,
suite.meta,
suite.cluster,
suite.broker,
)
suite.scheduler.Add(job)
err := job.Wait()
......@@ -1191,6 +1359,7 @@ func (suite *JobSuite) TestSyncNewCreatedPartition() {
req,
suite.meta,
suite.cluster,
suite.broker,
)
suite.scheduler.Add(job)
err = job.Wait()
......@@ -1206,6 +1375,7 @@ func (suite *JobSuite) TestSyncNewCreatedPartition() {
req,
suite.meta,
suite.cluster,
suite.broker,
)
suite.scheduler.Add(job)
err = job.Wait()
......@@ -1224,10 +1394,10 @@ func (suite *JobSuite) loadAll() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -1248,10 +1418,10 @@ func (suite *JobSuite) loadAll() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
......@@ -1277,6 +1447,8 @@ func (suite *JobSuite) releaseAll() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
)
......
......@@ -25,7 +25,6 @@ import (
)
type UndoList struct {
PartitionsLoaded bool // indicates if partitions loaded in QueryNodes during loading
TargetUpdated bool // indicates if target updated during loading
NewReplicaCreated bool // indicates if created new replicas during loading
......@@ -51,8 +50,8 @@ func NewUndoList(ctx context.Context, meta *meta.Meta,
}
func (u *UndoList) RollBack() {
if u.PartitionsLoaded {
releasePartitions(u.ctx, u.meta, u.cluster, true, u.CollectionID, u.LackPartitions...)
if u.NewReplicaCreated {
u.meta.ReplicaManager.RemoveCollection(u.CollectionID)
}
if u.TargetUpdated {
if !u.meta.CollectionManager.Exist(u.CollectionID) {
......@@ -62,7 +61,4 @@ func (u *UndoList) RollBack() {
u.targetMgr.RemovePartition(u.CollectionID, u.LackPartitions...)
}
}
if u.NewReplicaCreated {
u.meta.ReplicaManager.RemoveCollection(u.CollectionID)
}
}
......@@ -18,16 +18,17 @@ package job
import (
"context"
"fmt"
"time"
"github.com/samber/lo"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
......@@ -60,30 +61,39 @@ func waitCollectionReleased(dist *meta.DistributionManager, collection int64, pa
func loadPartitions(ctx context.Context,
meta *meta.Meta,
cluster session.Cluster,
schema *schemapb.CollectionSchema,
ignoreErr bool,
broker meta.Broker,
collection int64,
partitions ...int64) error {
schema, err := broker.GetCollectionSchema(ctx, collection)
if err != nil {
return err
}
indexes, err := broker.DescribeIndex(ctx, collection)
if err != nil {
return err
}
replicas := meta.ReplicaManager.GetByCollection(collection)
loadReq := &querypb.LoadPartitionsRequest{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_LoadPartitions,
},
CollectionID: collection,
PartitionIDs: partitions,
Schema: schema,
CollectionID: collection,
PartitionIDs: partitions,
Schema: schema,
IndexInfoList: indexes,
}
for _, replica := range replicas {
for _, node := range replica.GetNodes() {
status, err := cluster.LoadPartitions(ctx, node, loadReq)
if ignoreErr {
continue
}
// There is no need to rollback LoadPartitions as the load job will fail
// and the Delegator will not be created,
// resulting in search and query requests failing due to the absence of Delegator.
if err != nil {
return err
}
if status.GetErrorCode() != commonpb.ErrorCode_Success {
return fmt.Errorf("QueryNode failed to loadPartition, nodeID=%d, err=%s", node, status.GetReason())
if !merr.Ok(status) {
return merr.Error(status)
}
}
}
......@@ -93,9 +103,9 @@ func loadPartitions(ctx context.Context,
func releasePartitions(ctx context.Context,
meta *meta.Meta,
cluster session.Cluster,
ignoreErr bool,
collection int64,
partitions ...int64) error {
partitions ...int64) {
log := log.Ctx(ctx).With(zap.Int64("collection", collection), zap.Int64s("partitions", partitions))
replicas := meta.ReplicaManager.GetByCollection(collection)
releaseReq := &querypb.ReleasePartitionsRequest{
Base: &commonpb.MsgBase{
......@@ -107,16 +117,15 @@ func releasePartitions(ctx context.Context,
for _, replica := range replicas {
for _, node := range replica.GetNodes() {
status, err := cluster.ReleasePartitions(ctx, node, releaseReq)
if ignoreErr {
continue
}
// Ignore error as the Delegator will be removed from the query node,
// causing search and query requests to fail due to the absence of Delegator.
if err != nil {
return err
log.Warn("failed to ReleasePartitions", zap.Int64("node", node), zap.Error(err))
continue
}
if status.GetErrorCode() != commonpb.ErrorCode_Success {
return fmt.Errorf("QueryNode failed to releasePartitions, nodeID=%d, err=%s", node, status.GetReason())
if !merr.Ok(status) {
log.Warn("failed to ReleasePartitions", zap.Int64("node", node), zap.Error(merr.Error(status)))
}
}
}
return nil
}
......@@ -234,10 +234,10 @@ func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollection
req,
s.dist,
s.meta,
s.broker,
s.cluster,
s.targetMgr,
s.targetObserver,
s.broker,
s.nodeMgr,
)
s.jobScheduler.Add(loadJob)
......@@ -272,6 +272,8 @@ func (s *Server) ReleaseCollection(ctx context.Context, req *querypb.ReleaseColl
req,
s.dist,
s.meta,
s.broker,
s.cluster,
s.targetMgr,
s.targetObserver,
)
......@@ -331,10 +333,10 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions
req,
s.dist,
s.meta,
s.broker,
s.cluster,
s.targetMgr,
s.targetObserver,
s.broker,
s.nodeMgr,
)
s.jobScheduler.Add(loadJob)
......@@ -394,6 +396,7 @@ func (s *Server) ReleasePartitions(ctx context.Context, req *querypb.ReleasePart
req,
s.dist,
s.meta,
s.broker,
s.cluster,
s.targetMgr,
s.targetObserver,
......@@ -536,7 +539,7 @@ func (s *Server) SyncNewCreatedPartition(ctx context.Context, req *querypb.SyncN
return merr.Status(err), nil
}
syncJob := job.NewSyncNewCreatedPartitionJob(ctx, req, s.meta, s.cluster)
syncJob := job.NewSyncNewCreatedPartitionJob(ctx, req, s.meta, s.cluster, s.broker)
s.jobScheduler.Add(syncJob)
err := syncJob.Wait()
if err != nil && !errors.Is(err, job.ErrPartitionNotInTarget) {
......
......@@ -317,6 +317,7 @@ func (suite *ServiceSuite) TestLoadCollection() {
// Test load all collections
for _, collection := range suite.collections {
suite.expectGetRecoverInfo(collection)
suite.expectLoadPartitions()
req := &querypb.LoadCollectionRequest{
CollectionID: collection,
......@@ -777,6 +778,7 @@ func (suite *ServiceSuite) TestLoadPartition() {
// Test load all partitions
for _, collection := range suite.collections {
suite.expectLoadPartitions()
suite.expectGetRecoverInfo(collection)
req := &querypb.LoadPartitionsRequest{
......@@ -824,8 +826,6 @@ func (suite *ServiceSuite) TestLoadPartition() {
}
// Test load with more partitions
suite.cluster.EXPECT().LoadPartitions(mock.Anything, mock.Anything, mock.Anything).
Return(utils.WrapStatus(commonpb.ErrorCode_Success, ""), nil)
for _, collection := range suite.collections {
if suite.loadTypes[collection] != querypb.LoadType_LoadPartition {
continue
......@@ -874,6 +874,9 @@ func (suite *ServiceSuite) TestReleaseCollection() {
ctx := context.Background()
server := suite.server
suite.cluster.EXPECT().ReleasePartitions(mock.Anything, mock.Anything, mock.Anything).
Return(merr.Status(nil), nil)
// Test release all collections
for _, collection := range suite.collections {
req := &querypb.ReleaseCollectionRequest{
......@@ -1563,6 +1566,7 @@ func (suite *ServiceSuite) TestHandleNodeUp() {
func (suite *ServiceSuite) loadAll() {
ctx := context.Background()
for _, collection := range suite.collections {
suite.expectLoadPartitions()
suite.expectGetRecoverInfo(collection)
if suite.loadTypes[collection] == querypb.LoadType_LoadCollection {
req := &querypb.LoadCollectionRequest{
......@@ -1574,10 +1578,10 @@ func (suite *ServiceSuite) loadAll() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.jobScheduler.Add(job)
......@@ -1598,10 +1602,10 @@ func (suite *ServiceSuite) loadAll() {
req,
suite.dist,
suite.meta,
suite.broker,
suite.cluster,
suite.targetMgr,
suite.targetObserver,
suite.broker,
suite.nodeMgr,
)
suite.jobScheduler.Add(job)
......@@ -1697,6 +1701,15 @@ func (suite *ServiceSuite) expectGetRecoverInfo(collection int64) {
Return(vChannels, segmentBinlogs, nil)
}
func (suite *ServiceSuite) expectLoadPartitions() {
suite.broker.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything).
Return(nil, nil)
suite.broker.EXPECT().DescribeIndex(mock.Anything, mock.Anything).
Return(nil, nil)
suite.cluster.EXPECT().LoadPartitions(mock.Anything, mock.Anything, mock.Anything).
Return(utils.WrapStatus(commonpb.ErrorCode_Success, ""), nil)
}
func (suite *ServiceSuite) getAllSegments(collection int64) []int64 {
allSegments := make([]int64, 0)
for _, segments := range suite.segments[collection] {
......
......@@ -31,7 +31,6 @@ import (
"github.com/milvus-io/milvus/internal/kv"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
......@@ -186,13 +185,6 @@ func (suite *TaskSuite) TestSubscribeChannelTask() {
Return(&schemapb.CollectionSchema{
Name: "TestSubscribeChannelTask",
}, nil)
suite.broker.EXPECT().DescribeIndex(mock.Anything, suite.collection).Return([]*indexpb.IndexInfo{
{
CollectionID: suite.collection,
TypeParams: []*commonpb.KeyValuePair{},
IndexParams: []*commonpb.KeyValuePair{},
},
}, nil)
for channel, segment := range suite.growingSegments {
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, segment).
Return(&datapb.GetSegmentInfoResponse{Infos: []*datapb.SegmentInfo{
......
......@@ -20,15 +20,12 @@ import (
"context"
"time"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/msgpb"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
......@@ -93,8 +90,8 @@ func packLoadSegmentRequest(
commonpbutil.WithMsgID(task.ID()),
),
Infos: []*querypb.SegmentLoadInfo{loadInfo},
Schema: schema,
LoadMeta: loadMeta,
Schema: schema, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3
LoadMeta: loadMeta, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3
CollectionID: task.CollectionID(),
ReplicaID: task.ReplicaID(),
DeltaPositions: []*msgpb.MsgPosition{loadInfo.GetDeltaPosition()}, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3
......@@ -143,8 +140,8 @@ func packSubChannelRequest(
NodeID: action.Node(),
CollectionID: task.CollectionID(),
Infos: []*datapb.VchannelInfo{channel.VchannelInfo},
Schema: schema,
LoadMeta: loadMeta,
Schema: schema, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3
LoadMeta: loadMeta, // assign it for compatibility of rolling upgrade from 2.2.x to 2.3
ReplicaID: task.ReplicaID(),
Version: time.Now().UnixNano(),
}
......@@ -155,13 +152,6 @@ func fillSubChannelRequest(
req *querypb.WatchDmChannelsRequest,
broker meta.Broker,
) error {
indexes, err := broker.DescribeIndex(ctx, req.GetCollectionID())
if err != nil {
log.Warn("fail to get index meta when fillSubChannelRequest",
zap.Int64("collectionId", req.GetCollectionID()))
} else {
req.IndexInfoList = indexes
}
segmentIDs := typeutil.NewUniqueSet()
for _, vchannel := range req.GetInfos() {
segmentIDs.Insert(vchannel.GetFlushedSegmentIds()...)
......
......@@ -26,6 +26,8 @@ import "C"
import (
"github.com/milvus-io/milvus/internal/proto/segcorepb"
"github.com/milvus-io/milvus/pkg/log"
"go.uber.org/zap"
"sync"
"unsafe"
......@@ -106,11 +108,13 @@ func (c *Collection) AddPartition(partitions ...int64) {
for i := range partitions {
c.partitions.Insert(partitions[i])
}
log.Info("add partitions", zap.Int64("collection", c.ID()), zap.Int64s("partitions", partitions))
}
// removePartitionID removes the partition id from partition id list of collection
func (c *Collection) RemovePartition(partitionID int64) {
c.partitions.Remove(partitionID)
log.Info("remove partition", zap.Int64("collection", c.ID()), zap.Int64("partition", partitionID))
}
// getLoadType get the loadType of collection, which is loadTypeCollection or loadTypePartition
......
......@@ -220,32 +220,6 @@ func (node *QueryNode) WatchDmChannels(ctx context.Context, req *querypb.WatchDm
log.Info("channel already subscribed")
return util.SuccessStatus(), nil
}
fieldIndexMetas := make([]*segcorepb.FieldIndexMeta, 0)
for _, info := range req.GetIndexInfoList() {
fieldIndexMetas = append(fieldIndexMetas, &segcorepb.FieldIndexMeta{
CollectionID: info.GetCollectionID(),
FieldID: info.GetFieldID(),
IndexName: info.GetIndexName(),
TypeParams: info.GetTypeParams(),
IndexParams: info.GetIndexParams(),
IsAutoIndex: info.GetIsAutoIndex(),
UserIndexParams: info.GetUserIndexParams(),
})
}
sizePerRecord, err := typeutil.EstimateSizePerRecord(req.Schema)
maxIndexRecordPerSegment := int64(0)
if err != nil || sizePerRecord == 0 {
log.Warn("failed to transfer segment size to collection, because failed to estimate size per record", zap.Error(err))
} else {
threshold := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsFloat() * 1024 * 1024
proportion := paramtable.Get().DataCoordCfg.SegmentSealProportion.GetAsFloat()
maxIndexRecordPerSegment = int64(threshold * proportion / float64(sizePerRecord))
}
node.manager.Collection.Put(req.GetCollectionID(), req.GetSchema(), &segcorepb.CollectionIndexMeta{
IndexMetas: fieldIndexMetas,
MaxIndexRowCount: maxIndexRecordPerSegment,
}, req.GetLoadMeta())
delegator, err := delegator.NewShardDelegator(req.GetCollectionID(), req.GetReplicaID(), channel.GetChannelName(), req.GetVersion(),
node.clusterManager, node.manager, node.tSafeManager, node.loader, node.factory, channel.GetSeekPosition().GetTimestamp())
if err != nil {
......@@ -387,7 +361,31 @@ func (node *QueryNode) LoadPartitions(ctx context.Context, req *querypb.LoadPart
if req.GetSchema() == nil {
return merr.Status(merr.WrapErrCollectionNotLoaded(req.GetCollectionID(), "failed to load partitions")), nil
}
node.manager.Collection.Put(req.GetCollectionID(), req.GetSchema(), nil, &querypb.LoadMetaInfo{
fieldIndexMetas := make([]*segcorepb.FieldIndexMeta, 0)
for _, info := range req.GetIndexInfoList() {
fieldIndexMetas = append(fieldIndexMetas, &segcorepb.FieldIndexMeta{
CollectionID: info.GetCollectionID(),
FieldID: info.GetFieldID(),
IndexName: info.GetIndexName(),
TypeParams: info.GetTypeParams(),
IndexParams: info.GetIndexParams(),
IsAutoIndex: info.GetIsAutoIndex(),
UserIndexParams: info.GetUserIndexParams(),
})
}
sizePerRecord, err := typeutil.EstimateSizePerRecord(req.Schema)
maxIndexRecordPerSegment := int64(0)
if err != nil || sizePerRecord == 0 {
log.Warn("failed to transfer segment size to collection, because failed to estimate size per record", zap.Error(err))
} else {
threshold := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsFloat() * 1024 * 1024
proportion := paramtable.Get().DataCoordCfg.SegmentSealProportion.GetAsFloat()
maxIndexRecordPerSegment = int64(threshold * proportion / float64(sizePerRecord))
}
node.manager.Collection.Put(req.GetCollectionID(), req.GetSchema(), &segcorepb.CollectionIndexMeta{
IndexMetas: fieldIndexMetas,
MaxIndexRowCount: maxIndexRecordPerSegment,
}, &querypb.LoadMetaInfo{
CollectionID: req.GetCollectionID(),
PartitionIDs: req.GetPartitionIDs(),
LoadType: querypb.LoadType_LoadCollection, // TODO: dyh, remove loadType in querynode
......@@ -430,8 +428,6 @@ func (node *QueryNode) LoadSegments(ctx context.Context, req *querypb.LoadSegmen
return node.loadDeltaLogs(ctx, req), nil
}
node.manager.Collection.Put(req.GetCollectionID(), req.GetSchema(), nil, req.GetLoadMeta())
// Delegates request to workers
if req.GetNeedTransfer() {
delegator, ok := node.delegators.Get(segment.GetInsertChannel())
......@@ -1060,8 +1056,6 @@ func (node *QueryNode) SyncDistribution(ctx context.Context, req *querypb.SyncDi
commonpbutil.WithMsgID(req.Base.GetMsgID()),
),
Infos: infos,
Schema: req.GetSchema(),
LoadMeta: req.GetLoadMeta(),
CollectionID: req.GetCollectionID(),
ReplicaID: req.GetReplicaID(),
DstNodeID: nodeID,
......
......@@ -124,6 +124,15 @@ func (suite *ServiceSuite) SetupTest() {
// start node
err = suite.node.Start()
suite.NoError(err)
// init collection
schema := segments.GenTestCollectionSchema(suite.collectionName, schemapb.DataType_Int64)
LoadMeta := &querypb.LoadMetaInfo{
LoadType: querypb.LoadType_LoadCollection,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
}
suite.node.manager.Collection.Put(suite.collectionID, schema, nil, LoadMeta)
}
func (suite *ServiceSuite) TearDownTest() {
......@@ -223,7 +232,6 @@ func (suite *ServiceSuite) TestWatchDmChannelsInt64() {
ctx := context.Background()
// data
schema := segments.GenTestCollectionSchema(suite.collectionName, schemapb.DataType_Int64)
req := &querypb.WatchDmChannelsRequest{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_WatchDmChannels,
......@@ -233,7 +241,6 @@ func (suite *ServiceSuite) TestWatchDmChannelsInt64() {
NodeID: suite.node.session.ServerID,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
Schema: schema,
Infos: []*datapb.VchannelInfo{
{
CollectionID: suite.collectionID,
......@@ -243,11 +250,6 @@ func (suite *ServiceSuite) TestWatchDmChannelsInt64() {
DroppedSegmentIds: suite.droppedSegmentIDs,
},
},
LoadMeta: &querypb.LoadMetaInfo{
LoadType: querypb.LoadType_LoadCollection,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
},
}
// mocks
......@@ -272,7 +274,6 @@ func (suite *ServiceSuite) TestWatchDmChannelsVarchar() {
ctx := context.Background()
// data
schema := segments.GenTestCollectionSchema(suite.collectionName, schemapb.DataType_VarChar)
req := &querypb.WatchDmChannelsRequest{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_WatchDmChannels,
......@@ -282,7 +283,6 @@ func (suite *ServiceSuite) TestWatchDmChannelsVarchar() {
NodeID: suite.node.session.ServerID,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
Schema: schema,
Infos: []*datapb.VchannelInfo{
{
CollectionID: suite.collectionID,
......@@ -292,11 +292,6 @@ func (suite *ServiceSuite) TestWatchDmChannelsVarchar() {
DroppedSegmentIds: suite.droppedSegmentIDs,
},
},
LoadMeta: &querypb.LoadMetaInfo{
LoadType: querypb.LoadType_LoadCollection,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
},
}
// mocks
......@@ -321,7 +316,6 @@ func (suite *ServiceSuite) TestWatchDmChannels_Failed() {
ctx := context.Background()
// data
schema := segments.GenTestCollectionSchema(suite.collectionName, schemapb.DataType_Int64)
req := &querypb.WatchDmChannelsRequest{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_WatchDmChannels,
......@@ -331,7 +325,6 @@ func (suite *ServiceSuite) TestWatchDmChannels_Failed() {
NodeID: suite.node.session.ServerID,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
Schema: schema,
Infos: []*datapb.VchannelInfo{
{
CollectionID: suite.collectionID,
......@@ -476,7 +469,6 @@ func (suite *ServiceSuite) TestLoadSegments_Int64() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
DeltaPositions: []*msgpb.MsgPosition{{Timestamp: 20000}},
NeedTransfer: true,
......@@ -493,6 +485,13 @@ func (suite *ServiceSuite) TestLoadSegments_VarChar() {
suite.TestWatchDmChannelsVarchar()
// data
schema := segments.GenTestCollectionSchema(suite.collectionName, schemapb.DataType_VarChar)
LoadMeta := &querypb.LoadMetaInfo{
LoadType: querypb.LoadType_LoadCollection,
CollectionID: suite.collectionID,
PartitionIDs: suite.partitionIDs,
}
suite.node.manager.Collection = segments.NewCollectionManager()
suite.node.manager.Collection.Put(suite.collectionID, schema, nil, LoadMeta)
req := &querypb.LoadSegmentsRequest{
Base: &commonpb.MsgBase{
MsgID: rand.Int63(),
......@@ -500,7 +499,6 @@ func (suite *ServiceSuite) TestLoadSegments_VarChar() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
DeltaPositions: []*msgpb.MsgPosition{{Timestamp: 20000}},
NeedTransfer: true,
......@@ -524,7 +522,6 @@ func (suite *ServiceSuite) TestLoadDeltaInt64() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
NeedTransfer: true,
LoadScope: querypb.LoadScope_Delta,
......@@ -548,7 +545,6 @@ func (suite *ServiceSuite) TestLoadDeltaVarchar() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
NeedTransfer: true,
LoadScope: querypb.LoadScope_Delta,
......@@ -571,7 +567,6 @@ func (suite *ServiceSuite) TestLoadSegments_Failed() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
NeedTransfer: true,
}
......@@ -613,7 +608,6 @@ func (suite *ServiceSuite) TestLoadSegments_Transfer() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
NeedTransfer: true,
}
......@@ -634,7 +628,6 @@ func (suite *ServiceSuite) TestLoadSegments_Transfer() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
NeedTransfer: true,
}
......@@ -660,7 +653,6 @@ func (suite *ServiceSuite) TestLoadSegments_Transfer() {
},
CollectionID: suite.collectionID,
DstNodeID: suite.node.session.ServerID,
Schema: schema,
Infos: suite.genSegmentLoadInfos(schema),
NeedTransfer: true,
}
......@@ -1425,6 +1417,7 @@ func (suite *ServiceSuite) TestLoadPartition() {
suite.node.UpdateStateCode(commonpb.StateCode_Healthy)
// collection not exist and schema is nil
suite.node.manager.Collection = segments.NewCollectionManager()
status, err = suite.node.LoadPartitions(ctx, req)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, status.GetErrorCode())
......
......@@ -121,21 +121,10 @@ func (b *ServerBroker) ReleasePartitions(ctx context.Context, collectionID Uniqu
func (b *ServerBroker) SyncNewCreatedPartition(ctx context.Context, collectionID UniqueID, partitionID UniqueID) error {
log := log.Ctx(ctx).With(zap.Int64("collection", collectionID), zap.Int64("partitionID", partitionID))
log.Info("begin to sync new partition")
collection, err := b.s.meta.GetCollectionByID(ctx, collectionID, typeutil.MaxTimestamp, false)
if err != nil {
return err
}
schema := &schemapb.CollectionSchema{
Name: collection.Name,
Description: collection.Description,
AutoID: collection.AutoID,
Fields: model.MarshalFieldModels(collection.Fields),
}
resp, err := b.s.queryCoord.SyncNewCreatedPartition(ctx, &querypb.SyncNewCreatedPartitionRequest{
Base: commonpbutil.NewMsgBase(commonpbutil.WithMsgType(commonpb.MsgType_ReleasePartitions)),
CollectionID: collectionID,
PartitionID: partitionID,
Schema: schema,
})
if err != nil {
return err
......