Commit 95b162cc authored by sunby, committed by yefu.chen

Refactor flush scheduler

Signed-off-by: sunby <bingyi.sun@zilliz.com>
Parent d5d9fa03
......@@ -68,6 +68,18 @@ func (kv *EtcdKV) Load(key string) (string, error) {
return string(resp.Kvs[0].Value), nil
}
func (kv *EtcdKV) GetCount(key string) (int64, error) {
key = path.Join(kv.rootPath, key)
ctx, cancel := context.WithTimeout(context.TODO(), RequestTimeout)
defer cancel()
resp, err := kv.client.Get(ctx, key)
if err != nil {
return -1, err
}
return resp.Count, nil
}
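The new GetCount surfaces etcd's key count for an exact-key Get, so callers can test whether a key exists without treating an absent key as a load failure. A minimal usage sketch built on it; the exists helper is illustrative, not part of this commit:

// exists reports whether key is present under the KV root path.
// Unlike Load, which errors when resp.Kvs is empty, GetCount
// returns 0 for an absent key.
func exists(kv *EtcdKV, key string) (bool, error) {
	count, err := kv.GetCount(key)
	if err != nil {
		return false, err
	}
	return count > 0, nil
}

DescribeSegment in the write node client below relies on exactly this distinction.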
func (kv *EtcdKV) MultiLoad(keys []string) ([]string, error) {
ops := make([]clientv3.Op, 0, len(keys))
for _, keyLoad := range keys {
......
......@@ -90,12 +90,12 @@ func (m *MockBuildIndexClient) GetIndexFilePaths(indexID UniqueID) ([]string, er
}
type LoadIndexClient interface {
-	LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string) error
+	LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string, indexParams map[string]string) error
}
type MockLoadIndexClient struct {
}
-func (m *MockLoadIndexClient) LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string) error {
+func (m *MockLoadIndexClient) LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string, indexParams map[string]string) error {
return nil
}
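LoadIndex now carries the index parameters to the query node alongside the file paths and field identity, so the receiver no longer has to look them up separately. A sketch of a call under the new signature; the parameter keys and values are placeholders, not taken from this commit:

// Illustrative only: real params come from the field index meta in etcd.
params := map[string]string{"index_type": "IVF_FLAT", "metric_type": "L2"}
if err := loadIndexClient.LoadIndex(indexPaths, segmentID, fieldID, fieldName, params); err != nil {
	log.Printf("load index failed: %s", err.Error())
}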
......@@ -48,6 +48,9 @@ func (scheduler *FlushScheduler) schedule(id interface{}) error {
return nil
}
func (scheduler *FlushScheduler) describe() error {
+	timeTick := time.Tick(100 * time.Millisecond)
+	descTasks := make(map[UniqueID]bool)
+	closable := make([]UniqueID, 0)
for {
select {
case <-scheduler.ctx.Done():
......@@ -55,62 +58,72 @@ func (scheduler *FlushScheduler) describe() error {
log.Printf("broadcast context done, exit")
return errors.New("broadcast done exit")
}
-		case singleSegmentID := <-scheduler.segmentDescribeChan:
-			for {
+		case <-timeTick:
+			for singleSegmentID := range descTasks {
 				description, err := scheduler.client.DescribeSegment(singleSegmentID)
 				if err != nil {
-					return err
+					log.Printf("describe segment %d err %s", singleSegmentID, err.Error())
+					continue
 				}
-				if description.IsClosed {
-					log.Printf("flush segment %d is closed", singleSegmentID)
-					mapData, err := scheduler.client.GetInsertBinlogPaths(singleSegmentID)
-					if err != nil {
-						return err
-					}
-					for fieldID, data := range mapData {
-						// check field indexable
-						segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
-						if err != nil {
-							return err
-						}
-						indexable, err := scheduler.metaTable.IsIndexable(segMeta.CollectionID, fieldID)
-						if err != nil {
-							return err
-						}
-						if !indexable {
-							continue
-						}
-						info := &IndexBuildInfo{
-							segmentID:      singleSegmentID,
-							fieldID:        fieldID,
-							binlogFilePath: data,
-						}
-						err = scheduler.indexBuilderSch.Enqueue(info)
-						log.Printf("segment %d field %d enqueue build index scheduler", singleSegmentID, fieldID)
-						if err != nil {
-							return err
-						}
-					}
-					// Save data to meta table
-					segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
-					if err != nil {
-						return err
-					}
-					segMeta.BinlogFilePaths = make([]*etcdpb.FieldBinlogFiles, 0)
-					for k, v := range mapData {
-						segMeta.BinlogFilePaths = append(segMeta.BinlogFilePaths, &etcdpb.FieldBinlogFiles{
-							FieldID:     k,
-							BinlogFiles: v,
-						})
-					}
-					if err = scheduler.metaTable.UpdateSegment(segMeta); err != nil {
-						return err
-					}
-					log.Printf("flush segment %d finished", singleSegmentID)
-					break
-				}
-				time.Sleep(1 * time.Second)
-			}
+				if !description.IsClosed {
+					continue
+				}
+				log.Printf("flush segment %d is closed", singleSegmentID)
+				mapData, err := scheduler.client.GetInsertBinlogPaths(singleSegmentID)
+				if err != nil {
+					log.Printf("get insert binlog paths err, segID: %d, err: %s", singleSegmentID, err.Error())
+					continue
+				}
+				segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
+				if err != nil {
+					log.Printf("get segment from metable failed, segID: %d, err: %s", singleSegmentID, err.Error())
+					continue
+				}
+				for fieldID, data := range mapData {
+					// check field indexable
+					indexable, err := scheduler.metaTable.IsIndexable(segMeta.CollectionID, fieldID)
+					if err != nil {
+						log.Printf("check field indexable from meta table failed, collID: %d, fieldID: %d, err %s", segMeta.CollectionID, fieldID, err.Error())
+						continue
+					}
+					if !indexable {
+						continue
+					}
+					info := &IndexBuildInfo{
+						segmentID:      singleSegmentID,
+						fieldID:        fieldID,
+						binlogFilePath: data,
+					}
+					err = scheduler.indexBuilderSch.Enqueue(info)
+					log.Printf("segment %d field %d enqueue build index scheduler", singleSegmentID, fieldID)
+					if err != nil {
+						log.Printf("index build enqueue failed, %s", err.Error())
+						continue
+					}
+				}
+				// Save data to meta table
+				segMeta.BinlogFilePaths = make([]*etcdpb.FieldBinlogFiles, 0)
+				for k, v := range mapData {
+					segMeta.BinlogFilePaths = append(segMeta.BinlogFilePaths, &etcdpb.FieldBinlogFiles{
+						FieldID:     k,
+						BinlogFiles: v,
+					})
+				}
+				if err = scheduler.metaTable.UpdateSegment(segMeta); err != nil {
+					return err
+				}
+				log.Printf("flush segment %d finished", singleSegmentID)
+				closable = append(closable, singleSegmentID)
+			}
+			// remove closed segment and clear closable
+			for _, segID := range closable {
+				delete(descTasks, segID)
+			}
+			closable = closable[:0]
+		case segID := <-scheduler.segmentDescribeChan:
+			descTasks[segID] = false
}
}
......
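This is the heart of the refactor. The old describe() blocked on one segment at a time, spinning in a nested loop with a one-second sleep, and returned (killing the scheduler) on any transient error. The new version registers incoming segment IDs in the descTasks map, lets a 100 ms ticker drive one DescribeSegment pass over all pending segments, logs and retries per-segment failures on the next tick, and collects finished IDs in closable so they are removed only after the pass. A self-contained sketch of this tick-and-poll pattern under those assumptions, with a generic probe function standing in for the describe-and-flush work:

package main

import (
	"context"
	"fmt"
	"time"
)

// pollLoop mirrors the refactored structure: IDs arrive on in and are only
// registered; each tick probes every pending ID once; completed IDs are
// collected first and deleted after the pass.
func pollLoop(ctx context.Context, in <-chan int64, probe func(int64) bool) {
	tick := time.Tick(100 * time.Millisecond)
	pending := make(map[int64]bool)
	finished := make([]int64, 0)
	for {
		select {
		case <-ctx.Done():
			return
		case id := <-in:
			pending[id] = false // cheap registration; no blocking work here
		case <-tick:
			for id := range pending {
				if probe(id) { // stands in for DescribeSegment + flush steps
					finished = append(finished, id)
				}
			}
			for _, id := range finished {
				delete(pending, id)
			}
			finished = finished[:0]
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	in := make(chan int64, 1)
	in <- 42
	pollLoop(ctx, in, func(id int64) bool {
		fmt.Printf("segment %d done\n", id)
		return true
	})
}

One subtle win of this shape: a segment that is not yet closed simply stays in the map and is re-examined on the next tick, so a slow segment no longer blocks every segment queued behind it.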
......@@ -133,6 +133,7 @@ func (scheduler *IndexBuildScheduler) describe() error {
fieldID: indexBuildInfo.fieldID,
fieldName: fieldName,
indexFilePaths: filePaths,
indexParams: channelInfo.indexParams,
}
// Save data to meta table
err = scheduler.metaTable.UpdateFieldIndexMeta(&etcdpb.FieldIndexMeta{
......
......@@ -3,12 +3,15 @@ package master
import (
"context"
"log"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
)
type IndexLoadInfo struct {
segmentID UniqueID
fieldID UniqueID
fieldName string
indexParams []*commonpb.KeyValuePair
indexFilePaths []string
}
......@@ -36,7 +39,11 @@ func NewIndexLoadScheduler(ctx context.Context, client LoadIndexClient, metaTabl
func (scheduler *IndexLoadScheduler) schedule(info interface{}) error {
indexLoadInfo := info.(*IndexLoadInfo)
-	err := scheduler.client.LoadIndex(indexLoadInfo.indexFilePaths, indexLoadInfo.segmentID, indexLoadInfo.fieldID, indexLoadInfo.fieldName)
+	indexParams := make(map[string]string)
+	for _, kv := range indexLoadInfo.indexParams {
+		indexParams[kv.Key] = kv.Value
+	}
+	err := scheduler.client.LoadIndex(indexLoadInfo.indexFilePaths, indexLoadInfo.segmentID, indexLoadInfo.fieldID, indexLoadInfo.fieldName, indexParams)
//TODO: Save data to meta table
if err != nil {
return err
......
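The loop bridges the two representations: index params are persisted in meta as repeated commonpb.KeyValuePair, while the client API now takes a plain map. For writing parameters back to meta the inverse is symmetric; a hypothetical helper, not part of this commit:

// toKeyValuePairs is the inverse of the conversion in schedule above.
func toKeyValuePairs(m map[string]string) []*commonpb.KeyValuePair {
	pairs := make([]*commonpb.KeyValuePair, 0, len(m))
	for k, v := range m {
		pairs = append(pairs, &commonpb.KeyValuePair{Key: k, Value: v})
	}
	return pairs // note: map iteration order, and thus pair order, is random
}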
......@@ -68,6 +68,7 @@ func (task *createIndexTask) Execute() error {
fieldID: fieldID,
fieldName: task.req.FieldName,
indexFilePaths: indexMeta.IndexFilePaths,
indexParams: indexMeta.IndexParams,
})
if err != nil {
return err
......
......@@ -10,6 +10,12 @@ import (
"sync/atomic"
"time"
"github.com/zilliztech/milvus-distributed/internal/querynode/client"
indexbuilderclient "github.com/zilliztech/milvus-distributed/internal/indexbuilder/client"
writerclient "github.com/zilliztech/milvus-distributed/internal/writenode/client"
etcdkv "github.com/zilliztech/milvus-distributed/internal/kv/etcd"
ms "github.com/zilliztech/milvus-distributed/internal/msgstream"
"github.com/zilliztech/milvus-distributed/internal/proto/masterpb"
......@@ -175,9 +181,15 @@ func CreateServer(ctx context.Context) (*Master, error) {
m.scheduler.SetDDMsgStream(pulsarDDStream)
m.scheduler.SetIDAllocator(func() (UniqueID, error) { return m.idAllocator.AllocOne() })
-	flushClient := &MockWriteNodeClient{}
-	buildIndexClient := &MockBuildIndexClient{}
-	loadIndexClient := &MockLoadIndexClient{}
+	flushClient, err := writerclient.NewWriterClient(Params.EtcdAddress, kvRootPath, Params.WriteNodeSegKvSubPath, pulsarDDStream)
+	if err != nil {
+		return nil, err
+	}
+	buildIndexClient, err := indexbuilderclient.NewBuildIndexClient(ctx, Params.IndexBuilderAddress)
+	if err != nil {
+		return nil, err
+	}
+	loadIndexClient := client.NewLoadIndexClient(ctx, Params.PulsarAddress, Params.LoadIndexChannelNames)
m.indexLoadSch = NewIndexLoadScheduler(ctx, loadIndexClient, m.metaTable)
m.indexBuildSch = NewIndexBuildScheduler(ctx, buildIndexClient, m.metaTable, m.indexLoadSch)
......
......@@ -50,6 +50,8 @@ type ParamTable struct {
MaxPartitionNum int64
DefaultPartitionTag string
LoadIndexChannelNames []string
}
var Params ParamTable
......@@ -97,6 +99,8 @@ func (p *ParamTable) Init() {
p.initMsgChannelSubName()
p.initMaxPartitionNum()
p.initDefaultPartitionTag()
p.initLoadIndexChannelNames()
}
func (p *ParamTable) initAddress() {
......@@ -356,3 +360,11 @@ func (p *ParamTable) initDefaultPartitionTag() {
p.DefaultPartitionTag = defaultTag
}
func (p *ParamTable) initLoadIndexChannelNames() {
loadIndexChannelName, err := p.Load("msgChannel.chanNamePrefix.cmd")
if err != nil {
panic(err)
}
p.LoadIndexChannelNames = []string{loadIndexChannelName}
}
......@@ -236,7 +236,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
return err
}
for i, v := range val {
fmt.Printf("\t\t%d : %f\n", i, v)
fmt.Printf("\t\t%d : %v\n", i, v)
}
case schemapb.DataType_STRING:
rows, err := reader.GetPayloadLengthFromReader()
......
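The verb change from %f to %v is a small correctness fix: %f only formats floating-point values, so any other element type in val would print as noise, while %v falls back to each type's default format. A standalone illustration, not from this commit:

fmt.Printf("%f\n", int32(7))     // prints: %!f(int32=7)
fmt.Printf("%v\n", int32(7))     // prints: 7
fmt.Printf("%v\n", float32(1.5)) // prints: 1.5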
......@@ -6,6 +6,7 @@ import (
"github.com/golang/protobuf/proto"
"go.etcd.io/etcd/clientv3"
"github.com/zilliztech/milvus-distributed/internal/errors"
"github.com/zilliztech/milvus-distributed/internal/kv"
etcdkv "github.com/zilliztech/milvus-distributed/internal/kv/etcd"
"github.com/zilliztech/milvus-distributed/internal/msgstream"
......@@ -79,6 +80,21 @@ func (c *Client) DescribeSegment(segmentID UniqueID) (*SegmentDescription, error
}
key := c.kvPrefix + strconv.FormatInt(segmentID, 10)
etcdKV, ok := c.kvClient.(*etcdkv.EtcdKV)
if !ok {
return nil, errors.New("type assertion failed for etcd kv")
}
count, err := etcdKV.GetCount(key)
if err != nil {
return nil, err
}
if count <= 0 {
ret.IsClosed = false
return ret, nil
}
value, err := c.kvClient.Load(key)
if err != nil {
return ret, err
......
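With GetCount in place, DescribeSegment can distinguish "no flush record written yet" from a real etcd failure: a zero count yields a description with IsClosed == false, where an unconditional Load would have returned an error for the missing key. A hedged sketch of a caller relying on that behavior; everything outside the diff is an assumption:

// Poll until the write node marks the segment closed; a key that does not
// exist yet is a normal "not closed" answer, not an error.
for {
	desc, err := c.DescribeSegment(segmentID)
	if err != nil {
		return err // transport or etcd failure, not a missing key
	}
	if desc.IsClosed {
		break
	}
	time.Sleep(100 * time.Millisecond)
}

This is exactly what lets the flush scheduler above poll on a timer without special-casing segments whose flush has not started.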