scheduler.py 1.5 KB
Newer Older
X
xj.lin 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
from engine.retrieval import search_index
from engine.ingestion import build_index

class Singleton(type):
    """Metaclass that hands out a single shared instance per class.

    The first call to a class using this metaclass constructs the instance
    with the supplied arguments; every later call returns that same object
    and its arguments are ignored.
    """

    # Maps each class to the one instance created for it.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance of ``cls``, creating it on first use."""
        registry = cls._instances
        if cls in registry:
            return registry[cls]

        instance = super().__call__(*args, **kwargs)
        registry[cls] = instance
        return instance


class Scheduler(metaclass=Singleton):
    """Run a vector query over several data sources and merge the results.

    A query is executed against every raw-data key (an index is built for
    each one at query time) and every prebuilt-index key, and the per-source
    results are then combined. The ``Singleton`` metaclass makes every
    ``Scheduler()`` call return the same instance.
    """

    def Search(self, index_file_key, vectors, k):
        """Validate the query arguments and execute the search.

        Validation raises explicit exceptions instead of using ``assert`` so
        that it still runs under ``python -O`` and so that array-like
        ``vectors`` (whose truth value may be ambiguous) are accepted.

        Args:
            index_file_key: Subscriptable object providing a ``'raw'`` entry
                and an ``'index'`` entry, each an iterable of keys.
            vectors: Query vectors, forwarded unchanged to
                ``search_by_vectors``. Must not be ``None`` or empty.
            k: Result count forwarded to the searchers; must be truthy.

        Returns:
            The merged search result (see ``__scheduler``).

        Raises:
            ValueError: If ``index_file_key``, ``vectors`` or ``k`` is
                missing or empty.
        """
        if not index_file_key:
            raise ValueError("index_file_key must not be empty")
        if self._is_missing(vectors):
            raise ValueError("vectors must not be empty")
        if not k:
            raise ValueError("k must be a non-zero result count")

        return self.__scheduler(index_file_key, vectors, k)

    @staticmethod
    def _is_missing(value):
        """Return True when ``value`` is ``None`` or has no elements."""
        if value is None:
            return True
        try:
            # Length, not truthiness: multi-element arrays refuse bool().
            return len(value) == 0
        except TypeError:
            # Unsized values fall back to ordinary truthiness.
            return not value

    def __scheduler(self, index_data_key, vectors, k):
        """Search every source named in ``index_data_key`` and merge the hits.

        Args:
            index_data_key: Subscriptable object with ``'raw'`` and
                ``'index'`` entries, each an iterable of keys.
            vectors: Query vectors passed to every searcher.
            k: Result count passed to every searcher and to ``top_k``.

        Returns:
            The ``vectors`` attribute of the only result when exactly one
            source was searched; otherwise the value returned by
            ``search_index.top_k`` for the summed results.
        """
        result_list = []

        # Both entries are read up front so a missing one fails before any
        # searching is done.
        raw_data_list = index_data_key['raw']
        index_data_list = index_data_key['index']

        for key in raw_data_list:
            # ``d`` is presumably the vector dimensionality - TODO confirm.
            raw_data, d = self.GetRawData(key)
            index_builder = build_index.FactoryIndex()
            # NOTE: an index is rebuilt for every raw key on every search.
            index = index_builder().build(d, raw_data)
            searcher = search_index.FaissSearch(index)
            result_list.append(searcher.search_by_vectors(vectors, k))

        for key in index_data_list:
            index = self.GetIndexData(key)
            searcher = search_index.FaissSearch(index)
            result_list.append(searcher.search_by_vectors(vectors, k))

        if len(result_list) == 1:
            # NOTE(review): this branch returns ``.vectors`` while the merge
            # branch returns the ``top_k`` output - confirm both have the
            # shape callers expect.
            return result_list[0].vectors

        # NOTE(review): ``sum`` starts from int 0, so the result type must
        # support ``0 + result``; with no sources at all, ``top_k`` receives
        # the bare int 0 - confirm that is handled.
        result = search_index.top_k(sum(result_list), k)
        return result

    def GetIndexData(self, key):
        """Return the prebuilt index stored under ``key``.

        Raises:
            NotImplementedError: Always; loading is not implemented yet.
        """
        raise NotImplementedError("Scheduler.GetIndexData is not implemented")

    def GetRawData(self, key):
        """Return a ``(raw_data, d)`` pair for ``key``.

        ``__scheduler`` unpacks the return value into exactly two items.

        Raises:
            NotImplementedError: Always; loading is not implemented yet.
        """
        raise NotImplementedError("Scheduler.GetRawData is not implemented")