提交 a7636a26 作者: jinhai

Add batch import

上级 bd10891c
...@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__) ...@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
class TestVectorEngine: class TestVectorEngine:
def setup_class(self): def setup_class(self):
self.__vectors = [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]
self.__vector = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8] self.__vector = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]
self.__limit = 1 self.__limit = 1
...@@ -44,29 +45,29 @@ class TestVectorEngine: ...@@ -44,29 +45,29 @@ class TestVectorEngine:
assert group_list == [{'group_name': 'test_group', 'file_number': 0}] assert group_list == [{'group_name': 'test_group', 'file_number': 0}]
# Add Vector for not exist group # Add Vector for not exist group
code, vector_id = VectorEngine.AddVector('not_exist_group', self.__vector) code, vector_id = VectorEngine.AddVector('not_exist_group', self.__vectors)
assert code == VectorEngine.GROUP_NOT_EXIST assert code == VectorEngine.GROUP_NOT_EXIST
assert vector_id == 'invalid' assert vector_id == 'invalid'
# Add vector for exist group # Add vector for exist group
code, vector_id = VectorEngine.AddVector('test_group', self.__vector) code, vector_id = VectorEngine.AddVector('test_group', self.__vectors)
assert code == VectorEngine.SUCCESS_CODE assert code == VectorEngine.SUCCESS_CODE
assert vector_id == 'test_group.0' assert vector_id == ['test_group.0']
# Add vector for exist group # Add vector for exist group
code, vector_id = VectorEngine.AddVector('test_group', self.__vector) code, vector_id = VectorEngine.AddVector('test_group', self.__vectors)
assert code == VectorEngine.SUCCESS_CODE assert code == VectorEngine.SUCCESS_CODE
assert vector_id == 'test_group.1' assert vector_id == ['test_group.1']
# Add vector for exist group # Add vector for exist group
code, vector_id = VectorEngine.AddVector('test_group', self.__vector) code, vector_id = VectorEngine.AddVector('test_group', self.__vectors)
assert code == VectorEngine.SUCCESS_CODE assert code == VectorEngine.SUCCESS_CODE
assert vector_id == 'test_group.2' assert vector_id == ['test_group.2']
# Add vector for exist group # Add vector for exist group
code, vector_id = VectorEngine.AddVector('test_group', self.__vector) code, vector_id = VectorEngine.AddVector('test_group', self.__vectors)
assert code == VectorEngine.SUCCESS_CODE assert code == VectorEngine.SUCCESS_CODE
assert vector_id == 'test_group.3' assert vector_id == ['test_group.3']
# Check search vector interface # Check search vector interface
code, vector_id = VectorEngine.SearchVector('test_group', self.__vector, self.__limit) code, vector_id = VectorEngine.SearchVector('test_group', self.__vector, self.__limit)
......
...@@ -48,17 +48,17 @@ class TestViews: ...@@ -48,17 +48,17 @@ class TestViews:
assert resp.status_code == 200 assert resp.status_code == 200
assert self.loads(resp)['code'] == 0 assert self.loads(resp)['code'] == 0
vector = {"vector": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]} vector = {"vector": [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]}
resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS) resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS)
assert resp.status_code == 200 assert resp.status_code == 200
assert self.loads(resp)['code'] == 0 assert self.loads(resp)['code'] == 0
vector = {"vector": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]} vector = {"vector": [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]}
resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS) resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS)
assert resp.status_code == 200 assert resp.status_code == 200
assert self.loads(resp)['code'] == 0 assert self.loads(resp)['code'] == 0
vector = {"vector": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]} vector = {"vector": [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]}
resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS) resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS)
assert resp.status_code == 200 assert resp.status_code == 200
assert self.loads(resp)['code'] == 0 assert self.loads(resp)['code'] == 0
......
...@@ -80,62 +80,65 @@ class VectorEngine(object): ...@@ -80,62 +80,65 @@ class VectorEngine(object):
@staticmethod @staticmethod
def AddVector(group_id, vector): def AddVector(group_id, vectors):
print(group_id, vector) print(group_id, vectors)
code, _, _ = VectorEngine.GetGroup(group_id) code, _, _ = VectorEngine.GetGroup(group_id)
if code == VectorEngine.FAULT_CODE: if code == VectorEngine.FAULT_CODE:
return VectorEngine.GROUP_NOT_EXIST, 'invalid' return VectorEngine.GROUP_NOT_EXIST, 'invalid'
file = FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').first() vector_str_list = []
group = GroupTable.query.filter(GroupTable.group_name == group_id).first() for vector in vectors:
if file: file = FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').first()
print('insert into exist file') group = GroupTable.query.filter(GroupTable.group_name == group_id).first()
# create vector id
vector_id = file.seq_no + 1 if file:
# insert into raw file print('insert into exist file')
VectorEngine.InsertVectorIntoRawFile(group_id, file.filename, vector, vector_id) # create vector id
vector_id = file.seq_no + 1
# check if the file can be indexed # insert into raw file
if file.row_number + 1 >= ROW_LIMIT: VectorEngine.InsertVectorIntoRawFile(group_id, file.filename, vector, vector_id)
raw_vector_array, raw_vector_id_array = VectorEngine.GetVectorListFromRawFile(group_id)
d = group.dimension # check if the file can be indexed
if file.row_number + 1 >= ROW_LIMIT:
# create index raw_vector_array, raw_vector_id_array = VectorEngine.GetVectorListFromRawFile(group_id)
index_builder = build_index.FactoryIndex() d = group.dimension
index = index_builder().build(d, raw_vector_array, raw_vector_id_array)
# create index
# TODO(jinhai): store index into Cache index_builder = build_index.FactoryIndex()
index_filename = file.filename + '_index' index = index_builder().build(d, raw_vector_array, raw_vector_id_array)
serialize.write_index(file_name=index_filename, index=index)
# TODO(jinhai): store index into Cache
FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, index_filename = file.filename + '_index'
'type': 'index', serialize.write_index(file_name=index_filename, index=index)
'filename': index_filename,
'seq_no': file.seq_no + 1}) FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1,
db.session.commit() 'type': 'index',
VectorEngine.group_dict = None 'filename': index_filename,
'seq_no': file.seq_no + 1})
db.session.commit()
VectorEngine.group_dict = None
else:
# we still can insert into exist raw file, update database
FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1,
'seq_no': file.seq_no + 1})
db.session.commit()
print('Update db for raw file insertion')
else: else:
# we still can insert into exist raw file, update database print('add a new raw file')
FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, # first raw file
'seq_no': file.seq_no + 1}) raw_filename = group_id + '.raw'
# create vector id
vector_id = 0
# create and insert vector into raw file
VectorEngine.InsertVectorIntoRawFile(group_id, raw_filename, vector, vector_id)
# insert a record into database
db.session.add(FileTable(group_id, raw_filename, 'raw', 1))
db.session.commit() db.session.commit()
print('Update db for raw file insertion')
pass
else: vector_str_list.append(group_id + '.' + str(vector_id))
print('add a new raw file')
# first raw file
raw_filename = group_id + '.raw'
# create vector id
vector_id = 0
# create and insert vector into raw file
VectorEngine.InsertVectorIntoRawFile(group_id, raw_filename, vector, vector_id)
# insert a record into database
db.session.add(FileTable(group_id, raw_filename, 'raw', 1))
db.session.commit()
vector_id_str = group_id + '.' + str(vector_id) return VectorEngine.SUCCESS_CODE, vector_str_list
return VectorEngine.SUCCESS_CODE, vector_id_str
@staticmethod @staticmethod
......
...@@ -14,10 +14,9 @@ from flask_restful import request ...@@ -14,10 +14,9 @@ from flask_restful import request
class Vector(Resource): class Vector(Resource):
def __init__(self): def __init__(self):
self.__parser = reqparse.RequestParser() self.__parser = reqparse.RequestParser()
self.__parser.add_argument('vector', type=float, action='append', location=['json']) self.__parser.add_argument('vector', type=list, action='append', location=['json'])
def post(self, group_id): def post(self, group_id):
print(request.json)
args = self.__parser.parse_args() args = self.__parser.parse_args()
vector = args['vector'] vector = args['vector']
code, vector_id = VectorEngine.AddVector(group_id, vector) code, vector_id = VectorEngine.AddVector(group_id, vector)
......
pytest -v --disable-warnings pytest -vv --disable-warnings
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册