From a7636a26363f94867cd9058be7cac871cc6ace29 Mon Sep 17 00:00:00 2001 From: jinhai Date: Tue, 2 Apr 2019 19:33:22 +0800 Subject: [PATCH] Add batch import --- .../controller/tests/test_vector_engine.py | 19 ++-- .../engine/controller/tests/test_views.py | 6 +- pyengine/engine/controller/vector_engine.py | 99 ++++++++++--------- pyengine/engine/controller/views.py | 3 +- pyengine/engine/run_test.sh | 2 +- 5 files changed, 66 insertions(+), 63 deletions(-) diff --git a/pyengine/engine/controller/tests/test_vector_engine.py b/pyengine/engine/controller/tests/test_vector_engine.py index c9730555..b9465101 100644 --- a/pyengine/engine/controller/tests/test_vector_engine.py +++ b/pyengine/engine/controller/tests/test_vector_engine.py @@ -11,6 +11,7 @@ logger = logging.getLogger(__name__) class TestVectorEngine: def setup_class(self): + self.__vectors = [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]] self.__vector = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8] self.__limit = 1 @@ -44,29 +45,29 @@ class TestVectorEngine: assert group_list == [{'group_name': 'test_group', 'file_number': 0}] # Add Vector for not exist group - code, vector_id = VectorEngine.AddVector('not_exist_group', self.__vector) + code, vector_id = VectorEngine.AddVector('not_exist_group', self.__vectors) assert code == VectorEngine.GROUP_NOT_EXIST assert vector_id == 'invalid' # Add vector for exist group - code, vector_id = VectorEngine.AddVector('test_group', self.__vector) + code, vector_id = VectorEngine.AddVector('test_group', self.__vectors) assert code == VectorEngine.SUCCESS_CODE - assert vector_id == 'test_group.0' + assert vector_id == ['test_group.0'] # Add vector for exist group - code, vector_id = VectorEngine.AddVector('test_group', self.__vector) + code, vector_id = VectorEngine.AddVector('test_group', self.__vectors) assert code == VectorEngine.SUCCESS_CODE - assert vector_id == 'test_group.1' + assert vector_id == ['test_group.1'] # Add vector for exist group - code, vector_id = VectorEngine.AddVector('test_group', self.__vector) + code, vector_id = VectorEngine.AddVector('test_group', self.__vectors) assert code == VectorEngine.SUCCESS_CODE - assert vector_id == 'test_group.2' + assert vector_id == ['test_group.2'] # Add vector for exist group - code, vector_id = VectorEngine.AddVector('test_group', self.__vector) + code, vector_id = VectorEngine.AddVector('test_group', self.__vectors) assert code == VectorEngine.SUCCESS_CODE - assert vector_id == 'test_group.3' + assert vector_id == ['test_group.3'] # Check search vector interface code, vector_id = VectorEngine.SearchVector('test_group', self.__vector, self.__limit) diff --git a/pyengine/engine/controller/tests/test_views.py b/pyengine/engine/controller/tests/test_views.py index 2cf89245..e933470b 100644 --- a/pyengine/engine/controller/tests/test_views.py +++ b/pyengine/engine/controller/tests/test_views.py @@ -48,17 +48,17 @@ class TestViews: assert resp.status_code == 200 assert self.loads(resp)['code'] == 0 - vector = {"vector": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]} + vector = {"vector": [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]} resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS) assert resp.status_code == 200 assert self.loads(resp)['code'] == 0 - vector = {"vector": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]} + vector = {"vector": [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]} resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS) assert resp.status_code == 200 assert self.loads(resp)['code'] == 0 - vector = {"vector": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]} + vector = {"vector": [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]]} resp = test_client.post('/vector/add/6', data=json.dumps(vector), headers = TestViews.HEADERS) assert resp.status_code == 200 assert self.loads(resp)['code'] == 0 diff --git a/pyengine/engine/controller/vector_engine.py b/pyengine/engine/controller/vector_engine.py index ed434d33..05f1706e 100644 --- a/pyengine/engine/controller/vector_engine.py +++ b/pyengine/engine/controller/vector_engine.py @@ -80,62 +80,65 @@ class VectorEngine(object): @staticmethod - def AddVector(group_id, vector): - print(group_id, vector) + def AddVector(group_id, vectors): + print(group_id, vectors) code, _, _ = VectorEngine.GetGroup(group_id) if code == VectorEngine.FAULT_CODE: return VectorEngine.GROUP_NOT_EXIST, 'invalid' - file = FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').first() - group = GroupTable.query.filter(GroupTable.group_name == group_id).first() - if file: - print('insert into exist file') - # create vector id - vector_id = file.seq_no + 1 - # insert into raw file - VectorEngine.InsertVectorIntoRawFile(group_id, file.filename, vector, vector_id) - - # check if the file can be indexed - if file.row_number + 1 >= ROW_LIMIT: - raw_vector_array, raw_vector_id_array = VectorEngine.GetVectorListFromRawFile(group_id) - d = group.dimension - - # create index - index_builder = build_index.FactoryIndex() - index = index_builder().build(d, raw_vector_array, raw_vector_id_array) - - # TODO(jinhai): store index into Cache - index_filename = file.filename + '_index' - serialize.write_index(file_name=index_filename, index=index) - - FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, - 'type': 'index', - 'filename': index_filename, - 'seq_no': file.seq_no + 1}) - db.session.commit() - VectorEngine.group_dict = None + vector_str_list = [] + for vector in vectors: + file = FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').first() + group = GroupTable.query.filter(GroupTable.group_name == group_id).first() + + if file: + print('insert into exist file') + # create vector id + vector_id = file.seq_no + 1 + # insert into raw file + VectorEngine.InsertVectorIntoRawFile(group_id, file.filename, vector, vector_id) + + # check if the file can be indexed + if file.row_number + 1 >= ROW_LIMIT: + raw_vector_array, raw_vector_id_array = VectorEngine.GetVectorListFromRawFile(group_id) + d = group.dimension + + # create index + index_builder = build_index.FactoryIndex() + index = index_builder().build(d, raw_vector_array, raw_vector_id_array) + + # TODO(jinhai): store index into Cache + index_filename = file.filename + '_index' + serialize.write_index(file_name=index_filename, index=index) + + FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, + 'type': 'index', + 'filename': index_filename, + 'seq_no': file.seq_no + 1}) + db.session.commit() + VectorEngine.group_dict = None + else: + # we still can insert into exist raw file, update database + FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, + 'seq_no': file.seq_no + 1}) + db.session.commit() + print('Update db for raw file insertion') + else: - # we still can insert into exist raw file, update database - FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, - 'seq_no': file.seq_no + 1}) + print('add a new raw file') + # first raw file + raw_filename = group_id + '.raw' + # create vector id + vector_id = 0 + # create and insert vector into raw file + VectorEngine.InsertVectorIntoRawFile(group_id, raw_filename, vector, vector_id) + # insert a record into database + db.session.add(FileTable(group_id, raw_filename, 'raw', 1)) db.session.commit() - print('Update db for raw file insertion') - pass - else: - print('add a new raw file') - # first raw file - raw_filename = group_id + '.raw' - # create vector id - vector_id = 0 - # create and insert vector into raw file - VectorEngine.InsertVectorIntoRawFile(group_id, raw_filename, vector, vector_id) - # insert a record into database - db.session.add(FileTable(group_id, raw_filename, 'raw', 1)) - db.session.commit() + vector_str_list.append(group_id + '.' + str(vector_id)) - vector_id_str = group_id + '.' + str(vector_id) - return VectorEngine.SUCCESS_CODE, vector_id_str + return VectorEngine.SUCCESS_CODE, vector_str_list @staticmethod diff --git a/pyengine/engine/controller/views.py b/pyengine/engine/controller/views.py index ecd42dcb..a40a113e 100644 --- a/pyengine/engine/controller/views.py +++ b/pyengine/engine/controller/views.py @@ -14,10 +14,9 @@ from flask_restful import request class Vector(Resource): def __init__(self): self.__parser = reqparse.RequestParser() - self.__parser.add_argument('vector', type=float, action='append', location=['json']) + self.__parser.add_argument('vector', type=list, action='append', location=['json']) def post(self, group_id): - print(request.json) args = self.__parser.parse_args() vector = args['vector'] code, vector_id = VectorEngine.AddVector(group_id, vector) diff --git a/pyengine/engine/run_test.sh b/pyengine/engine/run_test.sh index 10c0bae4..5e0da6ae 100755 --- a/pyengine/engine/run_test.sh +++ b/pyengine/engine/run_test.sh @@ -1 +1 @@ -pytest -v --disable-warnings +pytest -vv --disable-warnings -- GitLab