提交 09bf6f3f 编写于 作者: J jinhai

Merge branch 'jinhai' into 'develop'

Update for more function

See merge request jinhai/vecwise_engine!6
...@@ -10,7 +10,7 @@ app.config.from_object('engine.settings') ...@@ -10,7 +10,7 @@ app.config.from_object('engine.settings')
print ("Create database instance") print ("Create database instance")
db = SQLAlchemy(app) db = SQLAlchemy(app)
from engine.model.GroupTable import GroupTable from engine.model.group_table import GroupTable
from engine.model.FileTable import FileTable from engine.model.file_table import FileTable
from engine.controller import IndexManage from engine.controller import index_manager
import os, shutil import os, shutil
from engine.settings import DATABASE_DIRECTORY
class GroupHandler(object): class GroupHandler(object):
@staticmethod @staticmethod
def CreateGroupDirectory(group_id): def CreateGroupDirectory(group_id):
path = GetGroupDirectory(group_id) path = GroupHandler.GetGroupDirectory(group_id)
path = path.strip() path = path.strip()
path=path.rstrip("\\") path=path.rstrip("\\")
if not os.path.exists(): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
print("CreateGroupDirectory, Path: ", path)
@staticmethod @staticmethod
def DeleteGroupDirectory(group_id): def DeleteGroupDirectory(group_id):
path = GetGroupDirectory(group_id) path = GroupHandler.GetGroupDirectory(group_id)
path = path.strip() path = path.strip()
path=path.rstrip("\\") path=path.rstrip("\\")
if os.path.exists(): if os.path.exists(path):
shutil.rmtree(path) shutil.rmtree(path)
print("DeleteGroupDirectory, Path: ", path)
@staticmethod @staticmethod
def GetGroupDirectory(group_id): def GetGroupDirectory(group_id):
return DATABASE_DIRECTORY + '/' + group_id print("GetGroupDirectory, Path: ", DATABASE_DIRECTORY + '/' + group_id)
\ No newline at end of file return DATABASE_DIRECTORY + '/' + group_id
class RawFileHandler(object): class IndexFileHandler(object):
@staticmethod @staticmethod
def Create(filename, type): def Create(filename, type):
# type means: csv, parquet # type means: csv, parquet
......
from flask import Flask, jsonify, request from flask import Flask, jsonify, request
from flask_restful import Resource, Api from flask_restful import Resource, Api
from engine import app, db from engine import app, db
from engine.model.GroupTable import GroupTable from engine.model.group_table import GroupTable
from engine.controller.VectorEngine import VectorEngine from engine.controller.vector_engine import VectorEngine
# app = Flask(__name__) # app = Flask(__name__)
api = Api(app) api = Api(app)
...@@ -25,12 +25,13 @@ class VectorSearch(Resource): ...@@ -25,12 +25,13 @@ class VectorSearch(Resource):
def __init__(self): def __init__(self):
self.__parser = reqparse.RequestParser() self.__parser = reqparse.RequestParser()
self.__parser.add_argument('vector', type=float, action='append', location=['json']) self.__parser.add_argument('vector', type=float, action='append', location=['json'])
self.__parser.add_argument('limit', type=int, action='append', location=['json'])
def post(self, group_id): def post(self, group_id):
args = self.__parser.parse_args() args = self.__parser.parse_args()
print('vector: ', args['vector']) print('vector: ', args['vector'])
# go to search every thing # go to search every thing
return "vectorSearch post" return VectorEngine.SearchVector(group_id, args['vector'], args['limit'])
class Index(Resource): class Index(Resource):
...@@ -46,9 +47,12 @@ class Group(Resource): ...@@ -46,9 +47,12 @@ class Group(Resource):
def __init__(self): def __init__(self):
self.__parser = reqparse.RequestParser() self.__parser = reqparse.RequestParser()
self.__parser.add_argument('group_id', type=str) self.__parser.add_argument('group_id', type=str)
self.__parser.add_argument('dimension', type=int, action='append', location=['json'])
def post(self, group_id): def post(self, group_id):
return VectorEngine.AddGroup(group_id) args = self.__parser.parse_args()
dimension = args['dimension']
return VectorEngine.AddGroup(group_id, dimension)
def get(self, group_id): def get(self, group_id):
return VectorEngine.GetGroup(group_id) return VectorEngine.GetGroup(group_id)
......
class RawFileHandler(object):
@staticmethod
def Create(filename, type):
# type means: csv, parquet
pass
@staticmethod
def Read(filename, type):
pass
@staticmethod
def Append(filename, type, record):
pass
@staticmethod
def GetRawFilename(group_id):
return group_id + '.raw'
\ No newline at end of file
from engine.model.GroupTable import GroupTable from engine.model.group_table import GroupTable
from engine.model.FileTable import FileTable from engine.model.file_table import FileTable
from engine.controller.RawFileHandler import RawFileHandler from engine.controller.raw_file_handler import RawFileHandler
from engine.controller.GroupHandler import GroupHandler from engine.controller.group_handler import GroupHandler
from engine.controller.index_file_handler import IndexFileHandler
from engine.settings import ROW_LIMIT
from flask import jsonify from flask import jsonify
from engine import db from engine import db
import sys, os import sys, os
class VectorEngine(object): class VectorEngine(object):
group_dict = None
@staticmethod @staticmethod
def AddGroup(group_id): def AddGroup(group_id, dimension):
group = GroupTable.query.filter(GroupTable.group_name==group_id).first() group = GroupTable.query.filter(GroupTable.group_name==group_id).first()
if group: if group:
print('Already create the group: ', group_id)
return jsonify({'code': 1, 'group_name': group_id, 'file_number': group.file_number}) return jsonify({'code': 1, 'group_name': group_id, 'file_number': group.file_number})
else: else:
new_group = GroupTable(group_id) print('To create the group: ', group_id)
new_group = GroupTable(group_id, dimension)
GroupHandler.CreateGroupDirectory(group_id)
# add into database
db.session.add(new_group) db.session.add(new_group)
db.session.commit() db.session.commit()
GroupHandler.CreateGroupDirectory(group_id)
return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0}) return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0})
@staticmethod @staticmethod
def GetGroup(group_id): def GetGroup(group_id):
group = GroupTable.query.filter(GroupTable.group_name==group_id).first() group = GroupTable.query.filter(GroupTable.group_name==group_id).first()
if group: if group:
print('Found the group: ', group_id)
return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number}) return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number})
else: else:
print('Not found the group: ', group_id)
return jsonify({'code': 1, 'group_name': group_id, 'file_number': 0}) # not found return jsonify({'code': 1, 'group_name': group_id, 'file_number': 0}) # not found
@staticmethod @staticmethod
def DeleteGroup(group_id): def DeleteGroup(group_id):
group = GroupTable.query.filter(GroupTable.group_name==group_id).first() group = GroupTable.query.filter(GroupTable.group_name==group_id).first()
...@@ -37,10 +47,18 @@ class VectorEngine(object): ...@@ -37,10 +47,18 @@ class VectorEngine(object):
db.session.delete(group) db.session.delete(group)
db.session.commit() db.session.commit()
GroupHandler.DeleteGroupDirectory(group_id) GroupHandler.DeleteGroupDirectory(group_id)
records = FileTable.query.filter(FileTable.group_name == group_id).all()
for record in records:
print("record.group_name: ", record.group_name)
db.session.delete(record)
db.session.commit()
return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number}) return jsonify({'code': 0, 'group_name': group_id, 'file_number': group.file_number})
else: else:
return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0}) return jsonify({'code': 0, 'group_name': group_id, 'file_number': 0})
@staticmethod @staticmethod
def GetGroupList(): def GetGroupList():
group = GroupTable.query.all() group = GroupTable.query.all()
...@@ -54,56 +72,95 @@ class VectorEngine(object): ...@@ -54,56 +72,95 @@ class VectorEngine(object):
print(group_list) print(group_list)
return jsonify(results = group_list) return jsonify(results = group_list)
@staticmethod @staticmethod
def AddVector(group_id, vector): def AddVector(group_id, vector):
print(group_id, vector) print(group_id, vector)
file = FileTable.query.filter(and_(FileTable.group_name == group_id, FileTable.type == 'raw')).first() file = FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').first()
if file: if file:
if file.row_number >= ROW_LIMIT: print('insert into exist file')
# insert into raw file
VectorEngine.InsertVectorIntoRawFile(group_id, file.filename, vector)
# check if the file can be indexed
if file.row_number + 1 >= ROW_LIMIT:
# read data from raw file
data = GetVectorsFromRawFile()
# create index # create index
index_filename = file.filename + "_index" index_filename = file.filename + '_index'
CreateIndex(group_id, index_filename) CreateIndex(group_id, index_filename, data)
# create another raw file # update record into database
raw_filename = file.seq_no FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1, 'type': 'index'})
InsertVectorIntoRawFile(group_id, raw_filename, vector) pass
# insert a record into database
db.session.add(FileTable(group_id, raw_filename, 'raw', 1))
db.session.commit()
else: else:
# we still can insert into exist raw file # we still can insert into exist raw file, update database
InsertVectorIntoRawFile(file.filename, vector) FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1})
# update database db.session.commit()
# FileTable.query.filter_by(FileTable.group_name == group_id).filter_by(FileTable.type == 'raw').update('row_number':file.row_number + 1) print('Update db for raw file insertion')
pass
else: else:
print('add a new raw file')
# first raw file # first raw file
raw_filename = group_id + '_0' raw_filename = group_id + '.raw'
# create and insert vector into raw file # create and insert vector into raw file
InsertVectorIntoRawFile(raw_filename, vector) VectorEngine.InsertVectorIntoRawFile(group_id, raw_filename, vector)
# insert a record into database # insert a record into database
db.session.add(FileTable(group_id, raw_filename, 'raw', 1)) db.session.add(FileTable(group_id, raw_filename, 'raw', 1))
db.session.commit() db.session.commit()
return jsonify({'code': 0}) return jsonify({'code': 0})
@staticmethod @staticmethod
def SearchVector(group_id, vector, limit): def SearchVector(group_id, vector, limit):
# find all files # find all files
files = FileTable.query.filter(FileTable.group_name == group_id).all()
for file in files:
if(file.type == 'raw'):
# create index
# add vector list
# train
# get topk
print('search in raw file: ', file.filename)
pass
else:
# get topk
print('search in index file: ', file.filename)
data = IndexFileHandler.Read(file.filename, file.type)
pass
# according to difference files get topk of each # according to difference files get topk of each
# reduce the topk from them # reduce the topk from them
# construct response and send back # construct response and send back
return jsonify({'code': 0}) return jsonify({'code': 0})
@staticmethod @staticmethod
def CreateIndex(group_id, filename): def CreateIndex(group_id):
path = GroupHandler.GetGroupDirectory(group_id) + '/' + filename # create index
print(group_id, path) file = FileTable.query.filter(FileTable.group_name == group_id).filter(FileTable.type == 'raw').first()
path = GroupHandler.GetGroupDirectory(group_id) + '/' + file.filename
print('Going to create index for: ', path)
return jsonify({'code': 0}) return jsonify({'code': 0})
@staticmethod @staticmethod
def InsertVectorIntoRawFile(group_id, filename, vector): def InsertVectorIntoRawFile(group_id, filename, vector):
print(sys._getframe().f_code.co_name) # print(sys._getframe().f_code.co_name, group_id, vector)
path = GroupHandler.GetGroupDirectory(group_id) + '/' + filename # path = GroupHandler.GetGroupDirectory(group_id) + '/' + filename
if VectorEngine.group_dict is None:
# print("VectorEngine.group_dict is None")
VectorEngine.group_dict = dict()
VectorEngine.group_dict[group_id] = []
VectorEngine.group_dict[group_id].append(vector)
print('InsertVectorIntoRawFile: ', VectorEngine.group_dict[group_id])
# if filename exist # if filename exist
# append # append
...@@ -112,3 +169,8 @@ class VectorEngine(object): ...@@ -112,3 +169,8 @@ class VectorEngine(object):
# append # append
return filename return filename
@staticmethod
def GetVectorListFromRawFile(group_id, filename):
return VectorEngine.group_dict[group_id]
...@@ -9,6 +9,7 @@ class FileTable(db.Model): ...@@ -9,6 +9,7 @@ class FileTable(db.Model):
row_number = db.Column(db.Integer) row_number = db.Column(db.Integer)
seq_no = db.Column(db.Integer) seq_no = db.Column(db.Integer)
def __init__(self, group_name, filename, type, row_number): def __init__(self, group_name, filename, type, row_number):
self.group_name = group_name self.group_name = group_name
self.filename = filename self.filename = filename
...@@ -17,5 +18,7 @@ class FileTable(db.Model): ...@@ -17,5 +18,7 @@ class FileTable(db.Model):
self.type = type self.type = type
self.seq_no = 0 self.seq_no = 0
def __repr__(self): def __repr__(self):
return '<FileTable $r>' % self.tablename return '<FileTable $r>' % self.tablename
\ No newline at end of file
...@@ -5,10 +5,15 @@ class GroupTable(db.Model): ...@@ -5,10 +5,15 @@ class GroupTable(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
group_name = db.Column(db.String(100)) group_name = db.Column(db.String(100))
file_number = db.Column(db.Integer) file_number = db.Column(db.Integer)
dimension = db.Column(db.Integer)
def __init__(self, group_name):
def __init__(self, group_name, dimension):
self.group_name = group_name self.group_name = group_name
self.dimension = dimension
self.file_number = 0 self.file_number = 0
self.dimension = 0
def __repr__(self): def __repr__(self):
return '<GroupTable $s>' % self.group_name return '<GroupTable $s>' % self.group_name
\ No newline at end of file
...@@ -6,4 +6,4 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False ...@@ -6,4 +6,4 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False
SQLALCHEMY_DATABASE_URI = "mysql+pymysql://vecwise@127.0.0.1:3306/vecdata" SQLALCHEMY_DATABASE_URI = "mysql+pymysql://vecwise@127.0.0.1:3306/vecdata"
ROW_LIMIT = 10000000 ROW_LIMIT = 10000000
DATABASE_DIRECTORY = '/home/jinhai/Document/development/vecwise_engine/db' DATABASE_DIRECTORY = '/home/jinhai/disk0/vecwise/db'
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册