提交 88cbb37a 编写于 作者: M MRXLT

move imdb reader to app

上级 46498409
......@@ -13,7 +13,7 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from imdb_reader import IMDBDataset
from paddle_serving_app import IMDBReader
import sys
client = Client()
......@@ -23,7 +23,7 @@ client.connect(["127.0.0.1:9292"])
# you can define any english sentence or dataset here
# This example reuses imdb reader in training, you
# can define your own data preprocessing easily.
imdb_dataset = IMDBDataset()
imdb_dataset = IMDBReader()
imdb_dataset.load_resource(sys.argv[2])
for line in sys.stdin:
......
......@@ -14,7 +14,7 @@
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
from paddle_serving_app import IMDBReader
import sys
......@@ -22,7 +22,7 @@ class IMDBService(WebService):
def prepare_dict(self, args={}):
if len(args) == 0:
exit(-1)
self.dataset = IMDBDataset()
self.dataset = IMDBReader()
self.dataset.load_resource(args["dict_file_path"])
def preprocess(self, feed={}, fetch=[]):
......
......@@ -15,5 +15,6 @@ from .reader.chinese_bert_reader import ChineseBertReader
from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize
from .reader.lac_reader import LACReader
from .reader.senta_reader import SentaReader
from .reader.imdb_reader import IMDBReader
from .models import ServingModels
from .local_predict import Debugger
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class IMDBReader(dg.MultiSlotDataGenerator):
def load_resource(self, dictfile):
self._vocab = {}
wid = 0
if py_version == 2:
with open(dictfile) as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas
def get_words_and_label(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
" ").strip()
label = [int(line.split('|')[-1])]
words = [x for x in self._pattern.split(send) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size):
def local_iter():
for fname in infer_filelist:
with open(fname, "r") as fin:
for line in fin:
feas, label = self.get_words_and_label(line)
yield feas, label
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter():
feas, label = self.get_words_and_label(line)
yield ("words", feas), ("label", label)
return data_iter
if __name__ == "__main__":
imdb = IMDBReader()
imdb.load_resource("imdb.vocab")
imdb.run_from_stdin()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册