提交 3d289649 编写于 作者: D dangqingqing

follow comments

上级 aaecfcc4
...@@ -66,42 +66,27 @@ class SentimentPrediction(): ...@@ -66,42 +66,27 @@ class SentimentPrediction():
for v in open(label_file, 'r'): for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0] self.label[int(v.split('\t')[1])] = v.split('\t')[0]
def get_data(self, data): def get_index(self, data):
""" """
Get input data of paddle format. transform word into integer index according to the dictionary.
""" """
for line in data: words = data.strip().split()
words = line.strip().split() word_slot = [
word_slot = [ self.word_dict[w] for w in words if w in self.word_dict
self.word_dict[w] for w in words if w in self.word_dict ]
] return word_slot
if not word_slot:
print "all words are not in dictionary: %s", line def batch_predict(self, data_batch):
continue input = self.converter(data_batch)
yield [word_slot] output = self.network.forwardTest(input)
prob = output[0]["value"]
def predict(self, batch_size): labs = np.argsort(-prob)
for idx, lab in enumerate(labs):
def batch_predict(batch_data): if self.label is None:
input = self.converter(self.get_data(batch_data)) print("predicting label is %d" % (lab[0]))
output = self.network.forwardTest(input) else:
prob = output[0]["value"] print("predicting label is %s" %
labs = np.argsort(-prob) (self.label[lab[0]]))
for idx, lab in enumerate(labs):
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
print("predicting label is %s" %
(self.label[lab[0]]))
batch = []
for line in sys.stdin:
batch.append(line)
if len(batch) == batch_size:
batch_predict(batch)
batch=[]
if len(batch) > 0:
batch_predict(batch)
def option_parser(): def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
...@@ -152,8 +137,15 @@ def main(): ...@@ -152,8 +137,15 @@ def main():
label = options.label label = options.label
swig_paddle.initPaddle("--use_gpu=0") swig_paddle.initPaddle("--use_gpu=0")
predict = SentimentPrediction(train_conf, dict_file, model_path, label) predict = SentimentPrediction(train_conf, dict_file, model_path, label)
predict.predict(batch_size)
batch = []
for line in sys.stdin:
batch.append([predict.get_index(line)])
if len(batch) == batch_size:
predict.batch_predict(batch)
batch=[]
if len(batch) > 0:
predict.batch_predict(batch)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -293,20 +293,21 @@ predict.sh: ...@@ -293,20 +293,21 @@ predict.sh:
model=model_output/pass-00002/ model=model_output/pass-00002/
config=trainer_config.py config=trainer_config.py
label=data/pre-imdb/labels.list label=data/pre-imdb/labels.list
python predict.py \ cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
-n $config\ --tconf=$config\
-w $model \ --model=$model \
-b $label \ --label=$label \
-d data/pre-imdb/dict.txt \ --dict=./data/pre-imdb/dict.txt \
-i data/aclImdb/test/pos/10007_10.txt --batch_size=1
``` ```
* `predict.py`: prediction interface. * `cat ./data/aclImdb/test/pos/10007_10.txt` : the input sample.
* -n $config : set the network configuration. * `predict.py` : prediction interface.
* -w $model: set model path. * `--tconf=$config` : set the network configuration.
* -b $label: set the dictionary that maps integer labels to string labels. * `--model=$model` : set model path.
* -d data/pre-imdb/dict.txt: set dictionary. * `--label=$label` : set the dictionary that maps integer labels to string labels.
* -i data/aclImdb/test/pos/10014_7.txt: set one example file to predict. * `--dict=data/pre-imdb/dict.txt` : set dictionary.
* `--batch_size=1` : set batch size.
Note you should make sure the default model path `model_output/pass-00002` Note you should make sure the default model path `model_output/pass-00002`
exists or change the model path. exists or change the model path.
......
...@@ -291,20 +291,21 @@ predict.sh: ...@@ -291,20 +291,21 @@ predict.sh:
model=model_output/pass-00002/ model=model_output/pass-00002/
config=trainer_config.py config=trainer_config.py
label=data/pre-imdb/labels.list label=data/pre-imdb/labels.list
python predict.py \ cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
-n $config\ --tconf=$config\
-w $model \ --model=$model \
-b $label \ --label=$label \
-d data/pre-imdb/dict.txt \ --dict=./data/pre-imdb/dict.txt \
-i data/aclImdb/test/pos/10007_10.txt --batch_size=1
``` ```
* `predict.py`: 预测接口脚本。 * `cat ./data/aclImdb/test/pos/10007_10.txt` : 输入预测样本。
* -n $config : 设置网络配置。 * `predict.py` : 预测接口脚本。
* -w $model: 设置模型路径。 * `--tconf=$config` : 设置网络配置。
* -b $label: 设置标签类别字典,这个字典是整数标签和字符串标签的一个对应。 * `--model=$model` : 设置模型路径。
* -d data/pre-imdb/dict.txt: 设置字典文件。 * `--label=$label` : 设置标签类别字典,这个字典是整数标签和字符串标签的一个对应。
* -i data/aclImdb/test/pos/10014_7.txt: 设置一个要预测的示例文件。 * `--dict=data/pre-imdb/dict.txt` : 设置字典文件。
* `--batch_size=1` : 设置batch size。
注意应该确保默认模型路径`model_output/pass-00002`存在或更改为其它模型路径。 注意应该确保默认模型路径`model_output/pass-00002`存在或更改为其它模型路径。
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册