Commit 029bab4a authored by Dong Daxiang, committed by GitHub

Merge pull request #381 from wangjiawei04/jiawei/lac_sync

lac sync and fix README.md
......@@ -254,7 +254,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
### FAQ
- [FAQ(Chinese)](doc/FAQ.md)
- [FAQ(Chinese)](doc/deprecated/FAQ.md)
### Design
......
## Chinese Word Segmentation
([简体中文](./README_CN.md)|English)
### Get model files and sample data
```
sh get_data.sh
```
The downloaded package contains the LAC model config along with the LAC dictionary.
### Start RPC inference service
```
python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292
```
### RPC Infer
```
echo "我爱北京天安门" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/
```
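For reference, the RPC client flow roughly looks like the sketch below. The `predict` call and the fetch key `crf_decode` are assumptions based on the `lac_web_service.py` diff in this commit, not code copied verbatim from `lac_client.py`.
```
# sketch of the RPC client flow (assumed; see the lac_client.py diff below)
import sys
from paddle_serving_client import Client
from lac_reader import LACReader  # assumed module name providing LACReader

client = Client()
client.load_client_config(sys.argv[1])   # jieba_client_conf/serving_client_conf.prototxt
client.connect(["127.0.0.1:9292"])

reader = LACReader(sys.argv[2])          # lac_dict/
for line in sys.stdin:
    word_ids = reader.process(line)      # convert the sentence into word ids
    fetch_map = client.predict(feed={"words": word_ids}, fetch=["crf_decode"])
    segs = reader.parse_result(line.strip(), fetch_map["crf_decode"])
    print("|".join(segs))
```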
This returns the word segmentation result.
### Start HTTP inference service
```
python lac_web_service.py jieba_server_model/ lac_workdir 9292
```
### HTTP Infer
```
curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
```
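The same request can also be sent from Python; a minimal sketch using the `requests` library (the expected response shape follows from the `postprocess` method shown later in this commit):
```
# equivalent of the curl command above, using the requests library
import requests

resp = requests.post(
    "http://127.0.0.1:9292/lac/prediction",
    json={"words": "我爱北京天安门", "fetch": ["word_seg"]})
# postprocess joins the segments with "|", so the response should look
# roughly like {"word_seg": "我|爱|北京|天安门"}
print(resp.json())
```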
## Chinese Word Segmentation Model
(简体中文|[English](./README.md))
### Get the model and dictionary files
```
sh get_data.sh
```
The downloaded package contains the LAC model and the dictionary files required for LAC inference.
### Start the RPC inference service
```
python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292
```
### Run RPC inference
```
echo "我爱北京天安门" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/
```
This returns the word segmentation result.
### Start the HTTP inference service
```
python lac_web_service.py jieba_server_model/ lac_workdir 9292
```
### Run HTTP inference
```
curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz
tar -zxvf lac_model.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
tar -zxvf lac_model_jieba_web.tar.gz
......@@ -22,7 +22,7 @@ import io
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9280"])
client.connect(["127.0.0.1:9292"])
reader = LACReader(sys.argv[2])
for line in sys.stdin:
......
......@@ -99,3 +99,26 @@ class LACReader(object):
        words = sent.strip()
        word_ids = self.word_to_ids(words)
        return word_ids

    def parse_result(self, words, crf_decode):
        """Group characters into words according to the CRF decode tags."""
        tags = [self.id2label_dict[str(x)] for x in crf_decode]
        sent_out = []
        tags_out = []
        partial_word = ""
        for ind, tag in enumerate(tags):
            # start the first word
            if partial_word == "":
                partial_word = words[ind]
                tags_out.append(tag.split('-')[0])
                continue
            # a "-B" tag (or an "O" after a non-"O") closes the current word
            if tag.endswith("-B") or (tag == "O" and tags[ind - 1] != "O"):
                sent_out.append(partial_word)
                tags_out.append(tag.split('-')[0])
                partial_word = words[ind]
                continue
            # otherwise keep extending the current word
            partial_word += words[ind]
        # flush the last word
        if len(sent_out) < len(tags_out):
            sent_out.append(partial_word)
        return sent_out
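To make the grouping rule concrete, here is a small standalone illustration; the example sentence and B/I-style tags are assumed for illustration and are not taken from this commit:
```
# standalone illustration of the grouping rule used by parse_result
# (example sentence and tags are assumed, not taken from this commit)
words = "我爱北京天安门"
tags = ["r-B", "v-B", "ns-B", "ns-I", "ns-B", "ns-I", "ns-I"]

segs, partial = [], ""
for ch, tag in zip(words, tags):
    if partial and tag.endswith("-B"):
        segs.append(partial)      # a "-B" tag starts a new word
        partial = ch
    else:
        partial += ch             # "-I" (or the first tag) extends the current word
segs.append(partial)
print(segs)  # ['我', '爱', '北京', '天安门']
```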
......@@ -25,8 +25,13 @@ class LACService(WebService):
if "words" not in feed:
raise ("feed data error!")
feed_data = self.reader.process(feed["words"])
fetch = ["crf_decode"]
return {"words": feed_data}, fetch
def postprocess(self, feed={}, fetch=[], fetch_map={}):
segs = self.reader.parse_result(feed["words"], fetch_map["crf_decode"])
return {"word_seg": "|".join(segs)}
lac_service = LACService(name="lac")
lac_service.load_model_config(sys.argv[1])
......
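Putting the two hooks together, the data flow through the service for the curl request shown in the README sections above is roughly as follows (an illustration of the assumed flow, not code from this commit):
```
# assumed request/response flow through LACService (illustration only)
#   HTTP body               : {"words": "我爱北京天安门", "fetch": ["word_seg"]}
#   preprocess  -> feed     : {"words": <word ids from LACReader.process>}, fetch=["crf_decode"]
#   model       -> fetch_map: {"crf_decode": <tag ids>}
#   postprocess -> reply    : {"word_seg": "我|爱|北京|天安门"}
```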