Commit 0637d81f authored by MRXLT

Merge remote-tracking branch 'upstream/develop' into general-server-imagenet

......@@ -8,7 +8,10 @@
[中文](https://github.com/PaddlePaddle/Serving/blob/develop/README_CN.md)
## Motivation
Paddle Serving helps deep learning developers deploy online inference services with minimal effort. **The goal of this project**: once you have trained a deep neural network with [Paddle](https://github.com/PaddlePaddle/Paddle), you already have a model inference service. A demo of serving is shown below:
<p align="center">
<img src="doc/demo.gif" width="700">
</p>
## Key Features
- Integrates seamlessly with the Paddle training pipeline; most Paddle models can be deployed **with a single command**.
......@@ -27,13 +30,28 @@ pip install paddle-serving-server
## Quick Start Example
### Download a trained model
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
```
Paddle Serving provides both HTTP and RPC based services for users to access the deployed model.
### HTTP service
``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci
```
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
```
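The same request can also be sent from Python. A minimal sketch using the `requests` package (not part of this repo, assumed to be installed) against the service started above:
``` python
import requests

data = {
    "x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
          -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
    "fetch": ["price"]
}
# posts the same JSON payload as the curl command above
resp = requests.post("http://127.0.0.1:9292/uci/prediction", json=data)
print(resp.json())  # should contain the fetched "price" field
```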
### RPC service
``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292
```
Python client API
``` python
from paddle_serving_client import Client
# ... (client setup lines collapsed in this diff view)
print(fetch_map)
```
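The lines collapsed above cover the client setup; a complete quick-start client would look roughly like the sketch below, with paths and field names following the UCI housing demo downloaded earlier:
``` python
from paddle_serving_client import Client

client = Client()
# assumes the uci_housing tarball from the previous step was extracted here
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
        -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = client.predict(feed={"x": data}, fetch=["price"])
print(fetch_map)
```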
## Document
[Design Doc (Chinese)](doc/DESIGN.md)
......
......@@ -3,83 +3,88 @@
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="PaddlePaddle CTR example")
    parser.add_argument(
        '--train_data_path',
        type=str,
        default='./data/raw/train.txt',
        help="The path of the training dataset")
    parser.add_argument(
        '--sparse_only',
        type=bool,
        default=False,
        help="Whether to use sparse features only")
    parser.add_argument(
        '--test_data_path',
        type=str,
        default='./data/raw/valid.txt',
        help="The path of the testing dataset")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=1000,
        help="The size of the mini-batch (default: 1000)")
    parser.add_argument(
        '--embedding_size',
        type=int,
        default=10,
        help="The size of the embedding layer (default: 10)")
    parser.add_argument(
        '--num_passes',
        type=int,
        default=10,
        help="The number of passes to train (default: 10)")
    parser.add_argument(
        '--model_output_dir',
        type=str,
        default='models',
        help='The path where the model is stored (default: models)')
    parser.add_argument(
        '--sparse_feature_dim',
        type=int,
        default=1000001,
        help='Sparse feature hashing space for index processing')
    parser.add_argument(
        '--is_local',
        type=int,
        default=1,
        help='Local train or distributed train (default: 1)')
    parser.add_argument(
        '--cloud_train',
        type=int,
        default=0,
        help='Local train or distributed train on PaddleCloud (default: 0)')
    parser.add_argument(
        '--async_mode',
        action='store_true',
        default=False,
        help='Whether to start the pserver in async mode to support ASGD')
    parser.add_argument(
        '--no_split_var',
        action='store_true',
        default=False,
        help='Whether to split variables into blocks when update_method is pserver')
    parser.add_argument(
        '--role',
        type=str,
        default='pserver',  # trainer or pserver
        help='The role of this node: trainer or pserver (default: pserver)')
    parser.add_argument(
        '--endpoints',
        type=str,
        default='127.0.0.1:6000',
        help='The pserver endpoints, like: 127.0.0.1:6000,127.0.0.1:6001')
    parser.add_argument(
        '--current_endpoint',
        type=str,
        default='127.0.0.1:6000',
        help='The endpoint of the current pserver (default: 127.0.0.1:6000)')
    parser.add_argument(
        '--trainer_id',
        type=int,
        default=0,
        help='The id of the current trainer (default: 0)')
    parser.add_argument(
        '--trainers',
        type=int,
        default=1,
        help='The number of trainers (default: 1)')
    return parser.parse_args()
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/data/ctr_prediction/ctr_data.tar.gz
tar -zxvf ctr_data.tar.gz
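With the data in place, the CTR training script can be launched locally. A hypothetical invocation using a few of the flags defined in `args.py` above (the entry-point file name `local_train.py` is assumed from the diff below):
``` shell
python local_train.py \
    --batch_size 1000 \
    --embedding_size 10 \
    --sparse_feature_dim 1000001 \
    --model_output_dir models
```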
......@@ -4,15 +4,16 @@ from args import parse_args
import os
import paddle.fluid as fluid
import sys
from network_conf import dnn_model
dense_feature_dim = 13
def train():
    args = parse_args()
    sparse_only = args.sparse_only
    if not os.path.isdir(args.model_output_dir):
        os.mkdir(args.model_output_dir)
    dense_input = fluid.layers.data(
        name="dense_input", shape=[dense_feature_dim], dtype='float32')
    sparse_input_ids = [
......@@ -20,8 +21,10 @@ def train():
        for i in range(1, 27)]
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    #nn_input = None if sparse_only else dense_input
    nn_input = dense_input
    predict_y, loss, auc_var, batch_auc_var = dnn_model(
        nn_input, sparse_input_ids, label,
        args.embedding_size, args.sparse_feature_dim)

    optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
......@@ -31,29 +34,33 @@ def train():
    exe.run(fluid.default_startup_program())
    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
    dataset.set_use_var([dense_input] + sparse_input_ids + [label])
    python_executable = "python"
    pipe_command = "{} criteo_reader.py {}".format(
        python_executable, args.sparse_feature_dim)

    dataset.set_pipe_command(pipe_command)
    dataset.set_batch_size(128)
    thread_num = 10
    dataset.set_thread(thread_num)

    #dataset.set_filelist(whole_filelist[:(len(whole_filelist)-thread_num)])
    whole_filelist = ["raw_data/part-%d" % x for x in
                      range(len(os.listdir("raw_data")))]
    dataset.set_filelist(whole_filelist[:thread_num])
    dataset.load_into_memory()

    epochs = 1
    for i in range(epochs):
        exe.train_from_dataset(
            program=fluid.default_main_program(),
            dataset=dataset, debug=True)
        print("epoch {} finished".format(i))

    import paddle_serving_client.io as server_io
    feed_var_dict = {}
    for i, sparse in enumerate(sparse_input_ids):
        feed_var_dict["sparse_{}".format(i)] = sparse
    feed_var_dict["dense_0"] = dense_input
    fetch_var_dict = {"prob": predict_y}

    server_io.save_model(
......
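The `server_io.save_model` call is truncated in this view. Based on the feed/fetch dicts built above and the `save_model` signature used elsewhere in this commit, the complete call presumably looks something like the sketch below (the output directory names are illustrative, not taken from the repo):
``` python
# illustrative completion of the truncated call above
server_io.save_model("ctr_serving_model", "ctr_client_conf",
                     feed_var_dict, fetch_var_dict,
                     fluid.default_main_program())
```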
import paddle.fluid as fluid
import math
dense_feature_dim = 13
def dnn_model(dense_input, sparse_inputs, label,
              embedding_size, sparse_feature_dim):
    def embedding_layer(input):
        emb = fluid.layers.embedding(
            input=input,
......@@ -15,20 +14,30 @@ def ctr_dnn_model_dataset(dense_input, sparse_inputs, label,
                initializer=fluid.initializer.Uniform()))
        return fluid.layers.sequence_pool(input=emb, pool_type='sum')
    def mlp_input_tensor(emb_sums, dense_tensor):
        # use the dense feature tensor as part of the MLP input when it is
        # provided; fall back to the sparse embedding sums alone otherwise
        if isinstance(dense_tensor, fluid.Variable):
            return fluid.layers.concat(emb_sums + [dense_tensor], axis=1)
        else:
            return fluid.layers.concat(emb_sums, axis=1)

    def mlp(mlp_input):
        fc1 = fluid.layers.fc(input=mlp_input, size=400, act='relu',
                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                  scale=1 / math.sqrt(mlp_input.shape[1]))))
        fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                  scale=1 / math.sqrt(fc1.shape[1]))))
        fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                  scale=1 / math.sqrt(fc2.shape[1]))))
        pre = fluid.layers.fc(input=fc3, size=2, act='softmax',
                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                  scale=1 / math.sqrt(fc3.shape[1]))))
        return pre

    emb_sums = list(map(embedding_layer, sparse_inputs))
    mlp_in = mlp_input_tensor(emb_sums, dense_input)
    predict = mlp(mlp_in)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.reduce_sum(cost)
    accuracy = fluid.layers.accuracy(input=predict, label=label)
......
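The tail of `dnn_model` is collapsed in this view. Judging from the call in `local_train.py`, it presumably computes AUC and returns four values; a rough, inferred sketch (not the verbatim source) is:
``` python
    # sketch of the collapsed tail, inferred from the call site in local_train.py
    auc_var, batch_auc_var, auc_states = fluid.layers.auc(
        input=predict, label=label)
    return predict, avg_cost, auc_var, batch_auc_var
```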
# Fit-a-line example: prediction through an RPC service
Start the RPC service
``` shell
sh get_data.sh
python test_server.py uci_housing_model/
```
Prediction
``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt
```
# Prediction through an HTTP service
Start a web service with the default web-service hosting module
``` shell
python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --port 9393 --name uci
```
Prediction through an HTTP POST request
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
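If the service is running, the request above returns a JSON object keyed by the fetched variable, i.e. a `price` field holding the predicted value; the exact number depends on the trained weights.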
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
......@@ -4,12 +4,12 @@ import sys
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9292"])
client.connect(["127.0.0.1:9393"])
test_reader = paddle.batch(paddle.reader.shuffle(
    paddle.dataset.uci_housing.test(), buf_size=500), batch_size=1)

for data in test_reader():
    fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
    print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
......@@ -7,13 +7,15 @@ from paddle_serving_server import Server
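# The ops below form the server's execution sequence: general_reader parses
# the incoming request, general_infer runs the model, and general_response
# packs the result to send back to the client.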
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.prepare_server(workdir="work_dir1", port=9292, device="cpu")
server.prepare_server(workdir="work_dir1", port=9393, device="cpu")
server.run_server()
wget https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb_service.tar.gz
tar -xzf imdb_service.tar.gz
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
tar -zxvf text_classification_data.tar.gz
python text_classify_service.py serving_server_model/ workdir imdb.vocab
......@@ -65,6 +65,6 @@ if __name__ == "__main__":
program=fluid.default_main_program(), dataset=dataset, debug=False)
logger.info("TRAIN --> pass: {}".format(i))
if i == 5:
serving_io.save_model("serving_server_model", "serving_client_conf",
serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
......@@ -12,28 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#!flask/bin/python
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
import sys


class IMDBService(WebService):
    def prepare_dict(self, args={}):
        if len(args) == 0:
            exit(-1)
        self.dataset = IMDBDataset()
        self.dataset.load_resource(args["dict_file_path"])

    def preprocess(self, feed={}, fetch=[]):
        if "words" not in feed:
            exit(-1)
        res_feed = {}
        res_feed["words"] = self.dataset.get_words_and_label(feed["words"])[0]
        return res_feed, fetch


imdb_service = IMDBService(name="imdb")
imdb_service.load_model_config(sys.argv[1])
imdb_service.prepare_server(workdir=sys.argv[2], port=9393, device="cpu")
imdb_service.prepare_dict({"dict_file_path": sys.argv[3]})
imdb_service.run_server()
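Once the service is up (port 9393, service name `imdb`), it can be queried over HTTP like the other web services in this commit. A hypothetical request, passing the review text under the `words` key and fetching the `prediction` variable saved during training:
``` shell
curl -H "Content-Type:application/json" -X POST \
    -d '{"words": "i love this movie", "fetch": ["prediction"]}' \
    http://127.0.0.1:9393/imdb/prediction
```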
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage:
Host a trained paddle model with one line command
Example:
python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292
"""
import argparse
from multiprocessing import Pool, Process
from .web_service import WebService
def parse_args():
    parser = argparse.ArgumentParser("web_serve")
    parser.add_argument("--thread", type=int, default=10, help="Concurrency of the server")
    parser.add_argument("--model", type=str, default="", help="Model for serving")
    parser.add_argument("--port", type=int, default=9292, help="Port of the server")
    parser.add_argument("--workdir", type=str, default="workdir", help="Working dir of the current service")
    parser.add_argument("--device", type=str, default="cpu", help="Type of device")
    parser.add_argument("--name", type=str, default="default", help="Default service name")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    service = WebService(name=args.name)
    service.load_model_config(args.model)
    service.prepare_server(workdir=args.workdir, port=args.port, device=args.device)
    service.run_server()
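As a usage sketch mirroring the docstring above and the fit-a-line README, one would host a saved model and query it like this (port and service name chosen to match the README example):
``` shell
python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --port 9393 --name uci
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```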
......@@ -13,37 +13,69 @@
# limitations under the License.
#!flask/bin/python
from flask import Flask, request, abort
from multiprocessing import Pool, Process
from paddle_serving_server import OpMaker, OpSeqMaker, Server
from paddle_serving_client import Client


class WebService(object):
    def __init__(self, name="default_service"):
        self.name = name

    def load_model_config(self, model_config):
        self.model_config = model_config

    def _launch_rpc_service(self):
        op_maker = OpMaker()
        read_op = op_maker.create('general_reader')
        general_infer_op = op_maker.create('general_infer')
        general_response_op = op_maker.create('general_response')
        op_seq_maker = OpSeqMaker()
        op_seq_maker.add_op(read_op)
        op_seq_maker.add_op(general_infer_op)
        op_seq_maker.add_op(general_response_op)
        server = Server()
        server.set_op_sequence(op_seq_maker.get_op_sequence())
        server.set_num_threads(16)
        server.load_model_config(self.model_config)
        server.prepare_server(
            workdir=self.workdir, port=self.port+1, device=self.device)
        server.run_server()

    def prepare_server(self, workdir="", port=9393, device="cpu"):
        self.workdir = workdir
        self.port = port
        self.device = device

    def _launch_web_service(self):
        app_instance = Flask(__name__)
        client_service = Client()
        client_service.load_client_config(
            "{}/serving_server_conf.prototxt".format(self.model_config))
        client_service.connect(["127.0.0.1:{}".format(self.port+1)])
        service_name = "/" + self.name + "/prediction"

        @app_instance.route(service_name, methods=['POST'])
        def get_prediction():
            if not request.json:
                abort(400)
            if "fetch" not in request.json:
                abort(400)
            feed, fetch = self.preprocess(request.json, request.json["fetch"])
            fetch_map = client_service.predict(feed=feed, fetch=fetch)
            fetch_map = self.postprocess(
                feed=request.json, fetch=fetch, fetch_map=fetch_map)
            return fetch_map

        app_instance.run(host="127.0.0.1", port=self.port, threaded=False, processes=1)

    def run_server(self):
        import socket
        localIP = socket.gethostbyname(socket.gethostname())
        print("web service address:")
        print("http://{}:{}/{}/prediction".format(localIP, self.port, self.name))
        p_rpc = Process(target=self._launch_rpc_service)
        p_web = Process(target=self._launch_web_service)
        p_rpc.start()
        p_web.start()
        p_web.join()
        p_rpc.join()

    def preprocess(self, feed={}, fetch=[]):
        return feed, fetch
......
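A minimal sketch of how this class is consumed (the IMDB service earlier in this commit is a fuller example): subclass `WebService`, optionally override `preprocess`/`postprocess`, then load the config, prepare the server, and run it. The model directory name below is illustrative.
``` python
# minimal sketch, assuming "my_model" was produced by paddle_serving_client.io.save_model
from paddle_serving_server.web_service import WebService


class MyService(WebService):
    def preprocess(self, feed={}, fetch=[]):
        # transform the raw JSON feed into model inputs here if needed
        return feed, fetch


service = MyService(name="uci")
service.load_model_config("my_model")
service.prepare_server(workdir="workdir", port=9393, device="cpu")
service.run_server()
```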
......@@ -30,6 +30,7 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle',
'paddle_serving_client'
]
packages=['paddle_serving_server',
......
......@@ -30,6 +30,7 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle',
'paddle_serving_client'
]
packages=['paddle_serving_server_gpu',
......