diff --git a/README.md b/README.md index 11f9aa63b931309cfc98aa22021661cbde32c35a..42aa8bf78a02fb6f7dfac6fc7b8a819e7dc0b901 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,68 @@ # Paddle Serving +An easy-to-use Machine Learning Model Inference Service Deployment Tool -Paddle Serving是PaddlePaddle的在线预估服务框架，能够帮助开发者轻松实现从移动端、服务器端调用深度学习模型的远程预测服务。当前Paddle Serving以支持PaddlePaddle训练的模型为主，可以与Paddle训练框架联合使用，快速部署预估服务。Paddle Serving围绕常见的工业级深度学习模型部署场景进行设计，一些常见的功能包括多模型管理、模型热加载、基于[Baidu-rpc](https://github.com/apache/incubator-brpc)的高并发低延迟响应能力、在线模型A/B实验等。与Paddle训练框架互相配合的API可以使用户在训练与远程部署之间无缝过度，提升深度学习模型的落地效率。 +[![Release](https://img.shields.io/badge/Release-0.0.3-yellowgreen)](Release) +[![Issues](https://img.shields.io/github/issues/PaddlePaddle/Serving)](Issues) +[![License](https://img.shields.io/github/license/PaddlePaddle/Serving)](LICENSE) ------------ +[中文](./README_CN.md) -## 快速上手指南 +Paddle Serving is the online inference service framework of [Paddle](https://github.com/PaddlePaddle/Paddle) that helps developers easily deploy a deep learning model service on the server side and send requests to it from mobile devices, edge devices, and data centers. Currently, Paddle Serving mainly supports deep learning models trained with Paddle, although it can easily be extended to serve models from other deep learning frameworks. Paddle Serving is designed around industrial deployment practice. For example, multi-model management for online services, double-buffered model loading, and online A/B testing of models are supported. The highly concurrent [Baidu-rpc](https://github.com/apache/incubator-brpc) library, which also comes from industrial practice, is used for the underlying communication. Paddle Serving provides a user-friendly API that integrates seamlessly with Paddle training code, so users can finish model training and model serving in an end-to-end fashion. -Paddle Serving当前的develop版本支持轻量级Python API进行快速预测，并且与Paddle的训练可以打通。我们以最经典的波士顿房价预测为示例，完整说明在单机进行模型训练以及使用Paddle Serving进行模型部署的过程。 -#### 安装 -``` + +## Quick Start + +Paddle Serving supports a lightweight Python API for model inference and integrates seamlessly with the Paddle training process. Here is a Boston house price prediction example to help you get started quickly.
+ +### Installation + +```shell pip install paddle-serving-client pip install paddle-serving-server ``` -#### 训练脚本 +### Training Scripts + ``` python -import sys import paddle -import paddle.fluid as fluid train_reader = paddle.batch(paddle.reader.shuffle( paddle.dataset.uci_housing.train(), buf_size=500), batch_size=16) -test_reader = paddle.batch(paddle.reader.shuffle( - paddle.dataset.uci_housing.test(), buf_size=500), batch_size=16) - -x = fluid.data(name='x', shape=[None, 13], dtype='float32') -y = fluid.data(name='y', shape=[None, 1], dtype='float32') +x = paddle.fluid.data(name='x', shape=[None, 13], dtype='float32') +y = paddle.fluid.data(name='y', shape=[None, 1], dtype='float32') -y_predict = fluid.layers.fc(input=x, size=1, act=None) -cost = fluid.layers.square_error_cost(input=y_predict, label=y) -avg_loss = fluid.layers.mean(cost) -sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01) +y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) +cost = paddle.fluid.layers.square_error_cost(input=y_predict, label=y) +avg_loss = paddle.fluid.layers.mean(cost) +sgd_optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) sgd_optimizer.minimize(avg_loss) -place = fluid.CPUPlace() -feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) -exe = fluid.Executor(place) -exe.run(fluid.default_startup_program()) +place = paddle.fluid.CPUPlace() +feeder = paddle.fluid.DataFeeder(place=place, feed_list=[x, y]) +exe = paddle.fluid.Executor(place) +exe.run(paddle.fluid.default_startup_program()) import paddle_serving_client.io as serving_io for pass_id in range(30): for data_train in train_reader(): avg_loss_value, = exe.run( - fluid.default_main_program(), + paddle.fluid.default_main_program(), feed=feeder.feed(data_train), fetch_list=[avg_loss]) serving_io.save_model( "serving_server_model", "serving_client_conf", - {"x": x}, {"y": y_predict}, fluid.default_main_program()) + {"x": x}, {"y": y_predict}, paddle.fluid.default_main_program()) ``` -#### 服务器端代码 -``` python + + +### Server Side Scripts + +``` python import sys from paddle_serving.serving_server import OpMaker from paddle_serving.serving_server import OpSeqMaker from paddle_serving.serving_server import Server @@ -73,16 +80,17 @@ server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.load_model_config(sys.argv[1]) server.prepare_server(workdir="work_dir1", port=9393, device="cpu") server.run_server() ``` -#### 服务器端启动 -``` shell +### Start Server + +``` shell python test_server.py serving_server_model ``` -#### 客户端预测 -``` python +### Client Side Scripts + +``` python from paddle_serving_client import Client import paddle import sys @@ -98,20 +106,23 @@ for data in test_reader(): fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["y"]) print("{} {}".format(fetch_map["y"][0], data[0][1][0])) + ``` -### 文档 -[设计文档](doc/DESIGN.md) -[FAQ](doc/FAQ.md) +### Document + +[Design Doc(Chinese)](doc/DESIGN.md) + +[FAQ(Chinese)](doc/FAQ.md) -### 资深开发者使用指南 +### Advanced features and development -[基于C++核心从零开始写一个预测服务](doc/CREATING.md) +[Develop a serving application with C++(Chinese)](doc/CREATING.md) -[编译指南](doc/INSTALL.md) +[Compile from source code(Chinese)](doc/INSTALL.md) -## 贡献 -如果你想要给Paddle Serving做贡献，请参考[贡献指南](doc/CONTRIBUTE.md) +## Contribution +If you want to contribute code to Paddle Serving, please refer to the [Contribution Guidelines](doc/CONTRIBUTE.md) diff --git a/README_CN.md b/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..11f9aa63b931309cfc98aa22021661cbde32c35a --- /dev/null +++ b/README_CN.md @@ -0,0 +1,117 @@ +# Paddle 
Serving + +Paddle Serving是PaddlePaddle的在线预估服务框架,能够帮助开发者轻松实现从移动端、服务器端调用深度学习模型的远程预测服务。当前Paddle Serving以支持PaddlePaddle训练的模型为主,可以与Paddle训练框架联合使用,快速部署预估服务。Paddle Serving围绕常见的工业级深度学习模型部署场景进行设计,一些常见的功能包括多模型管理、模型热加载、基于[Baidu-rpc](https://github.com/apache/incubator-brpc)的高并发低延迟响应能力、在线模型A/B实验等。与Paddle训练框架互相配合的API可以使用户在训练与远程部署之间无缝过度,提升深度学习模型的落地效率。 + +------------ + +## 快速上手指南 + +Paddle Serving当前的develop版本支持轻量级Python API进行快速预测,并且与Paddle的训练可以打通。我们以最经典的波士顿房价预测为示例,完整说明在单机进行模型训练以及使用Paddle Serving进行模型部署的过程。 + +#### 安装 +``` +pip install paddle-serving-client +pip install paddle-serving-server +``` + +#### 训练脚本 +``` python +import sys +import paddle +import paddle.fluid as fluid + +train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), batch_size=16) + +test_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), batch_size=16) + +x = fluid.data(name='x', shape=[None, 13], dtype='float32') +y = fluid.data(name='y', shape=[None, 1], dtype='float32') + +y_predict = fluid.layers.fc(input=x, size=1, act=None) +cost = fluid.layers.square_error_cost(input=y_predict, label=y) +avg_loss = fluid.layers.mean(cost) +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01) +sgd_optimizer.minimize(avg_loss) + +place = fluid.CPUPlace() +feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) +exe = fluid.Executor(place) +exe.run(fluid.default_startup_program()) + +import paddle_serving_client.io as serving_io + +for pass_id in range(30): + for data_train in train_reader(): + avg_loss_value, = exe.run( + fluid.default_main_program(), + feed=feeder.feed(data_train), + fetch_list=[avg_loss]) + +serving_io.save_model( + "serving_server_model", "serving_client_conf", + {"x": x}, {"y": y_predict}, fluid.default_main_program()) +``` + +#### 服务器端代码 +``` python +import sys +from paddle_serving.serving_server import OpMaker +from paddle_serving.serving_server import OpSeqMaker +from paddle_serving.serving_server import Server + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +general_infer_op = op_maker.create('general_infer') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_infer_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.load_model_config(sys.argv[1]) +server.prepare_server(workdir="work_dir1", port=9393, device="cpu") +server.run_server() +``` + +#### 服务器端启动 +``` shell +python test_server.py serving_server_model +``` + +#### 客户端预测 +``` python +from paddle_serving_client import Client +import paddle +import sys + +client = Client() +client.load_client_config(sys.argv[1]) +client.connect(["127.0.0.1:9292"]) + +test_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), batch_size=1) + +for data in test_reader(): + fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["y"]) + print("{} {}".format(fetch_map["y"][0], data[0][1][0])) + +``` + +### 文档 + +[设计文档](doc/DESIGN.md) + +[FAQ](doc/FAQ.md) + +### 资深开发者使用指南 + +[基于C++核心从零开始写一个预测服务](doc/CREATING.md) + +[编译指南](doc/INSTALL.md) + +## 贡献 +如果你想要给Paddle Serving做贡献,请参考[贡献指南](doc/CONTRIBUTE.md) + diff --git a/README_EN.md b/README_EN.md new file mode 100644 index 0000000000000000000000000000000000000000..5a0b61138f96f01387c5586ebdfe4cdd1354e154 --- /dev/null +++ b/README_EN.md @@ -0,0 +1,126 @@ +# Paddle Serving + +Paddle Serving is the online inference service framework of [Paddle](https://github.com/PaddlePaddle/Paddle) that can help developers 
easily deploy a deep learning model service on the server side and send requests to it from mobile devices, edge devices, and data centers. Currently, Paddle Serving mainly supports deep learning models trained with Paddle, although it can easily be extended to serve models from other deep learning frameworks. Paddle Serving is designed around industrial deployment practice. For example, multi-model management for online services, double-buffered model loading, and online A/B testing of models are supported. The highly concurrent [Baidu-rpc](https://github.com/apache/incubator-brpc) library, which also comes from industrial practice, is used for the underlying communication. Paddle Serving provides a user-friendly API that integrates seamlessly with Paddle training code, so users can finish model training and model serving in an end-to-end fashion. + + + +## Quick Start + +Paddle Serving supports a lightweight Python API for model inference and integrates seamlessly with the Paddle training process. Here is a Boston house price prediction example to help you get started quickly. + +### Installation + +```shell +pip install paddle-serving-client +pip install paddle-serving-server +``` + +### Training Scripts + +``` python +import sys +import paddle +import paddle.fluid as fluid + +train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), batch_size=16) + +test_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), batch_size=16) + +x = fluid.data(name='x', shape=[None, 13], dtype='float32') +y = fluid.data(name='y', shape=[None, 1], dtype='float32') + +y_predict = fluid.layers.fc(input=x, size=1, act=None) +cost = fluid.layers.square_error_cost(input=y_predict, label=y) +avg_loss = fluid.layers.mean(cost) +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01) +sgd_optimizer.minimize(avg_loss) + +place = fluid.CPUPlace() +feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) +exe = fluid.Executor(place) +exe.run(fluid.default_startup_program()) + +import paddle_serving_client.io as serving_io + +for pass_id in range(30): + for data_train in train_reader(): + avg_loss_value, = exe.run( + fluid.default_main_program(), + feed=feeder.feed(data_train), + fetch_list=[avg_loss]) + +serving_io.save_model( + "serving_server_model", "serving_client_conf", + {"x": x}, {"y": y_predict}, fluid.default_main_program()) +``` + + + +### Server Side Scripts + +``` python +import sys +from paddle_serving.serving_server import OpMaker +from paddle_serving.serving_server import OpSeqMaker +from paddle_serving.serving_server import Server + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +general_infer_op = op_maker.create('general_infer') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_infer_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.load_model_config(sys.argv[1]) +server.prepare_server(workdir="work_dir1", port=9393, device="cpu") +server.run_server() +``` + +### Start Server + +``` shell +python test_server.py serving_server_model +``` + +### Client Side Scripts + +``` python +from paddle_serving_client import Client +import paddle +import sys + +client = Client() +client.load_client_config(sys.argv[1]) +client.connect(["127.0.0.1:9393"]) + +test_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), batch_size=1) + +for data in test_reader(): + fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["y"]) + print("{} {}".format(fetch_map["y"][0], 
data[0][1][0])) + + +``` + + + +### Document + +[Design Doc(Chinese)](doc/DESIGN.md) + +[FAQ(Chinese)](doc/FAQ.md) + +### Advanced features and development + +[Develop a serving application with C++(Chinese)](doc/CREATING.md) + +[Compile from source code(Chinese)](doc/INSTALL.md) + +## Contribution + +If you want to contribute code to Paddle Serving, please reference [Contribution Guidelines]( diff --git a/core/general-server/op/general_text_infer_op.cpp b/core/general-server/op/general_text_infer_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..79e671047f3bdd026a1a33cfbc056e5775ed6a68 --- /dev/null +++ b/core/general-server/op/general_text_infer_op.cpp @@ -0,0 +1,150 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "core/general-server/op/general_text_infer_op.h" +#include "core/general-server/op/general_infer_op.h" +#include "core/general-server/op/general_text_reader_op.h" +#include "core/general-server/op/general_reader_op.h" +#include "core/predictor/framework/infer.h" +#include "core/predictor/framework/memory.h" +#include "core/predictor/framework/resource.h" +#include "core/util/include/timer.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +using baidu::paddle_serving::serving::GENERAL_MODEL_NAME; +using baidu::paddle_serving::Timer; +using baidu::paddle_serving::predictor::MempoolWrapper; +using baidu::paddle_serving::predictor::general_model::Tensor; +using baidu::paddle_serving::predictor::general_model::Response; +using baidu::paddle_serving::predictor::general_model::Request; +using baidu::paddle_serving::predictor::general_model::FetchInst; +using baidu::paddle_serving::predictor::InferManager; +using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; + +int GeneralTextInferOp::inference() { + const GeneralTextReaderOutput *reader_out = + get_depend_argument("general_text_reader_op"); + if (!reader_out) { + LOG(ERROR) << "Failed mutable depended argument, op:" + << "general_text_reader_op"; + return -1; + } + + int reader_status = reader_out->reader_status; + if (reader_status != 0) { + LOG(ERROR) << "Read request wrong."; + return -1; + } + + const TensorVector *in = &reader_out->tensor_vector; + TensorVector *out = butil::get_object(); + int batch_size = 0; + if (in->at(0).lod.size() == 1) { + batch_size = in->at(0).lod[0].size() - 1; + } else { + batch_size = in->at(0).shape[0]; + } + VLOG(2) << "infer batch size: " << batch_size; + // infer + Timer timeline; + double infer_time = 0.0; + timeline.Start(); + if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) { + LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME; + return -1; + } + timeline.Pause(); + infer_time = timeline.ElapsedUS(); + + const Request *req = dynamic_cast(get_request_message()); + + VLOG(2) << "start to call load general model_conf op"; + 
baidu::paddle_serving::predictor::Resource &resource = + baidu::paddle_serving::predictor::Resource::instance(); + + VLOG(2) << "get resource pointer done."; + std::shared_ptr model_config = + resource.get_general_model_config(); + + std::vector fetch_index; + fetch_index.resize(req->fetch_var_names_size()); + for (int i = 0; i < req->fetch_var_names_size(); ++i) { + fetch_index[i] = + model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)]; + } + + // response inst with only fetch_var_names + Response *res = mutable_data(); + + res->set_mean_infer_us(infer_time); + + for (int i = 0; i < batch_size; ++i) { + FetchInst *fetch_inst = res->add_insts(); + for (auto & idx : fetch_index) { + Tensor *tensor = fetch_inst->add_tensor_array(); + // currently only response float tensor or lod_tensor + tensor->set_elem_type(1); + if (model_config->_is_lod_fetch[idx]) { + VLOG(2) << "out[" << idx << " is lod_tensor"; + tensor->add_shape(-1); + } else { + VLOG(2) << "out[" << idx << "] is tensor"; + for (int k = 1; k < out->at(idx).shape.size(); ++k) { + VLOG(2) << "shape[" << k - 1 << "]: " + << out->at(idx).shape[k]; + tensor->add_shape(out->at(idx).shape[k]); + } + } + } + } + + int var_idx = 0; + for (auto & idx : fetch_index) { + float *data_ptr = static_cast(out->at(idx).data.data()); + int cap = 1; + for (int j = 1; j < out->at(idx).shape.size(); ++j) { + cap *= out->at(idx).shape[j]; + } + if (model_config->_is_lod_fetch[idx]) { + for (int j = 0; j < batch_size; ++j) { + for (int k = out->at(idx).lod[0][j]; + k < out->at(idx).lod[0][j + 1]; k++) { + res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data( + data_ptr[k]); + } + } + } else { + for (int j = 0; j < batch_size; ++j) { + for (int k = j * cap; k < (j + 1) * cap; ++k) { + res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data( + data_ptr[k]); + } + } + } + var_idx++; + } + return 0; +} +DEFINE_OP(GeneralTextInferOp); + +} // namespace serving +} // namespace paddle_serving +} // namespace baidu diff --git a/core/general-server/op/general_text_infer_op.h b/core/general-server/op/general_text_infer_op.h new file mode 100644 index 0000000000000000000000000000000000000000..5c94a205f8fc03cb8b4d23bb556a8755b7cb55ea --- /dev/null +++ b/core/general-server/op/general_text_infer_op.h @@ -0,0 +1,45 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#ifdef BCLOUD +#ifdef WITH_GPU +#include "paddle/paddle_inference_api.h" +#else +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#endif +#else +#include "paddle_inference_api.h" // NOLINT +#endif +#include "core/general-server/general_model_service.pb.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +class GeneralTextInferOp + : public baidu::paddle_serving::predictor::OpWithChannel< + baidu::paddle_serving::predictor::general_model::Response> { + public: + typedef std::vector TensorVector; + + DECLARE_OP(GeneralTextInferOp); + + int inference(); +}; + +} // namespace serving +} // namespace paddle_serving +} // namespace baidu diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e3c489f6557d6d5e9931ca317718e5f4e3e3683 --- /dev/null +++ b/core/general-server/op/general_text_reader_op.cpp @@ -0,0 +1,166 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "core/general-server/op/general_text_reader_op.h" +#include "core/predictor/framework/infer.h" +#include "core/predictor/framework/memory.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +using baidu::paddle_serving::predictor::MempoolWrapper; +using baidu::paddle_serving::predictor::general_model::Tensor; +using baidu::paddle_serving::predictor::general_model::Request; +using baidu::paddle_serving::predictor::general_model::FeedInst; +using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; + + +int GeneralTextReaderOp::inference() { + // reade request from client + const Request *req = dynamic_cast(get_request_message()); + + int batch_size = req->insts_size(); + int input_var_num = 0; + + std::vector elem_type; + std::vector elem_size; + std::vector capacity; + + GeneralTextReaderOutput *res = mutable_data(); + TensorVector *in = &res->tensor_vector; + + if (!res) { + LOG(ERROR) << "Failed get op tls reader object output"; + } + + if (batch_size <= 0) { + res->reader_status = -1; + return 0; + } + + int var_num = req->insts(0).tensor_array_size(); + VLOG(2) << "var num: " << var_num; + // read config + + VLOG(2) << "start to call load general model_conf op"; + baidu::paddle_serving::predictor::Resource &resource = + baidu::paddle_serving::predictor::Resource::instance(); + + VLOG(2) << "get resource pointer done."; + std::shared_ptr model_config = + resource.get_general_model_config(); + + VLOG(2) << "print general model config done."; + + elem_type.resize(var_num); + elem_size.resize(var_num); + capacity.resize(var_num); + for (int i = 0; i < var_num; ++i) { + paddle::PaddleTensor lod_tensor; + elem_type[i] = req->insts(0).tensor_array(i).elem_type(); + VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i]; + if (elem_type[i] == 0) { // int64 + elem_size[i] = sizeof(int64_t); 
+ lod_tensor.dtype = paddle::PaddleDType::INT64; + } else { + elem_size[i] = sizeof(float); + lod_tensor.dtype = paddle::PaddleDType::FLOAT32; + } + + if (req->insts(0).tensor_array(i).shape(0) == -1) { + lod_tensor.lod.resize(1); + lod_tensor.lod[0].push_back(0); + VLOG(2) << "var[" << i << "] is lod_tensor"; + } else { + lod_tensor.shape.push_back(batch_size); + capacity[i] = 1; + for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) { + int dim = req->insts(0).tensor_array(i).shape(k); + VLOG(2) << "shape for var[" << i << "]: " << dim; + capacity[i] *= dim; + lod_tensor.shape.push_back(dim); + } + VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i]; + } + lod_tensor.name = model_config->_feed_name[i]; + in->push_back(lod_tensor); + } + + for (int i = 0; i < var_num; ++i) { + if (in->at(i).lod.size() == 1) { + for (int j = 0; j < batch_size; ++j) { + const Tensor &tensor = req->insts(j).tensor_array(i); + int data_len = tensor.int_data_size(); + int cur_len = in->at(i).lod[0].back(); + in->at(i).lod[0].push_back(cur_len + data_len); + } + in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]); + in->at(i).shape = {in->at(i).lod[0].back(), 1}; + VLOG(2) << "var[" << i + << "] is lod_tensor and len=" << in->at(i).lod[0].back(); + } else { + in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]); + VLOG(2) << "var[" << i + << "] is tensor and capacity=" << batch_size * capacity[i]; + } + } + + for (int i = 0; i < var_num; ++i) { + if (elem_type[i] == 0) { + int64_t *dst_ptr = static_cast(in->at(i).data.data()); + int offset = 0; + for (int j = 0; j < batch_size; ++j) { + for (int k = 0; + k < req->insts(j).tensor_array(i).int_data_size(); + ++k) { + dst_ptr[offset + k] = + req->insts(j).tensor_array(i).int_data(k); + } + if (in->at(i).lod.size() == 1) { + offset = in->at(i).lod[0][j + 1]; + } else { + offset += capacity[i]; + } + } + } else { + float *dst_ptr = static_cast(in->at(i).data.data()); + int offset = 0; + for (int j = 0; j < batch_size; ++j) { + for (int k = 0; + k < req->insts(j).tensor_array(i).int_data_size(); + ++k) { + dst_ptr[offset + k] = + req->insts(j).tensor_array(i).int_data(k); + } + if (in->at(i).lod.size() == 1) { + offset = in->at(i).lod[0][j + 1]; + } else { + offset += capacity[i]; + } + } + } + } + + VLOG(2) << "read data from client success"; + return 0; +} +DEFINE_OP(GeneralTextReaderOp); +} // namespace serving +} // namespace paddle_serving +} // namespace baidu diff --git a/core/general-server/op/general_text_reader_op.h b/core/general-server/op/general_text_reader_op.h new file mode 100644 index 0000000000000000000000000000000000000000..e7f484a9915609887c2a6593ff1ad0655fc4789c --- /dev/null +++ b/core/general-server/op/general_text_reader_op.h @@ -0,0 +1,62 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#ifdef BCLOUD +#ifdef WITH_GPU +#include "paddle/paddle_inference_api.h" +#else +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#endif +#else +#include "paddle_inference_api.h" // NOLINT +#endif +#include +#include "core/predictor/framework/resource.h" +#include "core/general-server/general_model_service.pb.h" +#include "core/general-server/load_general_model_service.pb.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +struct GeneralTextReaderOutput { + std::vector tensor_vector; + int reader_status = 0; + + void Clear() { + size_t tensor_count = tensor_vector.size(); + for (size_t ti = 0; ti < tensor_count; ++ti) { + tensor_vector[ti].shape.clear(); + } + tensor_vector.clear(); + } + std::string ShortDebugString() const { return "Not implemented!"; } +}; + +class GeneralTextReaderOp : + public baidu::paddle_serving::predictor::OpWithChannel< + GeneralTextReaderOutput> { + public: + typedef std::vector TensorVector; + + DECLARE_OP(GeneralTextReaderOp); + + int inference(); +}; + +} // namespace serving +} // namespace paddle_serving +} // namespace baidu diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 1f236ea34574e10af905aa936348bea9dd4c0ae5..36b63ec68bc704c1f5e1eb7a8ed4d3e4fb456d4f 100644 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -21,8 +21,10 @@ option cc_generic_services = true; message Tensor { repeated bytes data = 1; - optional int32 elem_type = 2; - repeated int32 shape = 3; + repeated int32 int_data = 2; + repeated float float_data = 3; + optional int32 elem_type = 4; + repeated int32 shape = 5; }; message FeedInst { diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index d48a79e18a54cfca7eeca2d730dc4b0b357c2bb4..1d62c25f1ce40ecc23e98310e7834d7bcc847022 100644 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -20,9 +20,11 @@ package baidu.paddle_serving.predictor.general_model; option cc_generic_services = true; message Tensor { - repeated bytes data = 1; - optional int32 elem_type = 2; - repeated int32 shape = 3; + repeated bytes data = 1; // most general format + repeated int32 int_data = 2; // for simple debug only + repeated float float_data = 3; // for simple debug only + optional int32 elem_type = 4; // support int64, float32 + repeated int32 shape = 5; }; message FeedInst { diff --git a/doc/CONTRIBUTE.md b/doc/CONTRIBUTE.md index 6d3027f586b5f8cd2fd29f271aa9e31411e81c9d..01b9f5b0b3ed369caeb041bb2d4fa5684b46b530 100644 --- a/doc/CONTRIBUTE.md +++ b/doc/CONTRIBUTE.md @@ -1,8 +1,8 @@ -# 贡献指南 +# Contribution Guideline -## 如何贡献代码 +## How to contribute -### 贡献代码 +### Contribute Code -如果您在Paddle Serving上有了改进,请给我们发Pull Requests!Github提供了提交Pull Requests的参考[howto](https://help.github.com/articles/using-pull-requests/)。 +If you have improvements on Paddle Serving, please send us Pull Requests! github.com provides guidelines for submitting pull requests [howto](https://help.github.com/articles/using-pull-requests/). diff --git a/doc/IMDB_GO_CLIENT.md b/doc/IMDB_GO_CLIENT.md new file mode 100644 index 0000000000000000000000000000000000000000..896a6e0b15a4568582e3535b0c61ee831d7d4259 --- /dev/null +++ b/doc/IMDB_GO_CLIENT.md @@ -0,0 +1,196 @@ +# How to use Go Client of Paddle Serving + +This document shows how to use Go as your client language. 
For the Go client of Paddle Serving, a simple client package is provided at https://github.com/PaddlePaddle/Serving/tree/develop/go/serving_client, which users can import as needed. Here is a simple example of a sentiment analysis task based on the IMDB dataset. + +### Install + +We assume you have Go 1.9.2 or later and Python 2.7 installed. + +```shell +go get github.com/PaddlePaddle/Serving/go/serving_client +go get github.com/PaddlePaddle/Serving/go/proto +pip install paddle-serving-server +``` + +### Download Text Classification Model + +``` shell +wget https://paddle-serving.bj.bcebos.com/data%2Ftext_classification%2Fimdb_serving_example.tar.gz +tar -xzf imdb_serving_example.tar.gz +``` + +### Server Side Code + +```python +# test_server_go.py +import os +import sys +from paddle_serving_server import OpMaker +from paddle_serving_server import OpSeqMaker +from paddle_serving_server import Server + +op_maker = OpMaker() +read_op = op_maker.create('general_text_reader') +general_infer_op = op_maker.create('general_text_infer') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_infer_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.load_model_config(sys.argv[1]) +server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.run_server() +``` + +### Start Server + +``` shell +python test_server_go.py ./serving_server_model/ 9292 +``` + +### Client code example + +``` go +// imdb_client.go +package main + +import ( + "io" + "fmt" + "strings" + "bufio" + "strconv" + "os" + serving_client "github.com/PaddlePaddle/Serving/go/serving_client" +) + +func main() { + var config_file_path string + config_file_path = os.Args[1] + handle := serving_client.LoadModelConfig(config_file_path) + handle = serving_client.Connect("127.0.0.1", "9292", handle) + + test_file_path := os.Args[2] + fi, err := os.Open(test_file_path) + if err != nil { + fmt.Print(err) + } + + defer fi.Close() + br := bufio.NewReader(fi) + + fetch := []string{"cost", "acc", "prediction"} + + var result map[string][]float32 + + for { + line, err := br.ReadString('\n') + if err == io.EOF { + break + } + + line = strings.Trim(line, "\n") + + var words = []int64{} + + s := strings.Split(line, " ") + value, err := strconv.Atoi(s[0]) + var feed_int_map map[string][]int64 + + for _, v := range s[1:value + 1] { + int_v, _ := strconv.Atoi(v) + words = append(words, int64(int_v)) + } + + label, err := strconv.Atoi(s[len(s)-1]) + + if err != nil { + panic(err) + } + + feed_int_map = map[string][]int64{} + feed_int_map["words"] = words + feed_int_map["label"] = []int64{int64(label)} + + result = serving_client.Predict(handle, feed_int_map, fetch) + fmt.Println(result["prediction"][1], "\t", int64(label)) + } +} +``` + +### Prediction based on IMDB Test set + +``` shell +go run imdb_client.go serving_client_conf/serving_client_conf.stream.prototxt test.data > result +``` + + + +### Compute accuracy + +```go +// acc.go +package main + +import ( + "io" + "os" + "fmt" + "bufio" + "strings" + "strconv" +) + +func main() { + score_file := os.Args[1] + fi, err := os.Open(score_file) + if err != nil { + fmt.Print(err) + } + + defer fi.Close() + br := bufio.NewReader(fi) + + total := int(0) + acc := int(0) + for { + line, err := br.ReadString('\n') + if err == io.EOF { + break + } + + line = strings.Trim(line, "\n") + s := strings.Split(line, "\t") + prob_str := strings.Trim(s[0], " ") + label_str := strings.Trim(s[1], " ") + prob, err := 
strconv.ParseFloat(prob_str, 32) + if err != nil { + panic(err) + } + label, err := strconv.ParseFloat(label_str, 32) + if err != nil { + panic(err) + } + if (prob - 0.5) * (label - 0.5) > 0 { + acc++ + } + total++ + } + fmt.Println("total num: ", total) + fmt.Println("acc num: ", acc) + fmt.Println("acc: ", float32(acc) / float32(total)) + +} +``` + +```shell +go run acc.go result +total num: 25000 +acc num: 22014 +acc: 0.88056 +``` + + + diff --git a/go/client_app/acc.go b/go/client_app/acc.go new file mode 100644 index 0000000000000000000000000000000000000000..d93f47360976c5dd22be2a9919fe278c283aea2c --- /dev/null +++ b/go/client_app/acc.go @@ -0,0 +1,50 @@ +package main + +import ( + "io" + "os" + "fmt" + "bufio" + "strings" + "strconv" +) + +func main() { + score_file := os.Args[1] + fi, err := os.Open(score_file) + if err != nil { + fmt.Print(err) + } + + defer fi.Close() + br := bufio.NewReader(fi) + + total := int(0) + acc := int(0) + for { + line, err := br.ReadString('\n') + if err == io.EOF { + break + } + + line = strings.Trim(line, "\n") + s := strings.Split(line, "\t") + prob_str := strings.Trim(s[0], " ") + label_str := strings.Trim(s[1], " ") + prob, err := strconv.ParseFloat(prob_str, 32) + if err != nil { + panic(err) + } + label, err := strconv.ParseFloat(label_str, 32) + if err != nil { + panic(err) + } + if (prob - 0.5) * (label - 0.5) > 0 { + acc++ + } + total++ + } + fmt.Println("total num: ", total) + fmt.Println("acc num: ", acc) + fmt.Println("acc: ", float32(acc) / float32(total)) +} \ No newline at end of file diff --git a/go/client_app/imdb_client.go b/go/client_app/imdb_client.go new file mode 100644 index 0000000000000000000000000000000000000000..f6ad12048e81bdad80b877325c17972b2481cf7a --- /dev/null +++ b/go/client_app/imdb_client.go @@ -0,0 +1,79 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "io" + "fmt" + "strings" + "bufio" + "strconv" + "os" + "serving_client" +) + +func main() { + var config_file_path string + config_file_path = os.Args[1] + handle := serving_client.LoadModelConfig(config_file_path) + handle = serving_client.Connect("127.0.0.1", "9292", handle) + + test_file_path := os.Args[2] + fi, err := os.Open(test_file_path) + if err != nil { + fmt.Print(err) + } + + defer fi.Close() + br := bufio.NewReader(fi) + + fetch := []string{"cost", "acc", "prediction"} + + var result map[string][]float32 + + for { + line, err := br.ReadString('\n') + if err == io.EOF { + break + } + + line = strings.Trim(line, "\n") + + var words = []int64{} + + s := strings.Split(line, " ") + value, err := strconv.Atoi(s[0]) + var feed_int_map map[string][]int64 + + for _, v := range s[1:value + 1] { + int_v, _ := strconv.Atoi(v) + words = append(words, int64(int_v)) + } + + label, err := strconv.Atoi(s[len(s)-1]) + + if err != nil { + panic(err) + } + + feed_int_map = map[string][]int64{} + feed_int_map["words"] = words + feed_int_map["label"] = []int64{int64(label)} + + result = serving_client.Predict(handle, + feed_int_map, fetch) + fmt.Println(result["prediction"][1], "\t", int64(label)) + } +} \ No newline at end of file diff --git a/go/proto/general_model_config.pb.go b/go/proto/general_model_config.pb.go new file mode 100644 index 0000000000000000000000000000000000000000..40d993ac1aa81d9180e51f1c6da464a3df251ff2 --- /dev/null +++ b/go/proto/general_model_config.pb.go @@ -0,0 +1,237 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: general_model_config.proto + +package baidu_paddle_serving_configure + +import ( + fmt "fmt" + proto "github.com/golang/protobuf/proto" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package + +type FeedVar struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"` + IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"` + FeedType *int32 `protobuf:"varint,4,opt,name=feed_type,json=feedType,def=0" json:"feed_type,omitempty"` + Shape []int32 `protobuf:"varint,5,rep,name=shape" json:"shape,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *FeedVar) Reset() { *m = FeedVar{} } +func (m *FeedVar) String() string { return proto.CompactTextString(m) } +func (*FeedVar) ProtoMessage() {} +func (*FeedVar) Descriptor() ([]byte, []int) { + return fileDescriptor_efa52beffa29d37a, []int{0} +} + +func (m *FeedVar) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FeedVar.Unmarshal(m, b) +} +func (m *FeedVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FeedVar.Marshal(b, m, deterministic) +} +func (m *FeedVar) XXX_Merge(src proto.Message) { + xxx_messageInfo_FeedVar.Merge(m, src) +} +func (m *FeedVar) XXX_Size() int { + return xxx_messageInfo_FeedVar.Size(m) +} +func (m *FeedVar) XXX_DiscardUnknown() { + xxx_messageInfo_FeedVar.DiscardUnknown(m) +} + +var xxx_messageInfo_FeedVar proto.InternalMessageInfo + +const Default_FeedVar_IsLodTensor bool = false +const Default_FeedVar_FeedType int32 = 0 + +func (m *FeedVar) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *FeedVar) GetAliasName() string { + if m != nil && m.AliasName != nil { + return *m.AliasName + } + return "" +} + +func (m *FeedVar) GetIsLodTensor() bool { + if m != nil && m.IsLodTensor != nil { + return *m.IsLodTensor + } + return Default_FeedVar_IsLodTensor +} + +func (m *FeedVar) GetFeedType() int32 { + if m != nil && m.FeedType != nil { + return *m.FeedType + } + return Default_FeedVar_FeedType +} + +func (m *FeedVar) GetShape() []int32 { + if m != nil { + return m.Shape + } + return nil +} + +type FetchVar struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"` + IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"` + Shape []int32 `protobuf:"varint,4,rep,name=shape" json:"shape,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *FetchVar) Reset() { *m = FetchVar{} } +func (m *FetchVar) String() string { return proto.CompactTextString(m) } +func (*FetchVar) ProtoMessage() {} +func (*FetchVar) Descriptor() ([]byte, []int) { + return fileDescriptor_efa52beffa29d37a, []int{1} +} + +func (m *FetchVar) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FetchVar.Unmarshal(m, b) +} +func (m *FetchVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FetchVar.Marshal(b, m, deterministic) +} +func (m *FetchVar) XXX_Merge(src proto.Message) { + xxx_messageInfo_FetchVar.Merge(m, src) +} +func (m *FetchVar) XXX_Size() int { + return xxx_messageInfo_FetchVar.Size(m) +} +func (m *FetchVar) XXX_DiscardUnknown() { + xxx_messageInfo_FetchVar.DiscardUnknown(m) +} + +var 
xxx_messageInfo_FetchVar proto.InternalMessageInfo + +const Default_FetchVar_IsLodTensor bool = false + +func (m *FetchVar) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *FetchVar) GetAliasName() string { + if m != nil && m.AliasName != nil { + return *m.AliasName + } + return "" +} + +func (m *FetchVar) GetIsLodTensor() bool { + if m != nil && m.IsLodTensor != nil { + return *m.IsLodTensor + } + return Default_FetchVar_IsLodTensor +} + +func (m *FetchVar) GetShape() []int32 { + if m != nil { + return m.Shape + } + return nil +} + +type GeneralModelConfig struct { + FeedVar []*FeedVar `protobuf:"bytes,1,rep,name=feed_var,json=feedVar" json:"feed_var,omitempty"` + FetchVar []*FetchVar `protobuf:"bytes,2,rep,name=fetch_var,json=fetchVar" json:"fetch_var,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GeneralModelConfig) Reset() { *m = GeneralModelConfig{} } +func (m *GeneralModelConfig) String() string { return proto.CompactTextString(m) } +func (*GeneralModelConfig) ProtoMessage() {} +func (*GeneralModelConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_efa52beffa29d37a, []int{2} +} + +func (m *GeneralModelConfig) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GeneralModelConfig.Unmarshal(m, b) +} +func (m *GeneralModelConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GeneralModelConfig.Marshal(b, m, deterministic) +} +func (m *GeneralModelConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_GeneralModelConfig.Merge(m, src) +} +func (m *GeneralModelConfig) XXX_Size() int { + return xxx_messageInfo_GeneralModelConfig.Size(m) +} +func (m *GeneralModelConfig) XXX_DiscardUnknown() { + xxx_messageInfo_GeneralModelConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_GeneralModelConfig proto.InternalMessageInfo + +func (m *GeneralModelConfig) GetFeedVar() []*FeedVar { + if m != nil { + return m.FeedVar + } + return nil +} + +func (m *GeneralModelConfig) GetFetchVar() []*FetchVar { + if m != nil { + return m.FetchVar + } + return nil +} + +func init() { + proto.RegisterType((*FeedVar)(nil), "baidu.paddle_serving.configure.FeedVar") + proto.RegisterType((*FetchVar)(nil), "baidu.paddle_serving.configure.FetchVar") + proto.RegisterType((*GeneralModelConfig)(nil), "baidu.paddle_serving.configure.GeneralModelConfig") +} + +func init() { proto.RegisterFile("general_model_config.proto", fileDescriptor_efa52beffa29d37a) } + +var fileDescriptor_efa52beffa29d37a = []byte{ + // 283 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0xd0, 0x31, 0x4b, 0xc4, 0x30, + 0x14, 0x07, 0x70, 0x72, 0x6d, 0xb9, 0xf6, 0x1d, 0x2e, 0xc1, 0xa1, 0x08, 0x1e, 0xe5, 0x16, 0xe3, + 0x52, 0xc4, 0xf1, 0x46, 0xc5, 0x73, 0x51, 0x87, 0x72, 0xb8, 0x86, 0xd8, 0xbc, 0xb6, 0x81, 0x5c, + 0x53, 0x92, 0xde, 0xc1, 0x2d, 0x7e, 0x13, 0xf1, 0xab, 0x4a, 0x93, 0x43, 0x9c, 0x74, 0x72, 0x7b, + 0x79, 0xff, 0xf0, 0xde, 0xe3, 0x07, 0x17, 0x2d, 0xf6, 0x68, 0x85, 0xe6, 0x3b, 0x23, 0x51, 0xf3, + 0xda, 0xf4, 0x8d, 0x6a, 0xcb, 0xc1, 0x9a, 0xd1, 0xd0, 0xe5, 0x9b, 0x50, 0x72, 0x5f, 0x0e, 0x42, + 0x4a, 0x8d, 0xdc, 0xa1, 0x3d, 0xa8, 0xbe, 0x2d, 0xc3, 0x97, 0xbd, 0xc5, 0xd5, 0x07, 0x81, 0xf9, + 0x06, 0x51, 0xbe, 0x0a, 0x4b, 0x29, 0xc4, 0xbd, 0xd8, 0x61, 0x4e, 0x0a, 0xc2, 0xb2, 0xca, 0xd7, + 0xf4, 0x12, 0x40, 0x68, 0x25, 0x1c, 0xf7, 0xc9, 0xcc, 0x27, 0x99, 0xef, 0xbc, 0x4c, 0xf1, 0x35, + 0x9c, 0x29, 
0xc7, 0xb5, 0x91, 0x7c, 0xc4, 0xde, 0x19, 0x9b, 0x47, 0x05, 0x61, 0xe9, 0x3a, 0x69, + 0x84, 0x76, 0x58, 0x2d, 0x94, 0x7b, 0x32, 0x72, 0xeb, 0x13, 0xba, 0x84, 0xac, 0x41, 0x94, 0x7c, + 0x3c, 0x0e, 0x98, 0xc7, 0x05, 0x61, 0xc9, 0x9a, 0xdc, 0x54, 0xe9, 0xd4, 0xdb, 0x1e, 0x07, 0xa4, + 0xe7, 0x90, 0xb8, 0x4e, 0x0c, 0x98, 0x27, 0x45, 0xc4, 0x92, 0x2a, 0x3c, 0x56, 0xef, 0x90, 0x6e, + 0x70, 0xac, 0xbb, 0xff, 0xbf, 0xef, 0x7b, 0x7f, 0xfc, 0x73, 0xff, 0x27, 0x01, 0xfa, 0x18, 0x78, + 0x9f, 0x27, 0xdd, 0x7b, 0x2f, 0x47, 0xef, 0xc0, 0x1f, 0xce, 0x0f, 0xc2, 0xe6, 0xa4, 0x88, 0xd8, + 0xe2, 0xf6, 0xaa, 0xfc, 0x5d, 0xba, 0x3c, 0x29, 0x57, 0xf3, 0xe6, 0xc4, 0xfd, 0x30, 0x81, 0x8c, + 0x75, 0xe7, 0x87, 0xcc, 0xfc, 0x10, 0xf6, 0xf7, 0x90, 0x60, 0x31, 0xb9, 0x85, 0xea, 0x2b, 0x00, + 0x00, 0xff, 0xff, 0x08, 0x27, 0x9c, 0x1a, 0xfe, 0x01, 0x00, 0x00, +} diff --git a/go/serving_client/serving_client_api.go b/go/serving_client/serving_client_api.go new file mode 100644 index 0000000000000000000000000000000000000000..486feffe32195ef0792b6094c24a70bf54996187 --- /dev/null +++ b/go/serving_client/serving_client_api.go @@ -0,0 +1,165 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package serving_client + +import ( + "bytes" + "encoding/json" + "io/ioutil" + "log" + "net/http" + pb "general_model" + "github.com/golang/protobuf/proto" +) + +type Tensor struct { + Data []byte `json:"data"` + FloatData []float32 `json:"float_data"` + IntData []int64 `json:"int_data"` + ElemType int `json:"elem_type"` + Shape []int `json:"shape"` +} + +type FeedInst struct { + TensorArray []Tensor `json:"tensor_array"` +} + +type FetchInst struct { + TensorArray []Tensor `json:"tensor_array"` +} + +type Request struct { + Insts []FeedInst `json:"insts"` + FetchVarNames []string `json:"fetch_var_names"` +} + +type Response struct { + Insts []FetchInst `json:"insts"` + MeanInferUs float32 `json:"mean_infer_us"` +} + +type Handle struct { + Url string + Port string + FeedAliasNameMap map[string]string + FeedShapeMap map[string][]int + FeedNameMap map[string]int + FeedAliasNames []string + FetchNameMap map[string]int + FetchAliasNameMap map[string]string +} + +func LoadModelConfig(config string) Handle { + in, err := ioutil.ReadFile(config) + if err != nil { + log.Fatalln("Failed to read general model: ", err) + } + general_model_config := &pb.GeneralModelConfig{} + if err := proto.Unmarshal(in, general_model_config); err != nil { + log.Fatalln("Failed to parse GeneralModelConfig: ", err) + } + log.Println("read protobuf succeed") + handle := Handle{} + handle.FeedNameMap = map[string]int{} + handle.FeedAliasNameMap = map[string]string{} + handle.FeedShapeMap = map[string][]int{} + handle.FetchNameMap = map[string]int{} + handle.FetchAliasNameMap = map[string]string{} + handle.FeedAliasNames = []string{} + + for i, v := range general_model_config.FeedVar { + handle.FeedNameMap[*v.Name] = i + tmp_array := []int{} + for _, vv := range v.Shape { + tmp_array = 
append(tmp_array, int(vv)) + } + handle.FeedShapeMap[*v.Name] = tmp_array + handle.FeedAliasNameMap[*v.AliasName] = *v.Name + handle.FeedAliasNames = append(handle.FeedAliasNames, *v.AliasName) + } + + for i, v := range general_model_config.FetchVar { + handle.FetchNameMap[*v.Name] = i + handle.FetchAliasNameMap[*v.AliasName] = *v.Name + } + + return handle +} + +func Connect(url string, port string, handle Handle) Handle { + handle.Url = url + handle.Port = port + return handle +} + +func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map[string][]float32 { + contentType := "application/json;charset=utf-8" + + var tensor_array []Tensor + var inst FeedInst + tensor_array = []Tensor{} + inst = FeedInst{} + + for i := 0; i < len(handle.FeedAliasNames); i++ { + key_i := handle.FeedAliasNames[i] + var tmp Tensor + tmp.IntData = []int64{} + tmp.Shape = []int{} + tmp.IntData = int_feed_map[key_i] + tmp.ElemType = 0 + tmp.Shape = handle.FeedShapeMap[key_i] + tensor_array = append(tensor_array, tmp) + } + + inst.TensorArray = tensor_array + + req := &Request{ + Insts: []FeedInst{inst}, + FetchVarNames: fetch} + + b, err := json.Marshal(req) + + body := bytes.NewBuffer(b) + + var post_address bytes.Buffer + post_address.WriteString("http://") + post_address.WriteString(handle.Url) + post_address.WriteString(":") + post_address.WriteString(handle.Port) + post_address.WriteString("/GeneralModelService/inference") + + resp, err := http.Post(post_address.String(), contentType, body) + if err != nil { + log.Println("Post failed:", err) + } + + defer resp.Body.Close() + + content, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Println("Read failed:", err) + } + + response_json := Response{} + err = json.Unmarshal([]byte(content), &response_json) + + var result map[string][]float32 + result = map[string][]float32{} + for i, v := range fetch { + result[v] = response_json.Insts[0].TensorArray[i].FloatData + } + + return result +} diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index e73a0606d0aeb90c426733fd109ff684bb810ea8..458a3da5e02a9799eb042674708e994c98bc6701 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -23,12 +23,12 @@ from version import serving_server_version class OpMaker(object): def __init__(self): - self.op_dict = { - "general_infer": "GeneralInferOp", - "general_reader": "GeneralReaderOp", - "general_single_kv": "GeneralSingleKVOp", - "general_dist_kv": "GeneralDistKVOp" - } + self.op_dict = {"general_infer":"GeneralInferOp", + "general_text_infer":"GeneralTextInferOp", + "general_reader":"GeneralReaderOp", + "general_text_reader":"GeneralTextReaderOp", + "general_single_kv":"GeneralSingleKVOp", + "general_dist_kv":"GeneralDistKVOp"} # currently, inputs and outputs are not used # when we have OpGraphMaker, inputs and outputs are necessary
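The two new op names registered above, `general_text_reader` and `general_text_infer`, map to the C++ ops added in this patch. As a usage note, a server script selects these ops by name, as in the `test_server_go.py` example from the IMDB Go client guide above; the following is a minimal sketch of that pattern (the model directory passed on the command line and the port are placeholders):

```python
# Minimal sketch of a server pipeline using the new text ops; it mirrors the
# test_server_go.py example in doc/IMDB_GO_CLIENT.md from this patch.
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import Server

op_maker = OpMaker()
read_op = op_maker.create('general_text_reader')   # op name registered above
infer_op = op_maker.create('general_text_infer')   # op name registered above

op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])  # e.g. ./serving_server_model/
server.prepare_server(workdir="work_dir1", port=9292, device="cpu")
server.run_server()
```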