Unverified commit bb9dae31, authored by TeslaZhao, committed by GitHub

Merge pull request #43 from PaddlePaddle/develop

Sync
...@@ -136,8 +136,10 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
if (NOT WITH_MKLML)
  ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
  SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
endif()
ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#include "core/predictor/framework/resource.h" #include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h" #include "core/util/include/timer.h"
/* /*
#include "opencv2/imgcodecs/legacy/constants_c.h" #include "opencv2/imgcodecs/legacy/constants_c.h"
#include "opencv2/imgproc/types_c.h" #include "opencv2/imgproc/types_c.h"
...@@ -52,18 +51,18 @@ int GeneralDetectionOp::inference() { ...@@ -52,18 +51,18 @@ int GeneralDetectionOp::inference() {
} }
const std::string pre_name = pre_node_names[0]; const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name); const GeneralBlob* input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) { if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error"; LOG(ERROR) << "input_blob is nullptr,error";
return -1; return -1;
} }
uint64_t log_id = input_blob->GetLogId(); uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name; VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>(); GeneralBlob* output_blob = mutable_data<GeneralBlob>();
if (!output_blob) { if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error"; LOG(ERROR) << "output_blob is nullptr,error";
return -1; return -1;
} }
output_blob->SetLogId(log_id); output_blob->SetLogId(log_id);
...@@ -73,7 +72,7 @@ int GeneralDetectionOp::inference() { ...@@ -73,7 +72,7 @@ int GeneralDetectionOp::inference() {
return -1; return -1;
} }
const TensorVector *in = &input_blob->tensor_vector; const TensorVector* in = &input_blob->tensor_vector;
TensorVector* out = &output_blob->tensor_vector; TensorVector* out = &output_blob->tensor_vector;
int batch_size = input_blob->_batch_size; int batch_size = input_blob->_batch_size;
...@@ -81,38 +80,39 @@ int GeneralDetectionOp::inference() { ...@@ -81,38 +80,39 @@ int GeneralDetectionOp::inference() {
output_blob->_batch_size = batch_size; output_blob->_batch_size = batch_size;
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
std::vector<int> input_shape; std::vector<int> input_shape;
int in_num =0; int in_num = 0;
void* databuf_data = NULL; void* databuf_data = NULL;
char* databuf_char = NULL; char* databuf_char = NULL;
size_t databuf_size = 0; size_t databuf_size = 0;
  // now only support single string
  char* total_input_ptr = static_cast<char*>(in->at(0).data.data());
  std::string base64str = total_input_ptr;
float ratio_h{}; float ratio_h{};
float ratio_w{}; float ratio_w{};
cv::Mat img = Base2Mat(base64str); cv::Mat img = Base2Mat(base64str);
cv::Mat srcimg; cv::Mat srcimg;
cv::Mat resize_img; cv::Mat resize_img;
cv::Mat resize_img_rec; cv::Mat resize_img_rec;
cv::Mat crop_img; cv::Mat crop_img;
img.copyTo(srcimg); img.copyTo(srcimg);
  this->resize_op_.Run(img,
                       resize_img,
                       this->max_side_len_,
                       ratio_h,
                       ratio_w,
                       this->use_tensorrt_);
  this->normalize_op_.Run(
      &resize_img, this->mean_det, this->scale_det, this->is_scale_);
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
TensorVector* real_in = new TensorVector(); TensorVector* real_in = new TensorVector();
if (!real_in) { if (!real_in) {
LOG(ERROR) << "real_in is nullptr,error"; LOG(ERROR) << "real_in is nullptr,error";
...@@ -121,14 +121,15 @@ int GeneralDetectionOp::inference() { ...@@ -121,14 +121,15 @@ int GeneralDetectionOp::inference() {
for (int i = 0; i < in->size(); ++i) { for (int i = 0; i < in->size(); ++i) {
input_shape = {1, 3, resize_img.rows, resize_img.cols}; input_shape = {1, 3, resize_img.rows, resize_img.cols};
    in_num = std::accumulate(
        input_shape.begin(), input_shape.end(), 1, std::multiplies<int>());
    databuf_size = in_num * sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size); databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) { if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size; LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1; return -1;
} }
    memcpy(databuf_data, input.data(), databuf_size);
databuf_char = reinterpret_cast<char*>(databuf_data); databuf_char = reinterpret_cast<char*>(databuf_data);
paddle::PaddleBuf paddleBuf(databuf_char, databuf_size); paddle::PaddleBuf paddleBuf(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in; paddle::PaddleTensor tensor_in;
...@@ -143,21 +144,23 @@ int GeneralDetectionOp::inference() { ...@@ -143,21 +144,23 @@ int GeneralDetectionOp::inference() {
Timer timeline; Timer timeline;
int64_t start = timeline.TimeStampUS(); int64_t start = timeline.TimeStampUS();
timeline.Start(); timeline.Start();
if (InferManager::instance().infer( if (InferManager::instance().infer(
engine_name().c_str(), real_in, out, batch_size)) { engine_name().c_str(), real_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name().c_str(); << ") Failed do infer in fluid model: " << engine_name().c_str();
return -1; return -1;
} }
delete real_in;
std::vector<int> output_shape; std::vector<int> output_shape;
int out_num =0; int out_num = 0;
void* databuf_data_out = NULL; void* databuf_data_out = NULL;
char* databuf_char_out = NULL; char* databuf_char_out = NULL;
size_t databuf_size_out = 0; size_t databuf_size_out = 0;
//this is special add for PaddleOCR postprecess // this is special add for PaddleOCR postprecess
int infer_outnum = out->size(); int infer_outnum = out->size();
for (int k = 0;k <infer_outnum; ++k) { for (int k = 0; k < infer_outnum; ++k) {
int n2 = out->at(k).shape[2]; int n2 = out->at(k).shape[2];
int n3 = out->at(k).shape[3]; int n3 = out->at(k).shape[3];
int n = n2 * n3; int n = n2 * n3;
...@@ -171,17 +174,19 @@ int GeneralDetectionOp::inference() { ...@@ -171,17 +174,19 @@ int GeneralDetectionOp::inference() {
cbuf[i] = (unsigned char)((out_data[i]) * 255); cbuf[i] = (unsigned char)((out_data[i]) * 255);
} }
cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char *)cbuf.data()); cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char*)cbuf.data());
cv::Mat pred_map(n2, n3, CV_32F, (float *)pred.data()); cv::Mat pred_map(n2, n3, CV_32F, (float*)pred.data());
const double threshold = this->det_db_thresh_ * 255; const double threshold = this->det_db_thresh_ * 255;
const double maxvalue = 255; const double maxvalue = 255;
cv::Mat bit_map; cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
cv::Mat dilation_map; cv::Mat dilation_map;
    cv::Mat dila_ele =
        cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
    cv::dilate(bit_map, dilation_map, dila_ele);
    boxes = post_processor_.BoxesFromBitmap(pred_map,
                                            dilation_map,
                                            this->det_db_box_thresh_,
                                            this->det_db_unclip_ratio_);
...@@ -192,25 +197,28 @@ int GeneralDetectionOp::inference() { ...@@ -192,25 +197,28 @@ int GeneralDetectionOp::inference() {
float wh_ratio = float(crop_img.cols) / float(crop_img.rows); float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
      this->resize_op_rec.Run(
          crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_);
      this->normalize_op_.Run(
          &resize_img_rec, this->mean_rec, this->scale_rec, this->is_scale_);
      std::vector<float> output_rec(
          1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f);
this->permute_op_.Run(&resize_img_rec, output_rec.data()); this->permute_op_.Run(&resize_img_rec, output_rec.data());
// Inference. // Inference.
output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
      out_num = std::accumulate(
          output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
      databuf_size_out = out_num * sizeof(float);
databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out); databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
if (!databuf_data_out) { if (!databuf_data_out) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size_out; LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
return -1; return -1;
} }
      memcpy(databuf_data_out, output_rec.data(), databuf_size_out);
databuf_char_out = reinterpret_cast<char*>(databuf_data_out); databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out; paddle::PaddleTensor tensor_out;
...@@ -221,9 +229,8 @@ int GeneralDetectionOp::inference() { ...@@ -221,9 +229,8 @@ int GeneralDetectionOp::inference() {
out->push_back(tensor_out); out->push_back(tensor_out);
} }
} }
  out->erase(out->begin(), out->begin() + infer_outnum);
int64_t end = timeline.TimeStampUS(); int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob); CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start); AddBlobInfo(output_blob, start);
...@@ -231,68 +238,62 @@ int GeneralDetectionOp::inference() { ...@@ -231,68 +238,62 @@ int GeneralDetectionOp::inference() {
return 0; return 0;
} }
cv::Mat GeneralDetectionOp::Base2Mat(std::string& base64_data) {
  cv::Mat img;
  std::string s_mat;
  s_mat = base64Decode(base64_data.data(), base64_data.size());
  std::vector<char> base64_img(s_mat.begin(), s_mat.end());
  img = cv::imdecode(base64_img, cv::IMREAD_COLOR);  // CV_LOAD_IMAGE_COLOR
  return img;
}

std::string GeneralDetectionOp::base64Decode(const char* Data, int DataByte) {
  const char DecodeTable[] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      62,  // '+'
      0,  0,  0,
      63,  // '/'
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  // '0'-'9'
      0,  0,  0,  0,  0,  0,  0,
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
      16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  // 'A'-'Z'
      0,  0,  0,  0,  0,  0,
      26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
      42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  // 'a'-'z'
  };
  std::string strDecode;
  int nValue;
  int i = 0;
  while (i < DataByte) {
    if (*Data != '\r' && *Data != '\n') {
      nValue = DecodeTable[*Data++] << 18;
      nValue += DecodeTable[*Data++] << 12;
      strDecode += (nValue & 0x00FF0000) >> 16;
      if (*Data != '=') {
        nValue += DecodeTable[*Data++] << 6;
        strDecode += (nValue & 0x0000FF00) >> 8;
        if (*Data != '=') {
          nValue += DecodeTable[*Data++];
          strDecode += nValue & 0x000000FF;
        }
      }
      i += 4;
    } else {  // skip CR / LF
      Data++;
      i++;
    }
  }
  return strDecode;
}
cv::Mat GeneralDetectionOp::GetRotateCropImage(
    const cv::Mat& srcimage, std::vector<std::vector<int>> box) {
  cv::Mat image;
  srcimage.copyTo(image);
  std::vector<std::vector<int>> points = box;
...@@ -332,7 +333,9 @@ cv::Mat GeneralDetectionOp::GetRotateCropImage(const cv::Mat &srcimage, ...@@ -332,7 +333,9 @@ cv::Mat GeneralDetectionOp::GetRotateCropImage(const cv::Mat &srcimage,
cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std); cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
cv::Mat dst_img; cv::Mat dst_img;
  cv::warpPerspective(img_crop,
                      dst_img,
                      M,
                      cv::Size(img_crop_width, img_crop_height),
                      cv::BORDER_REPLICATE);
...@@ -350,4 +353,4 @@ DEFINE_OP(GeneralDetectionOp); ...@@ -350,4 +353,4 @@ DEFINE_OP(GeneralDetectionOp);
} // namespace serving } // namespace serving
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
\ No newline at end of file
...@@ -77,9 +77,6 @@ int GeneralReaderOp::inference() { ...@@ -77,9 +77,6 @@ int GeneralReaderOp::inference() {
uint64_t log_id = req->log_id(); uint64_t log_id = req->log_id();
int input_var_num = 0; int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> databuf_size;
GeneralBlob *res = mutable_data<GeneralBlob>(); GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) { if (!res) {
...@@ -119,40 +116,44 @@ int GeneralReaderOp::inference() { ...@@ -119,40 +116,44 @@ int GeneralReaderOp::inference() {
} }
*/ */
// package tensor // package tensor
elem_type.resize(var_num);
elem_size.resize(var_num);
databuf_size.resize(var_num);
// prepare basic information for input // prepare basic information for input
// specify the memory needed for output tensor_vector // specify the memory needed for output tensor_vector
// fill the data into output general_blob // fill the data into output general_blob
int data_len = 0; int data_len = 0;
int64_t elem_type = 0;
int64_t elem_size = 0;
int64_t databuf_size = 0;
  for (int i = 0; i < var_num; ++i) {
    paddle::PaddleTensor paddleTensor;
    const Tensor &tensor = req->insts(0).tensor_array(i);
    data_len = 0;
    elem_type = 0;
    elem_size = 0;
    databuf_size = 0;
    elem_type = tensor.elem_type();
    VLOG(2) << "var[" << i << "] has elem type: " << elem_type;
    if (elem_type == P_INT64) {  // int64
      elem_size = sizeof(int64_t);
      paddleTensor.dtype = paddle::PaddleDType::INT64;
      data_len = tensor.int64_data_size();
    } else if (elem_type == P_FLOAT32) {
      elem_size = sizeof(float);
      paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
      data_len = tensor.float_data_size();
    } else if (elem_type == P_INT32) {
      elem_size = sizeof(int32_t);
      paddleTensor.dtype = paddle::PaddleDType::INT32;
      data_len = tensor.int_data_size();
    } else if (elem_type == P_STRING) {
      // use paddle::PaddleDType::UINT8 as for String.
      elem_size = sizeof(char);
      paddleTensor.dtype = paddle::PaddleDType::UINT8;
      // this is for vector<String>, cause the databuf_size !=
      // vector<String>.size()*sizeof(char);
      // data_len should be +1 cause '\0'
      // now only support single string
      for (int idx = 0; idx < tensor.data_size(); idx++) {
        data_len += tensor.data()[idx].length() + 1;
      }
    }
// implement lod tensor here // implement lod tensor here
...@@ -160,29 +161,29 @@ int GeneralReaderOp::inference() { ...@@ -160,29 +161,29 @@ int GeneralReaderOp::inference() {
// TODO(HexToString): support 2-D lod // TODO(HexToString): support 2-D lod
    if (tensor.lod_size() > 0) {
      VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
      paddleTensor.lod.resize(1);
      for (int k = 0; k < tensor.lod_size(); ++k) {
        paddleTensor.lod[0].push_back(tensor.lod(k));
      }
    }
    for (int k = 0; k < tensor.shape_size(); ++k) {
      int dim = tensor.shape(k);
      VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim;
      paddleTensor.shape.push_back(dim);
    }
    paddleTensor.name = model_config->_feed_name[i];
    out->push_back(paddleTensor);
    VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
            << "]: " << data_len;
    databuf_size = data_len * elem_size;
    out->at(i).data.Resize(databuf_size);
    if (out->at(i).lod.size() > 0) {
      VLOG(2) << "(logid=" << log_id << ") var[" << i
              << "] has lod_tensor and len=" << out->at(i).lod[0].back();
    }
    if (elem_type == P_INT64) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data()); int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int64_data(0); << "] is " << tensor.int64_data(0);
...@@ -190,14 +191,14 @@ int GeneralReaderOp::inference() { ...@@ -190,14 +191,14 @@ int GeneralReaderOp::inference() {
LOG(ERROR) << "dst_ptr is nullptr"; LOG(ERROR) << "dst_ptr is nullptr";
return -1; return -1;
} }
      memcpy(dst_ptr, tensor.int64_data().data(), databuf_size);
/* /*
int elem_num = tensor.int64_data_size(); int elem_num = tensor.int64_data_size();
for (int k = 0; k < elem_num; ++k) { for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.int64_data(k); dst_ptr[k] = tensor.int64_data(k);
} }
*/ */
    } else if (elem_type == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data()); float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0); << "] is " << tensor.float_data(0);
...@@ -205,12 +206,12 @@ int GeneralReaderOp::inference() { ...@@ -205,12 +206,12 @@ int GeneralReaderOp::inference() {
LOG(ERROR) << "dst_ptr is nullptr"; LOG(ERROR) << "dst_ptr is nullptr";
return -1; return -1;
} }
      memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
/*int elem_num = tensor.float_data_size(); /*int elem_num = tensor.float_data_size();
for (int k = 0; k < elem_num; ++k) { for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.float_data(k); dst_ptr[k] = tensor.float_data(k);
}*/ }*/
    } else if (elem_type == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data()); int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0); << "] is " << tensor.int_data(0);
...@@ -218,15 +219,9 @@ int GeneralReaderOp::inference() { ...@@ -218,15 +219,9 @@ int GeneralReaderOp::inference() {
LOG(ERROR) << "dst_ptr is nullptr"; LOG(ERROR) << "dst_ptr is nullptr";
return -1; return -1;
} }
      memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
    } else if (elem_type == P_STRING) {
      char *dst_ptr = static_cast<char *>(out->at(i).data.data());
      VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
              << "] is " << tensor.data(0);
      if (!dst_ptr) {
...@@ -234,8 +229,12 @@ int GeneralReaderOp::inference() {
        LOG(ERROR) << "dst_ptr is nullptr";
        return -1;
      }
      int elem_num = tensor.data_size();
      int offset = 0;
      for (int k = 0; k < elem_num; ++k) {
        memcpy(dst_ptr + offset,
               tensor.data(k).c_str(),
               strlen(tensor.data(k).c_str()) + 1);
        offset += strlen(tensor.data(k).c_str()) + 1;
      }
} }
} }
......
...@@ -27,7 +27,7 @@ In order to meet the needs of users in different scenarios, Paddle Serving's pro
|-----|------|-----|-----|------|------|
| LOW | HIGH | LOW | HIGH | C++ Serving | High-performance scenarios; recall and ranking services of large-scale online recommendation systems |
| HIGH | HIGH | HIGH | HIGH | Python Pipeline Serving | High throughput and high efficiency; asynchronous mode; suited to single-operator, multi-model combination scenarios |
| HIGH | LOW | HIGH | LOW | Python webservice | High iteration-efficiency scenarios; small or low-traffic services and projects that need rapid iteration and model effect verification |
Performance index description:
1. Response time (ms): average response time of a single request, reported at the 50th, 90th, 95th and 99th percentiles; the lower the better.
...@@ -53,7 +53,7 @@ Paddle Serving takes into account a series of issues such as different operating
Cross-platform means being independent of both the operating system and the hardware environment: an application developed on one operating system can still run on another. Therefore, the design must consider not only cross-platform languages and components, but also the interpretation differences of compilers on different systems.
Docker is an open source application container engine that allows developers to package their applications and dependencies into a portable container and publish it to any popular Linux or Windows machine. We have packaged a variety of Docker images for the Paddle Serving framework; refer to the image list《[Docker Images](DOCKER_IMAGES.md)》and select an image according to your usage. We provide Docker usage documentation《[How to run PaddleServing in Docker](RUN_IN_DOCKER.md)》. Currently, the Python webservice mode can be deployed and run on native Linux and Windows systems.《[Paddle Serving for Windows Users](WINDOWS_TUTORIAL.md)》
> Support client SDKs in multiple development languages
...@@ -141,7 +141,7 @@ The underlying communication of Paddle Serving is implemented with C++ as well a
----
## 4. Python Webservice Design
### 4.1 Network Communication Mechanism
There are many open source frameworks for web services. Paddle Serving currently integrates the Flask framework, but this part is not visible to users. In the future, a better-performing web framework may be provided as the underlying HTTP service integration engine.
......
...@@ -29,7 +29,7 @@ Paddle Serving提供很多大规模场景需要的部署功能:1)模型管
|-----|------|-----|-----|------|------|
| LOW | HIGH | LOW | HIGH | C++ Serving | High-performance scenarios; recall and ranking services of large-scale online recommendation systems |
| HIGH | HIGH | HIGHER | HIGH | Python Pipeline Serving | Balances throughput and efficiency; single-operator, multi-model combination scenarios; asynchronous mode |
| HIGH | LOW | HIGH | LOW | Python webservice | High iteration-efficiency scenarios; small services or projects that need rapid iteration and model effect verification |
Performance index description:
...@@ -55,7 +55,7 @@ Paddle Serving从做顶层设计时考虑到不同团队在工业级场景中会
> Cross-platform operation
Cross-platform means being independent of both the operating system and the hardware environment: an application developed on one operating system can still run on another. Therefore, the design must consider not only cross-platform languages and components, but also the interpretation differences of compilers on different systems.
Docker is an open source application container engine that lets developers package their applications and dependencies into a portable container and publish it to any popular Linux or Windows machine. We have packaged a variety of Docker images for the Paddle Serving framework; refer to the image list《[Docker镜像](DOCKER_IMAGES_CN.md)》and choose an image according to your scenario. To make Docker easier to use, we provide the documentation《[如何在Docker中运行PaddleServing](RUN_IN_DOCKER_CN.md)》. Currently, the Python webservice mode can be deployed and run natively on both Linux and Windows.《[Windows平台使用Paddle Serving指导](WINDOWS_TUTORIAL_CN.md)》
> SDKs for multiple development languages
...@@ -144,7 +144,7 @@ Paddle Serving采用对称加密算法对模型进行加密,在服务加载模
Because the underlying communication of Paddle Serving is based on C++ components and the core framework is also written in C/C++, when users want to define complex pre- and post-processing logic on the server side, one option is to modify the underlying Paddle Serving framework and recompile the source code. Another option is to embed a lightweight web service on the server side and implement the more complex preprocessing logic there, building a logically complete service. When the traffic exceeds what the web service can handle, developers have good reason to implement high-performance C++ preprocessing logic and embed it into Serving's native service library. The relationship between web services and RPC services, and how they can be combined, is described under `用户类型` below.
----
## 4. Python Webservice Design and Usage
### 4.1 Network Framework
There are many open source frameworks for web services. Paddle Serving currently integrates the Flask framework, but this part is not visible to users. In the future, a better-performing web framework may be provided as the underlying HTTP service integration engine.
......
# Lod Introduction
(English|[简体中文](LOD_CN.md))
## Principle
LoD (Level-of-Detail) Tensor is an advanced feature of Paddle and an extension of the ordinary tensor. A LoD Tensor trades some flexibility for higher training efficiency.
**Notice:** Most users do not need to pay attention to LoD Tensor usage. Serving currently supports only one-dimensional LoD.
## Use
**Prerequisite:** Your prediction model needs to support variable length tensor input.
Take a vision task as an example. In vision tasks we often need to process videos and images, which are high-dimensional objects.
Suppose a mini-batch contains three videos, which consist of three frames, one frame, and two frames respectively.
If every frame has the same size, 640x480, the mini-batch can be expressed as:
```
3 1 2
口口口 口 口口
```
The underlying tensor has size (3 + 1 + 2) x 640 x 480, and each 口 represents one 640x480 image.
The shape of the tensor is therefore [6, 640, 480] and lod = [0, 3, 4, 6].
Here 0 is the starting offset, and 3-0 = 3, 4-3 = 1, 6-4 = 2 are exactly the three variable lengths.
The last element of lod, 6, must equal the first dimension of shape.
The variable-length information recorded in lod and the first dimension of the tensor's shape must stay aligned in this way.
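As a quick sanity check, the lod offsets can be derived directly from the per-sequence lengths. The sketch below is plain NumPy (not Serving API) and uses the frame counts from the example above:

```python
import numpy as np

# Three "videos" with 3, 1 and 2 frames respectively; every frame is 640x480.
frames = [np.zeros((3, 640, 480), dtype=np.float32),
          np.zeros((1, 640, 480), dtype=np.float32),
          np.zeros((2, 640, 480), dtype=np.float32)]

# Pack the variable-length sequences into one batch tensor.
batch = np.concatenate(frames, axis=0)      # shape: (6, 640, 480)

# lod holds the cumulative offsets: [0, 3, 4, 6].
lengths = [f.shape[0] for f in frames]
lod = np.cumsum([0] + lengths).tolist()

assert batch.shape[0] == lod[-1]            # last lod element == first dim of shape
print(batch.shape, lod)                     # (6, 640, 480) [0, 3, 4, 6]
```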
# LoD Field Description
(Simplified Chinese|[English](LOD.md))
## Concept
LoD (Level-of-Detail) Tensor is an advanced feature of Paddle and an extension of the ordinary tensor. A LoD Tensor trades some flexibility for higher training efficiency.
**Note:** Most users do not need to pay attention to LoD Tensor usage; Serving currently supports only one-dimensional LoD.
## Usage
**Prerequisite:** Your prediction model must support variable-length tensor input.
Take a vision task as an example: we often need to process videos and images, which are high-dimensional objects. Suppose a mini-batch contains three videos with three frames, one frame and two frames respectively.
If every frame has the same size, 640x480, the mini-batch can be expressed as:
```
3 1 2
口口口 口 口口
```
The underlying tensor has size (3+1+2) x 640 x 480, and each 口 represents one 640x480 image.
The shape of the tensor is therefore [6, 640, 480] and lod = [0, 3, 4, 6].
Here 0 is the starting offset, and 3-0 = 3, 4-3 = 1, 6-4 = 2 are exactly the three variable lengths; the last element of lod, 6, must equal the first dimension of shape.
The variable-length information recorded in lod and the first dimension of the tensor's shape must stay aligned in this way.
...@@ -17,7 +17,7 @@ python -m paddle_serving_client.convert --dirname ResNet50_quant
```
Start RPC service, specify the GPU id and precision mode
```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8
```
Request the serving service with Client
```
...@@ -27,7 +27,7 @@ from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
client = Client()
client.load_client_config(
    "serving_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])
seq = Sequential([
...@@ -37,11 +37,11 @@ seq = Sequential([
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["save_infer_model/scale_0.tmp_0"])
print(fetch_map["save_infer_model/scale_0.tmp_0"].reshape(-1))
```
## Reference
* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
* [Deploy the quantized model Using Paddle Inference on Intel CPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html)
* [Deploy the quantized model Using Paddle Inference on Nvidia GPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html)
\ No newline at end of file
...@@ -16,7 +16,7 @@ python -m paddle_serving_client.convert --dirname ResNet50_quant
```
Start the RPC service, specifying the GPU id and the deployment precision
```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8
```
Send requests with the client
```
...@@ -43,4 +43,4 @@ print(fetch_map["score"].reshape(-1))
## References
* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
* Deploying quantized models with Paddle Inference on Intel CPU: [doc](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html)
* Deploying quantized models with Paddle Inference on NV GPU: [doc](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html)
\ No newline at end of file
...@@ -425,6 +425,8 @@ service PipelineService {
## ★ Classic examples
All pipeline examples are in the [examples/pipeline/](../python/examples/pipeline) directory.
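For orientation, a stripped-down server skeleton in the spirit of those examples might look like the sketch below; the op name, endpoint, fetch variable, and config paths are placeholders, and the exact module layout should be checked against the example code:

```python
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer

read_op = RequestOp()                       # receives the client request
# A remote-service Op: forwards the request to a running Paddle Serving service.
bow_op = Op(name="bow",                     # placeholder op name
            input_ops=[read_op],
            server_endpoints=["127.0.0.1:9393"],
            fetch_list=["prediction"],
            client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
            concurrency=1)
response_op = ResponseOp(input_ops=[bow_op])

server = PipelineServer()
server.set_response_op(response_op)
server.prepare_server("config.yml")         # ports, worker numbers, etc.
server.run_server()
```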
Here, we build a simple imdb model ensemble example to show how to use Pipeline Serving. The relevant code can be found in the `python/examples/pipeline/imdb_model_ensemble` folder. The Server-side structure in the example is shown in the following figure:
......
...@@ -132,20 +132,22 @@ def __init__(name=None,
The parameters are described below (see the sketch after the table):
| Parameter | Type | Description |
| :-------------------: | :---------: |:------------------------------------------------: |
| name | (str) | A string identifying the OP type; must be globally unique. |
| input_ops | (list) | List of all predecessor OPs of the current OP. |
| server_endpoints | (list) | Endpoints of the remote Paddle Serving Service. If not set, local_predictor mode is assumed and the configuration is read from local_service_conf. |
| fetch_list | (list) | Fetch list of the remote Paddle Serving Service. |
| client_config | (str) | Path of the client-side configuration file for the Paddle Serving Service. |
| client_type | (str) | One of brpc, grpc or local_predictor. local_predictor does not start a Serving service; prediction runs in-process. |
| concurrency | (int) | Concurrency of the OP. |
| timeout | (int) | Timeout of the process operation, in milliseconds. A negative value means no timeout. |
| retry | (int) | Number of retries on timeout. 1 means no retry. |
| batch_size | (int) | Desired batch_size for Auto-Batching. Since building a batch may time out, the actual batch_size may be smaller than the configured value; default is 1. |
| auto_batching_timeout | (float) | Timeout for building a batch in Auto-Batching, in milliseconds. When batch_size > 1, auto_batching_timeout must be set, otherwise requests block while waiting for a full batch. |
| local_service_handler | (object) | local predictor handler; either passed into Op init() or created inside Op init(). |
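To make the table concrete, the hypothetical sketch below constructs an Op with the batching-related parameters spelled out; the endpoint, fetch name, and client config path are placeholders rather than values from any shipped example:

```python
from paddle_serving_server.pipeline import Op, RequestOp

read_op = RequestOp()
det_op = Op(name="det",                                    # globally unique op name
            input_ops=[read_op],                           # predecessor ops
            server_endpoints=["127.0.0.1:9393"],           # omit to use local_predictor mode
            fetch_list=["save_infer_model/scale_0.tmp_0"], # fetch vars of the remote service
            client_config="det_client/serving_client_conf.prototxt",
            client_type="brpc",                            # brpc / grpc / local_predictor
            concurrency=2,                                 # number of concurrent workers
            timeout=-1,                                    # <0 means no timeout (ms)
            retry=1,                                       # 1 means no retry
            batch_size=8,                                  # desired auto-batching size
            auto_batching_timeout=2000)                    # ms to wait while building a batch
```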
...@@ -428,6 +430,8 @@ service PipelineService {
## ★ Classic Examples
All pipeline examples are in the [examples/pipeline/](../python/examples/pipeline) directory.
Here we build a simple imdb model ensemble example to show how to use Pipeline Serving. The relevant code can be found in the `python/examples/pipeline/imdb_model_ensemble` folder; the server-side structure of the example is shown in the figure below:
......
...@@ -84,3 +84,9 @@ set environmental variable to specify which gpus are used, the command above mea
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark output is written to the log file `profile_log_bert_seq128_model`.
...@@ -88,3 +88,10 @@ python bert_web_service_gpu.py bert_seq128_model/ 9292 #启动gpu预测服务
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark log file is `profile_log_bert_seq128_model`.
To change the parameters of the benchmark case, modify the configuration in benchmark.sh.
...@@ -21,6 +21,7 @@ import sys ...@@ -21,6 +21,7 @@ import sys
import time import time
import json import json
import requests import requests
import numpy as np
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
...@@ -56,7 +57,11 @@ def single_func(idx, resource): ...@@ -56,7 +57,11 @@ def single_func(idx, resource):
feed_batch = [] feed_batch = []
b_start = time.time() b_start = time.time()
for bi in range(args.batch_size): for bi in range(args.batch_size):
                for bi in range(args.batch_size):
                    feed_dict = reader.process(dataset[bi])
                    for key in feed_dict.keys():
                        feed_dict[key] = np.array(feed_dict[key]).reshape(
                            (1, 128, 1))
                    feed_batch.append(feed_dict)
b_end = time.time() b_end = time.time()
if profile_flags: if profile_flags:
...@@ -116,9 +121,7 @@ def single_func(idx, resource): ...@@ -116,9 +121,7 @@ def single_func(idx, resource):
if __name__ == '__main__': if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9292", "127.0.0.1:9293"]
turns = 100 turns = 100
start = time.time() start = time.time()
result = multi_thread_runner.run( result = multi_thread_runner.run(
......
rm profile_log* rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1
export FLAGS_profile_server=1 export FLAGS_profile_server=1
export FLAGS_profile_client=1 export FLAGS_profile_client=1
export FLAGS_serving_latency=1 export FLAGS_serving_latency=1
...@@ -12,7 +12,7 @@ else ...@@ -12,7 +12,7 @@ else
mkdir utilization mkdir utilization
fi fi
#start server #start server
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 & $PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
sleep 5 sleep 5
#warm up #warm up
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1 2> elog > stdlog &
export FLAGS_profile_client=1 export FLAGS_profile_client=1
export FLAGS_profile_server=1 export FLAGS_profile_server=1
sleep 5 sleep 5
......
...@@ -18,7 +18,7 @@ import sys ...@@ -18,7 +18,7 @@ import sys
import numpy as np import numpy as np
preprocess = Sequential([ preprocess = Sequential([
File2Image(), BGR2RGB(), File2Image(), BGR2RGB(),
Normalize([123.675, 116.28, 103.53], [58.395, 57.12, 57.375], False), Normalize([123.675, 116.28, 103.53], [58.395, 57.12, 57.375], False),
Resize((512, 512)), Transpose((2, 0, 1)) Resize((512, 512)), Transpose((2, 0, 1))
]) ])
...@@ -33,6 +33,7 @@ im = preprocess(sys.argv[1]) ...@@ -33,6 +33,7 @@ im = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_shape": np.array([512, 512]),
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "scale_factor": np.array([1.0, 1.0]).reshape(-1),
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
......
...@@ -42,3 +42,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
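The same request can also be sent from Python; a minimal sketch using the `requests` package, with the exact payload and endpoint of the curl command above:

```python
import requests

url = "http://127.0.0.1:9393/uci/prediction"
payload = {
    "feed": [{
        "x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
              -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
    }],
    "fetch": ["price"]
}
resp = requests.post(url, json=payload)
print(resp.json())   # expected to contain the "price" field
```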
## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark output is written to the log file `profile_log_uci_housing_model`.
...@@ -43,3 +43,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark log file is `profile_log_uci_housing_model`.
To change the parameters of the benchmark case, modify the configuration in benchmark.sh.
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
import time import time
import paddle import paddle
import sys import sys
...@@ -37,9 +37,6 @@ def single_func(idx, resource): ...@@ -37,9 +37,6 @@ def single_func(idx, resource):
client.connect([args.endpoint]) client.connect([args.endpoint])
start = time.time() start = time.time()
for data in train_reader(): for data in train_reader():
#new_data = np.zeros((1, 13)).astype("float32")
#new_data[0] = data[0][0]
#fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"]) fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time() end = time.time()
return [[end - start], [total_number]] return [[end - start], [total_number]]
...@@ -57,6 +54,17 @@ def single_func(idx, resource): ...@@ -57,6 +54,17 @@ def single_func(idx, resource):
return [[end - start], [total_number]] return [[end - start], [total_number]]
start = time.time()
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {}) result = multi_thread_runner.run(single_func, args.thread, {})
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
avg_cost = avg_cost / args.thread
print("total cost: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread / total_cost))
show_latency(result[1])
# resnet50 int8 example
(English|[简体中文](./README_CN.md))
## Obtain the quantized model through PaddleSlim tool
To train low-precision models, please refer to [PaddleSlim](https://paddleslim.readthedocs.io/zh_CN/latest/tutorials/quant/overview.html).
## Deploy the quantized model from PaddleSlim using Paddle Serving with Nvidia TensorRT int8 mode
First, download the [Resnet50 int8 model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz) and convert it to Paddle Serving's saved model.
```
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz
python -m paddle_serving_client.convert --dirname ResNet50_quant
```
Start RPC service, specify the GPU id and precision mode
```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8
```
Request the serving service with Client
```
python resnet50_client.py
```
## Reference
* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
* [Deploy the quantized model Using Paddle Inference on Intel CPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html)
* [Deploy the quantized model Using Paddle Inference on Nvidia GPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html)
# resnet50 int8 example
(Simplified Chinese|[English](./README.md))
## Obtain a low-precision model through PaddleSlim quantization
See [PaddleSlim quantization](https://paddleslim.readthedocs.io/zh_CN/latest/tutorials/quant/overview.html) for details.
## Deploy the PaddleSlim int8 quantized model with TensorRT int8
First download the Resnet50 [PaddleSlim quantized model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz) and convert it to the deployment model format supported by Paddle Serving.
```
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz
python -m paddle_serving_client.convert --dirname ResNet50_quant
```
Start the RPC service, specifying the GPU id and the deployment precision
```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8
```
Send requests with the client
```
python resnet50_client.py
```
## References
* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
* Deploying quantized models with Paddle Inference on Intel CPU: [doc](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html)
* Deploying quantized models with Paddle Inference on NV GPU: [doc](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html)
...@@ -13,30 +13,20 @@ ...@@ -13,30 +13,20 @@
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
import paddle from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
import numpy as np
client = Client()
client.load_client_config(
"serving_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])
def single_func(idx, resource): seq = Sequential([
client = Client() File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
client.load_client_config( Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
"./uci_housing_client/serving_client_conf.prototxt") ])
client.connect(["127.0.0.1:9293", "127.0.0.1:9292"])
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584,
0.6283, 0.4919, 0.1856, 0.0795, -0.0332
]
x = np.array(x)
for i in range(1000):
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
if fetch_map is None:
return [[None]]
return [[0]]
image_file = "daisy.jpg"
multi_thread_runner = MultiThreadRunner() img = seq(image_file)
thread_num = 4 fetch_map = client.predict(feed={"image": img}, fetch=["save_infer_model/scale_0.tmp_0"])
result = multi_thread_runner.run(single_func, thread_num, {}) print(fetch_map["save_infer_model/scale_0.tmp_0"].reshape(-1))
if None in result[0]:
exit(1)
...@@ -5,6 +5,7 @@ import collections ...@@ -5,6 +5,7 @@ import collections
profile_file = sys.argv[1] profile_file = sys.argv[1]
thread_num = sys.argv[2] thread_num = sys.argv[2]
time_dict = collections.OrderedDict() time_dict = collections.OrderedDict()
query_count = 0
def prase(line): def prase(line):
...@@ -26,12 +27,15 @@ def prase(line): ...@@ -26,12 +27,15 @@ def prase(line):
with open(profile_file) as f: with open(profile_file) as f:
query_count = 0
for line in f.readlines(): for line in f.readlines():
line = line.strip().split("\t") line = line.strip().split("\t")
if line[0] == "PROFILE": if line[0] == "PROFILE":
prase(line[2]) prase(line[2])
query_count += 1
print("thread_num: {}".format(thread_num)) print("thread_num: {}".format(thread_num))
print("query_count: {}".format(query_count))
for name in time_dict: for name in time_dict:
print("{} cost: {}s in each thread ".format(name, time_dict[name] / ( print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
1000000.0 * float(thread_num)))) 1000000.0 * float(thread_num))))
## Prepare
### download model and extract
```
wget https://paddle-inference-dist.cdn.bcebos.com/PaddleLite/models_and_data_for_unittests/bert_base_chinese.tar.gz
tar zxvf bert_base_chinese.tar.gz
```
### convert model
```
python3 -m paddle_serving_client.convert --dirname bert_base_chinese --model_filename bert_base_chinese/model.pdmodel --params_filename bert_base_chinese/model.pdiparams
```
### or, you can get the serving saved model directly
```
wget https://paddle-serving.bj.bcebos.com/models/xpu/bert.tar.gz
tar zxvf bert.tar.gz
```
### Getting Dict and Sample Dataset
```
sh get_data.sh
``` ```
This script downloads the Chinese dictionary file vocab.txt and the Chinese sample data data-c.txt.
## RPC Service
### Start Service
```
python3 bert_web_service.py serving_server 7703
```
### Client Prediction
```
head data-c.txt | python3 bert_client.py --model serving_client/serving_client_conf.prototxt
```
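If the web service started above exposes the same `/bert/prediction` HTTP endpoint as the GPU example earlier in this changeset, a request could also be sent with curl; the port and fetch name below are assumptions, so adjust them to your deployment:

``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:7703/bert/prediction
```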
wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
wget https://paddle-serving.bj.bcebos.com/bert_example/vocab.txt --no-check-certificate
This diff is collapsed.
## Prepare
### download model and extract
```
wget https://paddle-inference-dist.cdn.bcebos.com/PaddleLite/models_and_data_for_unittests/ernie.tar.gz
tar zxvf ernie.tar.gz
```
### convert model
```
python3 -m paddle_serving_client.convert --dirname ernie
```
### or, you can get the serving saved model directly
```
wget https://paddle-serving.bj.bcebos.com/models/xpu/ernie.tar.gz
tar zxvf ernie.tar.gz
```
### Getting Dict and Sample Dataset
```
sh get_data.sh
``` ```
This script downloads the Chinese dictionary file vocab.txt and the Chinese sample data data-c.txt.
## RPC Service
......
wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
wget https://paddle-serving.bj.bcebos.com/bert_example/vocab.txt --no-check-certificate
This diff is collapsed.
...@@ -8,15 +8,10 @@ ...@@ -8,15 +8,10 @@
sh get_data.sh sh get_data.sh
``` ```
## RPC service ## RPC service
### Start server ### Start server
``` shell You can use the following code to start the RPC service
python test_server.py uci_housing_model/
```
You can also use the following code to start the RPC service
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
``` ```
...@@ -29,19 +24,17 @@ The `paddlepaddle` package is used in `test_client.py`, and you may need to down ...@@ -29,19 +24,17 @@ The `paddlepaddle` package is used in `test_client.py`, and you may need to down
python test_client.py uci_housing_client/serving_client_conf.prototxt python test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
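For reference, a minimal sketch of what a client like test_client.py does with the paddle_serving_client API; the sample feature vector is the one used in the HTTP example below, and the exact input shape and batching depend on serving_client_conf.prototxt, so treat this as an illustration rather than the shipped script:
```python
# Sketch only: query the uci_housing RPC endpoint started above.
import sys

import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config(sys.argv[1])  # uci_housing_client/serving_client_conf.prototxt
client.connect(["127.0.0.1:9393"])

# One normalized 13-dimensional housing feature vector (same sample as the curl below).
x = np.array([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
              -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], dtype="float32")
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
print(fetch_map["price"])
```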
## HTTP service ## HTTP service
### Start server ### Start server
Start a web service with default web service hosting modules: Start a web service with default web service hosting modules:
``` shell ``` shell
python test_server.py python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
``` ```
### Client prediction ### Client prediction
``` shell ``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
``` ```
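The same HTTP request can be issued from Python; a small sketch using the requests package, assuming the web service above is running locally on port 9393 under the name `uci`:
```python
# Sketch only: POST the same payload as the curl command above.
import requests

url = "http://127.0.0.1:9393/uci/prediction"
payload = {
    "feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                    -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}],
    "fetch": ["price"],
}
resp = requests.post(url, json=payload)
print(resp.json())
```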
...@@ -32,8 +32,6 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -32,8 +32,6 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
python test_client.py uci_housing_client/serving_client_conf.prototxt python test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
## HTTP service ## HTTP service
### Start server ### Start server
...@@ -41,11 +39,11 @@ python test_client.py uci_housing_client/serving_client_conf.prototxt ...@@ -41,11 +39,11 @@ python test_client.py uci_housing_client/serving_client_conf.prototxt
Start the default web service with the following line of code: Start the default web service with the following line of code:
``` shell ``` shell
python test_server.py python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
``` ```
### Client prediction ### Client prediction
``` shell ``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
``` ```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
import numpy as np
class UciService(WebService):
    def preprocess(self, feed=[], fetch=[]):
        # Pack every request's 13-dim "x" feature into one (batch, 1, 13)
        # float32 tensor so the predictor receives a single batched input.
        feed_batch = []
        is_batch = True
        new_data = np.zeros((len(feed), 1, 13)).astype("float32")
        for i, ins in enumerate(feed):
            nums = np.array(ins["x"]).reshape(1, 1, 13)
            new_data[i] = nums
        feed = {"x": new_data}
        return feed, fetch, is_batch


uci_service = UciService(name="uci")
uci_service.load_model_config("uci_housing_model")
uci_service.prepare_server(
    workdir="workdir", port=9393, use_lite=True, use_xpu=True, ir_optim=True)
uci_service.run_rpc_service()
uci_service.run_web_service()
## Prepare ## Prepare
### download model and extract
```
wget https://paddle-inference-dist.bj.bcebos.com/PaddleLite/models_and_data_for_unittests/VGG19.tar.gz
tar zxvf VGG19.tar.gz
```
### convert model ### convert model
``` ```
python -m paddle_serving_client.convert --dirname VGG19 python3 -m paddle_serving_client.convert --dirname VGG19
```
### or, you can get the serving saved model directly
```
wget https://paddle-serving.bj.bcebos.com/models/xpu/vgg19.tar.gz
tar zxvf vgg19.tar.gz
``` ```
## RPC Service ## RPC Service
...@@ -10,7 +20,7 @@ python -m paddle_serving_client.convert --dirname VGG19 ...@@ -10,7 +20,7 @@ python -m paddle_serving_client.convert --dirname VGG19
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model serving_server --port 7702 --use_lite --use_xpu --ir_optim python3 -m paddle_serving_server.serve --model serving_server --port 7702 --use_lite --use_xpu --ir_optim
``` ```
### Client Prediction ### Client Prediction
......
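To make the flow concrete, here is a minimal sketch of an RPC client for this VGG19 service, using the image preprocessing utilities from paddle_serving_app; the fetch name `save_infer_model/scale_0.tmp_0`, the port 7702, and the local image path are assumptions taken from the surrounding examples and may differ for your converted model:
```python
# Sketch only: preprocess one image and query the VGG19 endpoint started above.
from paddle_serving_app.reader import (CenterCrop, Div, File2Image, Normalize,
                                       RGB2BGR, Resize, Sequential, Transpose)
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:7702"])

seq = Sequential([
    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
img = seq("daisy.jpg")  # hypothetical local image path
fetch_map = client.predict(feed={"image": img},
                           fetch=["save_infer_model/scale_0.tmp_0"])
print(fetch_map["save_infer_model/scale_0.tmp_0"].reshape(-1))
```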
...@@ -386,8 +386,6 @@ class Server(object): ...@@ -386,8 +386,6 @@ class Server(object):
return return
if not os.path.exists(self.server_path): if not os.path.exists(self.server_path):
os.system("touch {}/{}.is_download".format(self.module_path,
folder_name))
print('First time run, downloading PaddleServing components ...') print('First time run, downloading PaddleServing components ...')
r = os.system('wget ' + bin_url + ' --no-check-certificate') r = os.system('wget ' + bin_url + ' --no-check-certificate')
...@@ -403,9 +401,10 @@ class Server(object): ...@@ -403,9 +401,10 @@ class Server(object):
tar = tarfile.open(tar_name) tar = tarfile.open(tar_name)
tar.extractall() tar.extractall()
tar.close() tar.close()
open(download_flag, "a").close()
except: except:
if os.path.exists(exe_path): if os.path.exists(self.server_path):
os.remove(exe_path) os.remove(self.server_path)
raise SystemExit( raise SystemExit(
'Decompressing failed, please check your permission of {} or disk space left.' 'Decompressing failed, please check your permission of {} or disk space left.'
.format(self.module_path)) .format(self.module_path))
......
...@@ -56,7 +56,6 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer): ...@@ -56,7 +56,6 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
resp = pipeline_service_pb2.Response() resp = pipeline_service_pb2.Response()
resp.err_no = channel.ChannelDataErrcode.NO_SERVICE.value resp.err_no = channel.ChannelDataErrcode.NO_SERVICE.value
resp.err_msg = "Failed to inference: Service name error." resp.err_msg = "Failed to inference: Service name error."
resp.result = ""
return resp return resp
resp = self._dag_executor.call(request) resp = self._dag_executor.call(request)
return resp return resp
......
...@@ -148,7 +148,7 @@ function before_hook() { ...@@ -148,7 +148,7 @@ function before_hook() {
setproxy setproxy
unsetproxy unsetproxy
cd ${build_path}/python cd ${build_path}/python
python3.6 -m pip install --upgrade pip python3.6 -m pip install --upgrade pip==20.0.1
python3.6 -m pip install requests python3.6 -m pip install requests
python3.6 -m pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple python3.6 -m pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
python3.6 -m pip install numpy==1.16.4 python3.6 -m pip install numpy==1.16.4
...@@ -323,7 +323,7 @@ function bert_rpc_cpu() { ...@@ -323,7 +323,7 @@ function bert_rpc_cpu() {
link_data ${data_dir} link_data ${data_dir}
sed -i 's/8860/8861/g' bert_client.py sed -i 's/8860/8861/g' bert_client.py
python3.6 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8861 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8861 > ${dir}server_log.txt 2>&1 &
check_result server 3 check_result server 5
cp data-c.txt.1 data-c.txt cp data-c.txt.1 data-c.txt
head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1 head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
check_result client "bert_CPU_RPC server test completed" check_result client "bert_CPU_RPC server test completed"
...@@ -338,7 +338,7 @@ function pipeline_imagenet() { ...@@ -338,7 +338,7 @@ function pipeline_imagenet() {
data_dir=${data}imagenet/ data_dir=${data}imagenet/
link_data ${data_dir} link_data ${data_dir}
python3.6 resnet50_web_service.py > ${dir}server_log.txt 2>&1 & python3.6 resnet50_web_service.py > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 pipeline_rpc_client.py > ${dir}client_log.txt 2>&1 python3.6 pipeline_rpc_client.py > ${dir}client_log.txt 2>&1
check_result client "pipeline_imagenet_GPU_RPC server test completed" check_result client "pipeline_imagenet_GPU_RPC server test completed"
...@@ -355,7 +355,7 @@ function ResNet50_rpc() { ...@@ -355,7 +355,7 @@ function ResNet50_rpc() {
link_data ${data_dir} link_data ${data_dir}
sed -i 's/9696/8863/g' resnet50_rpc_client.py sed -i 's/9696/8863/g' resnet50_rpc_client.py
python3.6 -m paddle_serving_server.serve --model ResNet50_vd_model --port 8863 --gpu_ids 0 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model ResNet50_vd_model --port 8863 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1 python3.6 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
check_result client "ResNet50_GPU_RPC server test completed" check_result client "ResNet50_GPU_RPC server test completed"
...@@ -372,7 +372,7 @@ function ResNet101_rpc() { ...@@ -372,7 +372,7 @@ function ResNet101_rpc() {
link_data ${data_dir} link_data ${data_dir}
sed -i "22cclient.connect(['127.0.0.1:8864'])" image_rpc_client.py sed -i "22cclient.connect(['127.0.0.1:8864'])" image_rpc_client.py
python3.6 -m paddle_serving_server.serve --model ResNet101_vd_model --port 8864 --gpu_ids 0 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model ResNet101_vd_model --port 8864 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 image_rpc_client.py ResNet101_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1 python3.6 image_rpc_client.py ResNet101_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
check_result client "ResNet101_GPU_RPC server test completed" check_result client "ResNet101_GPU_RPC server test completed"
...@@ -482,7 +482,7 @@ function cascade_rcnn_rpc() { ...@@ -482,7 +482,7 @@ function cascade_rcnn_rpc() {
link_data ${data_dir} link_data ${data_dir}
sed -i "s/9292/8879/g" test_client.py sed -i "s/9292/8879/g" test_client.py
python3.6 -m paddle_serving_server.serve --model serving_server --port 8879 --gpu_ids 0 --thread 2 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model serving_server --port 8879 --gpu_ids 0 --thread 2 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 test_client.py > ${dir}client_log.txt 2>&1 python3.6 test_client.py > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
...@@ -499,7 +499,7 @@ function deeplabv3_rpc() { ...@@ -499,7 +499,7 @@ function deeplabv3_rpc() {
link_data ${data_dir} link_data ${data_dir}
sed -i "s/9494/8880/g" deeplabv3_client.py sed -i "s/9494/8880/g" deeplabv3_client.py
python3.6 -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 8880 --thread 2 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 8880 --thread 2 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 10
nvidia-smi nvidia-smi
python3.6 deeplabv3_client.py > ${dir}client_log.txt 2>&1 python3.6 deeplabv3_client.py > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
...@@ -516,7 +516,7 @@ function mobilenet_rpc() { ...@@ -516,7 +516,7 @@ function mobilenet_rpc() {
tar xf mobilenet_v2_imagenet.tar.gz tar xf mobilenet_v2_imagenet.tar.gz
sed -i "s/9393/8881/g" mobilenet_tutorial.py sed -i "s/9393/8881/g" mobilenet_tutorial.py
python3.6 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 8881 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 8881 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 mobilenet_tutorial.py > ${dir}client_log.txt 2>&1 python3.6 mobilenet_tutorial.py > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
...@@ -533,7 +533,7 @@ function unet_rpc() { ...@@ -533,7 +533,7 @@ function unet_rpc() {
link_data ${data_dir} link_data ${data_dir}
sed -i "s/9494/8882/g" seg_client.py sed -i "s/9494/8882/g" seg_client.py
python3.6 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 8882 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 8882 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 seg_client.py > ${dir}client_log.txt 2>&1 python3.6 seg_client.py > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
...@@ -599,7 +599,7 @@ function criteo_ctr_rpc_gpu() { ...@@ -599,7 +599,7 @@ function criteo_ctr_rpc_gpu() {
link_data ${data_dir} link_data ${data_dir}
sed -i "s/8885/8886/g" test_client.py sed -i "s/8885/8886/g" test_client.py
python3.6 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8886 --gpu_ids 0 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8886 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
nvidia-smi nvidia-smi
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 > ${dir}client_log.txt 2>&1 python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
...@@ -617,7 +617,7 @@ function yolov4_rpc_gpu() { ...@@ -617,7 +617,7 @@ function yolov4_rpc_gpu() {
sed -i "s/9393/8887/g" test_client.py sed -i "s/9393/8887/g" test_client.py
python3.6 -m paddle_serving_server.serve --model yolov4_model --port 8887 --gpu_ids 0 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model yolov4_model --port 8887 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
nvidia-smi nvidia-smi
check_result server 5 check_result server 8
python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1 python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
check_result client "yolov4_GPU_RPC server test completed" check_result client "yolov4_GPU_RPC server test completed"
...@@ -634,7 +634,7 @@ function senta_rpc_cpu() { ...@@ -634,7 +634,7 @@ function senta_rpc_cpu() {
sed -i "s/9393/8887/g" test_client.py sed -i "s/9393/8887/g" test_client.py
python3.6 -m paddle_serving_server.serve --model yolov4_model --port 8887 --gpu_ids 0 > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model yolov4_model --port 8887 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
nvidia-smi nvidia-smi
check_result server 5 check_result server 8
python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1 python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
nvidia-smi nvidia-smi
check_result client "senta_GPU_RPC server test completed" check_result client "senta_GPU_RPC server test completed"
...@@ -724,7 +724,7 @@ function bert_http() { ...@@ -724,7 +724,7 @@ function bert_http() {
cp vocab.txt.1 vocab.txt cp vocab.txt.1 vocab.txt
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python3.6 bert_web_service.py bert_seq128_model/ 8878 > ${dir}server_log.txt 2>&1 & python3.6 bert_web_service.py bert_seq128_model/ 8878 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:8878/bert/prediction > ${dir}client_log.txt 2>&1 curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:8878/bert/prediction > ${dir}client_log.txt 2>&1
check_result client "bert_GPU_HTTP server test completed" check_result client "bert_GPU_HTTP server test completed"
kill_server_process kill_server_process
...@@ -762,7 +762,7 @@ function grpc_yolov4() { ...@@ -762,7 +762,7 @@ function grpc_yolov4() {
link_data ${data_dir} link_data ${data_dir}
echo -e "${GREEN_COLOR}grpc_impl_example_yolov4_GPU_gRPC server started${RES}" echo -e "${GREEN_COLOR}grpc_impl_example_yolov4_GPU_gRPC server started${RES}"
python3.6 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang > ${dir}server_log.txt 2>&1 & python3.6 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 10
echo -e "${GREEN_COLOR}grpc_impl_example_yolov4_GPU_gRPC client started${RES}" echo -e "${GREEN_COLOR}grpc_impl_example_yolov4_GPU_gRPC client started${RES}"
python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1 python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
check_result client "grpc_yolov4_GPU_GRPC server test completed" check_result client "grpc_yolov4_GPU_GRPC server test completed"
......