Commit 0abd3ac6 authored by H HexToString

add http client

Parent 31640a40
......@@ -33,9 +33,7 @@ if (WITH_PYTHON)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)
if (CLIENT)
py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
......@@ -53,11 +51,7 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (APP)
......@@ -84,11 +78,6 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
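For context, calling this gRPC service from Python might look like the sketch below. The generated module names assume standard protoc/grpcio-tools output for multi_lang_general_model_service.proto; the address, feed name `x`, and fetch name `price` are illustrative assumptions matching the fit_a_line example used elsewhere in this change.
```python
# Hypothetical sketch, assuming stubs generated from
# multi_lang_general_model_service.proto; address and variable names are examples.
import grpc
import multi_lang_general_model_service_pb2 as pb2
import multi_lang_general_model_service_pb2_grpc as pb2_grpc

channel = grpc.insecure_channel("127.0.0.1:9393")
stub = pb2_grpc.MultiLangGeneralModelServiceStub(channel)

req = pb2.InferenceRequest(is_python=True, log_id=0)
req.feed_var_names.append("x")
req.fetch_var_names.append("price")
inst = req.insts.add()                # one FeedInst; the batch lives in the Tensor
tensor = inst.tensor_array.add()
tensor.elem_type = 1                  # 1 means float32
tensor.shape.extend([1, 13])
tensor.float_data.extend([0.0] * 13)  # placeholder feature values

resp = stub.Inference(req)
print(resp.err_code)
```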
......@@ -207,7 +207,7 @@ class PredictorClient {
void init_gflags(std::vector<std::string> argv);
int init(const std::vector<std::string> &client_conf);
int init(const std::vector<std::string>& client_conf);
void set_predictor_conf(const std::string& conf_path,
const std::string& conf_file);
......@@ -218,23 +218,22 @@ class PredictorClient {
int destroy_predictor();
int numpy_predict(
const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::vector<std::string>>& string_feed_batch,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
const std::vector<std::vector<int>>& string_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
const uint64_t log_id);
int numpy_predict(const std::vector<py::array_t<float>>& float_feed,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<py::array_t<int64_t>>& int_feed,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::string>& string_feed,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
const std::vector<std::vector<int>>& string_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
const uint64_t log_id);
private:
PredictorApi _api;
......@@ -243,6 +242,7 @@ class PredictorClient {
std::string _predictor_path;
std::string _conf_file;
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
......
......@@ -25,8 +25,6 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
std::once_flag gflags_init_flag;
namespace py = pybind11;
......@@ -68,9 +66,13 @@ int PredictorClient::init(const std::vector<std::string> &conf_file) {
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
......@@ -146,15 +148,15 @@ int PredictorClient::create_predictor() {
}
int PredictorClient::numpy_predict(
const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::vector<std::string>> &string_feed_batch,
const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
const std::vector<std::vector<int>> &string_lod_slot_batch,
......@@ -162,12 +164,6 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
batch_size = batch_size > string_feed_batch.size() ? batch_size
: string_feed_batch.size();
VLOG(2) << "batch size: " << batch_size;
// batch_size must be 1, because the batch is already inside the Tensor.
// I suggest removing the outer vector<>.
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
......@@ -190,136 +186,122 @@ int PredictorClient::numpy_predict(
}
int vec_idx = 0;
// batch_size can only be 1, because the batch is already inside the Tensor.
// If batch_size is not 1, an error will occur on the C++ side.
for (int bi = 0; bi < batch_size; bi++) {
VLOG(2) << "prepare batch " << bi;
std::vector<Tensor *> tensor_vec;
FeedInst *inst = req.add_insts();
std::vector<py::array_t<float>> float_feed = float_feed_batch[bi];
std::vector<py::array_t<int64_t>> int_feed = int_feed_batch[bi];
std::vector<std::string> string_feed = string_feed_batch[bi];
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
// batch is already in Tensor.
std::vector<Tensor *> tensor_vec;
for (auto &name : string_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : float_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
VLOG(2) << "batch [" << bi << "] "
<< "prepared";
for (auto &name : int_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
vec_idx = 0;
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
for (auto &name : string_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
vec_idx = 0;
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
VLOG(2) << "batch [" << bi << "] "
<< "float feed value prepared";
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
vec_idx = 0;
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
}
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(
tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
vec_idx = 0;
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
VLOG(2) << "batch [" << bi << "] "
<< "int feed value prepared";
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
}
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1], because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1], because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
vec_idx++;
}
VLOG(2) << "batch [" << bi << "] "
<< "string feed value prepared";
vec_idx++;
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
......@@ -351,19 +333,18 @@ int PredictorClient::numpy_predict(
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.insts(0).tensor_array(idx).shape_size();
int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
model._shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
model._shape_map[name][i] =
output.insts(0).tensor_array(idx).shape(i);
model._shape_map[name][i] = output.tensor(idx).shape(i);
}
int lod_size = output.insts(0).tensor_array(idx).lod_size();
int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
model._lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
model._lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
......@@ -375,22 +356,22 @@ int PredictorClient::numpy_predict(
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64";
int size = output.insts(0).tensor_array(idx).int64_data_size();
int size = output.tensor(idx).int64_data_size();
model._int64_value_map[name] = std::vector<int64_t>(
output.insts(0).tensor_array(idx).int64_data().begin(),
output.insts(0).tensor_array(idx).int64_data().begin() + size);
output.tensor(idx).int64_data().begin(),
output.tensor(idx).int64_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.insts(0).tensor_array(idx).float_data_size();
int size = output.tensor(idx).float_data_size();
model._float_value_map[name] = std::vector<float>(
output.insts(0).tensor_array(idx).float_data().begin(),
output.insts(0).tensor_array(idx).float_data().begin() + size);
output.tensor(idx).float_data().begin(),
output.tensor(idx).float_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.insts(0).tensor_array(idx).int_data_size();
int size = output.tensor(idx).int_data_size();
model._int32_value_map[name] = std::vector<int32_t>(
output.insts(0).tensor_array(idx).int_data().begin(),
output.insts(0).tensor_array(idx).int_data().begin() + size);
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
}
idx += 1;
}
......
......@@ -97,33 +97,31 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.destroy_predictor(); })
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
&float_feed_batch,
const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>
&int_feed_batch,
const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::vector<std::string>>& string_feed_batch,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
const std::vector<std::vector<int>>& string_lod_slot_batch,
const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
const std::vector<std::vector<int>> &string_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
return self.numpy_predict(float_feed_batch,
return self.numpy_predict(float_feed,
float_feed_name,
float_shape,
float_lod_slot_batch,
int_feed_batch,
int_feed,
int_feed_name,
int_shape,
int_lod_slot_batch,
string_feed_batch,
string_feed,
string_feed_name,
string_shape,
string_lod_slot_batch,
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_copy_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralCopyOp::inference() {
// read request from client
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
const TensorVector *in = &input_blob->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
int batch_size = input_blob->GetBatchSize();
int input_var_num = 0;
GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get op tls reader object output";
return -1;
}
res->SetLogId(log_id);
TensorVector *out = &res->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
res->SetBatchSize(batch_size);
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
for (int i = 0; i < in->size(); ++i) {
paddle::PaddleTensor lod_tensor;
CopyLod(&in->at(i), &lod_tensor);
lod_tensor.dtype = in->at(i).dtype;
lod_tensor.name = in->at(i).name;
VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
<< "].name = " << lod_tensor.name;
out->push_back(lod_tensor);
}
VLOG(2) << "(logid=" << log_id << ") pack done.";
for (int i = 0; i < out->size(); ++i) {
int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
out->at(i).data.Resize(out->at(i).lod[0].back() * sizeof(int64_t));
out->at(i).shape = {out->at(i).lod[0].back(), 1};
int64_t *tgt_ptr = static_cast<int64_t *>(out->at(i).data.data());
for (int j = 0; j < out->at(i).lod[0].back(); ++j) {
tgt_ptr[j] = src_ptr[j];
}
}
VLOG(2) << "(logid=" << log_id << ") output done.";
timeline.Pause();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, res);
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralCopyOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralCopyOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralCopyOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -36,7 +36,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......
......@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......
File mode changed from 100644 to 100755
......@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......@@ -117,9 +116,6 @@ int GeneralDistKVQuantInferOp::inference() {
std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
//TODO:Temporary addition, specific details to be studied by HexToString
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
int cube_quant_bits = resource.get_cube_quant_bits();
size_t EMBEDDING_SIZE = 0;
if (cube_quant_bits == 0) {
......@@ -146,7 +142,7 @@ int GeneralDistKVQuantInferOp::inference() {
sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].name = in->at(i).name;
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float));
// END HERE
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -31,7 +31,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......@@ -49,7 +48,7 @@ int GeneralInferOp::inference() {
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
......@@ -57,7 +56,7 @@ int GeneralInferOp::inference() {
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error";
return -1;
return -1;
}
output_blob->SetLogId(log_id);
......
File mode changed from 100644 to 100755
......@@ -30,42 +30,8 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
LOG(ERROR) << "feed var number not match: model config["
<< model_config->_feed_type.size() << "] vs. actual[" << var_num
<< "]";
return -1;
}
VLOG(2) << "fetch var num in reader op: " << req->fetch_var_names_size();
for (int i = 0; i < var_num; ++i) {
const Tensor &tensor = req->insts(0).tensor_array(i);
if (model_config->_feed_type[i] != tensor.elem_type()) {
LOG(ERROR) << "feed type not match.";
return -1;
}
if (model_config->_feed_shape[i].size() == tensor.shape_size()) {
for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
tensor.shape(j);
if (model_config->_feed_shape[i][j] != tensor.shape(j)) {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
} else {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
return 0;
}
int GeneralReaderOp::inference() {
// read request from client
......@@ -93,10 +59,8 @@ int GeneralReaderOp::inference() {
res->SetLogId(log_id);
Timer timeline;
int64_t start = timeline.TimeStampUS();
// only get insts(0), because the batch is already in the Tensor.
// req can only include 1 inst.
// var_num is the number of feed vars.
int var_num = req->insts(0).tensor_array_size();
int var_num = req->tensor_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num
<< ") start to call load general model_conf op";
......@@ -105,19 +69,7 @@ int GeneralReaderOp::inference() {
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
// get the first InferOP's model_config as ReaderOp's model_config by default.
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().front();
// TODO(guru4elephant): how to do conditional check?
/*
int ret = conf_check(req, model_config);
if (ret != 0) {
LOG(ERROR) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
*/
// package tensor
// prepare basic information for input
// specify the memory needed for output tensor_vector
......@@ -128,7 +80,7 @@ int GeneralReaderOp::inference() {
int64_t databuf_size = 0;
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->insts(0).tensor_array(i);
const Tensor &tensor = req->tensor(i);
data_len = 0;
elem_type = 0;
elem_size = 0;
......@@ -175,7 +127,7 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim;
paddleTensor.shape.push_back(dim);
}
paddleTensor.name = model_config->_feed_name[i];
paddleTensor.name = tensor.name();
out->push_back(paddleTensor);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
......
File mode changed from 100644 to 100755
......@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......@@ -49,7 +48,6 @@ int GeneralResponseOp::inference() {
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// build the response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
......@@ -63,7 +61,8 @@ int GeneralResponseOp::inference() {
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
//get the last InferOP's model_config as ResponseOp's model_config by default.
// get the last InferOP's model_config as ResponseOp's model_config by
// default.
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back();
......@@ -71,6 +70,10 @@ int GeneralResponseOp::inference() {
<< ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
std::vector<int> fetch_index;
// This relies on GetOutPutNames() returning an ordered map,
// so the order of Outputs matches the FetchVar order in the prototxt.
// Otherwise, an Output can only be looked up through the
// Name -- Alias_name mapping.
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
......@@ -95,40 +98,37 @@ int GeneralResponseOp::inference() {
ModelOutput *output = res->add_outputs();
// To get the order of model return values
output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
var_idx = 0;
// idx is the real index of the FetchVar,
// not an index into fetch_index.
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
//tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
Tensor *tensor = output->add_tensor();
tensor->set_name(in->at(idx).name);
tensor->set_alias_name(model_config->_fetch_alias_name[idx]);
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
std::string str_tensor_type = "is tensor";
if (model_config->_is_lod_fetch[idx] && in->at(idx).lod.size() > 0) {
str_tensor_type = "is lod_tensor";
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
tensor->add_lod(in->at(idx).lod[0][j]);
}
}
}
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << str_tensor_type;
var_idx = 0;
for (auto &idx : fetch_index) {
cap = 1;
for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
FetchInst *fetch_p = output->mutable_insts(0);
auto dtype = in->at(idx).dtype;
if (dtype == paddle::PaddleDType::INT64) {
tensor->set_elem_type(0);
VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
<< model_config->_fetch_name[idx] << "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
......@@ -137,35 +137,24 @@ int GeneralResponseOp::inference() {
// `Swap` is faster than constructing a copy with `{}`.
google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
&tmp_data);
output->mutable_tensor(var_idx)->mutable_int64_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::FLOAT32) {
tensor->set_elem_type(1);
VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
<< model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
google::protobuf::RepeatedField<float> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
&tmp_data);
output->mutable_tensor(var_idx)->mutable_float_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::INT32) {
tensor->set_elem_type(2);
VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
<< model_config->_fetch_name[idx] << "].";
int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap(
&tmp_data);
}
if (model_config->_is_lod_fetch[idx]) {
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
}
VLOG(2) << "(logid=" << log_id << ") fetch var ["
......@@ -205,4 +194,4 @@ DEFINE_OP(GeneralResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
} // namespace baidu
File mode changed from 100644 to 100755
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get op tls reader object output";
return -1;
}
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (batch_size <= 0) {
LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
return -1;
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
VLOG(2) << "(logid=" << log_id << ") print general model config done.";
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.int_data_size();
int cur_len = out->at(i).lod[0].back();
out->at(i).lod[0].push_back(cur_len + data_len);
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).float_data_size();
++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
}
}
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralTextReaderOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextReaderOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextReaderOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_response_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextResponseOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
const GeneralBlob *input_blob;
uint64_t log_id =
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// build the response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back();
std::vector<int> fetch_index;
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
<< " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
ModelOutput *output = res->add_outputs();
output->set_engine_name(
pre_name); // To get the order of model return values
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
int cap = 1;
for (int j = 1; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < batch_size; ++j) {
for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
k++) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
}
var_idx++;
}
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextResponseOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextResponseOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;// 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated FeedInst insts = 1;
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
......@@ -46,7 +45,7 @@ message Response {
};
message ModelOutput {
repeated FetchInst insts = 1;
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
......
......@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated FeedInst insts = 1;
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
......@@ -46,7 +45,7 @@ message Response {
};
message ModelOutput {
repeated FetchInst insts = 1;
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
......
......@@ -37,7 +37,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 -
### Client access with curl
```shell
curl -XPOST http://127.0.0.1:9393/GeneralModelService/inference -d ' {"insts":[{"tensor_array":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}]}],"fetch_var_names":["price"],"log_id":0}'
curl -XPOST http://127.0.0.1:9393/GeneralModelService/inference -d ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}'
```
Here `127.0.0.1:9393` is the IP and port; set them to match the IP and port your server was started with.
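The same request can also be issued from Python; below is a minimal sketch using the third-party `requests` package (an assumption, not part of this repo), mirroring the curl example above.
```python
# Minimal sketch of the curl call above via the `requests` package.
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,          # 1 means float32
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
resp = requests.post("http://127.0.0.1:9393/GeneralModelService/inference",
                     json=payload)
print(resp.json())
```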
......@@ -76,7 +76,7 @@ repeated int32 numbers = 1;
// rapidjson
{"numbers" : [12, 17, 1, 24] }
```
#### shape
#### elem_type
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes (string).
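For example, an int64 feed of shape [2] would select the `int64_data` field; the fragment below is a hypothetical illustration following the mapping above.
```python
# elem_type picks the data field: 0 -> int64_data, 1 -> float_data,
# 2 -> int_data, 3 -> data (bytes/string).
int64_tensor = {
    "int64_data": [8, 9],
    "elem_type": 0,
    "shape": [2],
}
```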
......@@ -93,7 +93,7 @@ repeated int32 numbers = 1;
Taking fit_a_line above as an example, we reuse the earlier request body. This is only to demonstrate usage; in practice, compressing a payload this small is not worth it.
```shell
echo ' {"insts":[{"tensor_array":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}]}],"fetch_var_names":["price"],"log_id":0}' | gzip -c > data.txt.gz
echo ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}' | gzip -c > data.txt.gz
```
```shell
......
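A Python equivalent of the compressed request might look like the sketch below; it assumes the server decompresses bodies sent with a `Content-Encoding: gzip` header, as the gzip demo above implies, and again uses the third-party `requests` package.
```python
# Sketch: POST a gzip-compressed JSON body (assumes the server honors
# "Content-Encoding: gzip", mirroring the shell demo above).
import gzip
import json
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
body = gzip.compress(json.dumps(payload).encode("utf-8"))
resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=body,
    headers={"Content-Type": "application/json", "Content-Encoding": "gzip"},
)
print(resp.json())
```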
package main
import (
"io"
"os"
"fmt"
"bufio"
"strings"
"strconv"
)
func main() {
score_file := os.Args[1]
fi, err := os.Open(score_file)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
total := int(0)
acc := int(0)
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
s := strings.Split(line, "\t")
prob_str := strings.Trim(s[0], " ")
label_str := strings.Trim(s[1], " ")
prob, err := strconv.ParseFloat(prob_str, 32)
if err != nil {
panic(err)
}
label, err := strconv.ParseFloat(label_str, 32)
if err != nil {
panic(err)
}
if (prob - 0.5) * (label - 0.5) > 0 {
acc++
}
total++
}
fmt.Println("total num: ", total)
fmt.Println("acc num: ", acc)
fmt.Println("acc: ", float32(acc) / float32(total))
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io"
"fmt"
"strings"
"bufio"
"strconv"
"os"
serving_client "github.com/PaddlePaddle/Serving/go/serving_client"
)
func main() {
var config_file_path string
config_file_path = os.Args[1]
handle := serving_client.LoadModelConfig(config_file_path)
handle = serving_client.Connect("127.0.0.1", "9292", handle)
test_file_path := os.Args[2]
fi, err := os.Open(test_file_path)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
fetch := []string{"cost", "acc", "prediction"}
var result map[string][]float32
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
var words = []int64{}
s := strings.Split(line, " ")
value, err := strconv.Atoi(s[0])
var feed_int_map map[string][]int64
for _, v := range s[1:value + 1] {
int_v, _ := strconv.Atoi(v)
words = append(words, int64(int_v))
}
label, err := strconv.Atoi(s[len(s)-1])
if err != nil {
panic(err)
}
feed_int_map = map[string][]int64{}
feed_int_map["words"] = words
feed_int_map["label"] = []int64{int64(label)}
result = serving_client.Predict(handle,
feed_int_map, fetch)
fmt.Println(result["prediction"][1], "\t", int64(label))
}
}
\ No newline at end of file
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: general_model_config.proto
package baidu_paddle_serving_configure
import (
fmt "fmt"
proto "github.com/golang/protobuf/proto"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type FeedVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
FeedType *int32 `protobuf:"varint,4,opt,name=feed_type,json=feedType,def=0" json:"feed_type,omitempty"`
Shape []int32 `protobuf:"varint,5,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FeedVar) Reset() { *m = FeedVar{} }
func (m *FeedVar) String() string { return proto.CompactTextString(m) }
func (*FeedVar) ProtoMessage() {}
func (*FeedVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{0}
}
func (m *FeedVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FeedVar.Unmarshal(m, b)
}
func (m *FeedVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FeedVar.Marshal(b, m, deterministic)
}
func (m *FeedVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FeedVar.Merge(m, src)
}
func (m *FeedVar) XXX_Size() int {
return xxx_messageInfo_FeedVar.Size(m)
}
func (m *FeedVar) XXX_DiscardUnknown() {
xxx_messageInfo_FeedVar.DiscardUnknown(m)
}
var xxx_messageInfo_FeedVar proto.InternalMessageInfo
const Default_FeedVar_IsLodTensor bool = false
const Default_FeedVar_FeedType int32 = 0
func (m *FeedVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FeedVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FeedVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FeedVar_IsLodTensor
}
func (m *FeedVar) GetFeedType() int32 {
if m != nil && m.FeedType != nil {
return *m.FeedType
}
return Default_FeedVar_FeedType
}
func (m *FeedVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type FetchVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
Shape []int32 `protobuf:"varint,4,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FetchVar) Reset() { *m = FetchVar{} }
func (m *FetchVar) String() string { return proto.CompactTextString(m) }
func (*FetchVar) ProtoMessage() {}
func (*FetchVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{1}
}
func (m *FetchVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FetchVar.Unmarshal(m, b)
}
func (m *FetchVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FetchVar.Marshal(b, m, deterministic)
}
func (m *FetchVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FetchVar.Merge(m, src)
}
func (m *FetchVar) XXX_Size() int {
return xxx_messageInfo_FetchVar.Size(m)
}
func (m *FetchVar) XXX_DiscardUnknown() {
xxx_messageInfo_FetchVar.DiscardUnknown(m)
}
var xxx_messageInfo_FetchVar proto.InternalMessageInfo
const Default_FetchVar_IsLodTensor bool = false
func (m *FetchVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FetchVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FetchVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FetchVar_IsLodTensor
}
func (m *FetchVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type GeneralModelConfig struct {
FeedVar []*FeedVar `protobuf:"bytes,1,rep,name=feed_var,json=feedVar" json:"feed_var,omitempty"`
FetchVar []*FetchVar `protobuf:"bytes,2,rep,name=fetch_var,json=fetchVar" json:"fetch_var,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *GeneralModelConfig) Reset() { *m = GeneralModelConfig{} }
func (m *GeneralModelConfig) String() string { return proto.CompactTextString(m) }
func (*GeneralModelConfig) ProtoMessage() {}
func (*GeneralModelConfig) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{2}
}
func (m *GeneralModelConfig) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GeneralModelConfig.Unmarshal(m, b)
}
func (m *GeneralModelConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GeneralModelConfig.Marshal(b, m, deterministic)
}
func (m *GeneralModelConfig) XXX_Merge(src proto.Message) {
xxx_messageInfo_GeneralModelConfig.Merge(m, src)
}
func (m *GeneralModelConfig) XXX_Size() int {
return xxx_messageInfo_GeneralModelConfig.Size(m)
}
func (m *GeneralModelConfig) XXX_DiscardUnknown() {
xxx_messageInfo_GeneralModelConfig.DiscardUnknown(m)
}
var xxx_messageInfo_GeneralModelConfig proto.InternalMessageInfo
func (m *GeneralModelConfig) GetFeedVar() []*FeedVar {
if m != nil {
return m.FeedVar
}
return nil
}
func (m *GeneralModelConfig) GetFetchVar() []*FetchVar {
if m != nil {
return m.FetchVar
}
return nil
}
func init() {
proto.RegisterType((*FeedVar)(nil), "baidu.paddle_serving.configure.FeedVar")
proto.RegisterType((*FetchVar)(nil), "baidu.paddle_serving.configure.FetchVar")
proto.RegisterType((*GeneralModelConfig)(nil), "baidu.paddle_serving.configure.GeneralModelConfig")
}
func init() { proto.RegisterFile("general_model_config.proto", fileDescriptor_efa52beffa29d37a) }
var fileDescriptor_efa52beffa29d37a = []byte{
// 283 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0xd0, 0x31, 0x4b, 0xc4, 0x30,
0x14, 0x07, 0x70, 0x72, 0x6d, 0xb9, 0xf6, 0x1d, 0x2e, 0xc1, 0xa1, 0x08, 0x1e, 0xe5, 0x16, 0xe3,
0x52, 0xc4, 0xf1, 0x46, 0xc5, 0x73, 0x51, 0x87, 0x72, 0xb8, 0x86, 0xd8, 0xbc, 0xb6, 0x81, 0x5c,
0x53, 0x92, 0xde, 0xc1, 0x2d, 0x7e, 0x13, 0xf1, 0xab, 0x4a, 0x93, 0x43, 0x9c, 0x74, 0x72, 0x7b,
0x79, 0xff, 0xf0, 0xde, 0xe3, 0x07, 0x17, 0x2d, 0xf6, 0x68, 0x85, 0xe6, 0x3b, 0x23, 0x51, 0xf3,
0xda, 0xf4, 0x8d, 0x6a, 0xcb, 0xc1, 0x9a, 0xd1, 0xd0, 0xe5, 0x9b, 0x50, 0x72, 0x5f, 0x0e, 0x42,
0x4a, 0x8d, 0xdc, 0xa1, 0x3d, 0xa8, 0xbe, 0x2d, 0xc3, 0x97, 0xbd, 0xc5, 0xd5, 0x07, 0x81, 0xf9,
0x06, 0x51, 0xbe, 0x0a, 0x4b, 0x29, 0xc4, 0xbd, 0xd8, 0x61, 0x4e, 0x0a, 0xc2, 0xb2, 0xca, 0xd7,
0xf4, 0x12, 0x40, 0x68, 0x25, 0x1c, 0xf7, 0xc9, 0xcc, 0x27, 0x99, 0xef, 0xbc, 0x4c, 0xf1, 0x35,
0x9c, 0x29, 0xc7, 0xb5, 0x91, 0x7c, 0xc4, 0xde, 0x19, 0x9b, 0x47, 0x05, 0x61, 0xe9, 0x3a, 0x69,
0x84, 0x76, 0x58, 0x2d, 0x94, 0x7b, 0x32, 0x72, 0xeb, 0x13, 0xba, 0x84, 0xac, 0x41, 0x94, 0x7c,
0x3c, 0x0e, 0x98, 0xc7, 0x05, 0x61, 0xc9, 0x9a, 0xdc, 0x54, 0xe9, 0xd4, 0xdb, 0x1e, 0x07, 0xa4,
0xe7, 0x90, 0xb8, 0x4e, 0x0c, 0x98, 0x27, 0x45, 0xc4, 0x92, 0x2a, 0x3c, 0x56, 0xef, 0x90, 0x6e,
0x70, 0xac, 0xbb, 0xff, 0xbf, 0xef, 0x7b, 0x7f, 0xfc, 0x73, 0xff, 0x27, 0x01, 0xfa, 0x18, 0x78,
0x9f, 0x27, 0xdd, 0x7b, 0x2f, 0x47, 0xef, 0xc0, 0x1f, 0xce, 0x0f, 0xc2, 0xe6, 0xa4, 0x88, 0xd8,
0xe2, 0xf6, 0xaa, 0xfc, 0x5d, 0xba, 0x3c, 0x29, 0x57, 0xf3, 0xe6, 0xc4, 0xfd, 0x30, 0x81, 0x8c,
0x75, 0xe7, 0x87, 0xcc, 0xfc, 0x10, 0xf6, 0xf7, 0x90, 0x60, 0x31, 0xb9, 0x85, 0xea, 0x2b, 0x00,
0x00, 0xff, 0xff, 0x08, 0x27, 0x9c, 0x1a, 0xfe, 0x01, 0x00, 0x00,
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package serving_client
import (
"bytes"
"encoding/json"
"io/ioutil"
"log"
"net/http"
pb "github.com/PaddlePaddle/Serving/go/proto"
"github.com/golang/protobuf/proto"
)
type Tensor struct {
Data []byte `json:"data"`
FloatData []float32 `json:"float_data"`
IntData []int `json:"int_data"`
Int64Data []int64 `json:"int64_data"`
ElemType int `json:"elem_type"`
Shape []int `json:"shape"`
}
type FeedInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type FetchInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type Request struct {
Insts []FeedInst `json:"insts"`
FetchVarNames []string `json:"fetch_var_names"`
ProfileServer bool `json:"profile_server"`
}
type Response struct {
Insts []FetchInst `json:"insts"`
ProfileTime []int64 `json:"profile_time"`
}
type Handle struct {
Url string
Port string
FeedAliasNameMap map[string]string
FeedShapeMap map[string][]int
FeedNameMap map[string]int
FeedAliasNames []string
FetchNameMap map[string]int
FetchAliasNameMap map[string]string
}
func LoadModelConfig(config string) Handle {
in, err := ioutil.ReadFile(config)
if err != nil {
log.Fatalln("Failed to read general model: ", err)
}
general_model_config := &pb.GeneralModelConfig{}
if err := proto.Unmarshal(in, general_model_config); err != nil {
log.Fatalln("Failed to parse GeneralModelConfig: ", err)
}
log.Println("read protobuf succeed")
handle := Handle{}
handle.FeedNameMap = map[string]int{}
handle.FeedAliasNameMap = map[string]string{}
handle.FeedShapeMap = map[string][]int{}
handle.FetchNameMap = map[string]int{}
handle.FetchAliasNameMap = map[string]string{}
handle.FeedAliasNames = []string{}
for i, v := range general_model_config.FeedVar {
handle.FeedNameMap[*v.Name] = i
tmp_array := []int{}
for _, vv := range v.Shape {
tmp_array = append(tmp_array, int(vv))
}
handle.FeedShapeMap[*v.Name] = tmp_array
handle.FeedAliasNameMap[*v.AliasName] = *v.Name
handle.FeedAliasNames = append(handle.FeedAliasNames, *v.AliasName)
}
for i, v := range general_model_config.FetchVar {
handle.FetchNameMap[*v.Name] = i
handle.FetchAliasNameMap[*v.AliasName] = *v.Name
}
return handle
}
func Connect(url string, port string, handle Handle) Handle {
handle.Url = url
handle.Port = port
return handle
}
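// Predict assembles the JSON request from the int64 feed map, POSTs it to
// /GeneralModelService/inference, and maps the fetched float results by name.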
func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map[string][]float32 {
contentType := "application/json;charset=utf-8"
var tensor_array []Tensor
var inst FeedInst
tensor_array = []Tensor{}
inst = FeedInst{}
for i := 0; i < len(handle.FeedAliasNames); i++ {
key_i := handle.FeedAliasNames[i]
var tmp Tensor
tmp.IntData = []int{}
tmp.Shape = []int{}
tmp.Int64Data = int_feed_map[key_i]
tmp.ElemType = 0
tmp.Shape = handle.FeedShapeMap[key_i]
tensor_array = append(tensor_array, tmp)
}
inst.TensorArray = tensor_array
var profile_server bool
profile_server = false
req := &Request{
Insts: []FeedInst{inst},
FetchVarNames: fetch,
ProfileServer: profile_server}
	b, err := json.Marshal(req)
	if err != nil {
		log.Println("Marshal failed:", err)
	}
	body := bytes.NewBuffer(b)
var post_address bytes.Buffer
post_address.WriteString("http://")
post_address.WriteString(handle.Url)
post_address.WriteString(":")
post_address.WriteString(handle.Port)
post_address.WriteString("/GeneralModelService/inference")
	resp, err := http.Post(post_address.String(), contentType, body)
	if err != nil {
		log.Println("Post failed:", err)
		return nil
	}
defer resp.Body.Close()
content, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Read failed:", err)
}
response_json := Response{}
	err = json.Unmarshal(content, &response_json)
	if err != nil {
		log.Println("Unmarshal failed:", err)
	}
var result map[string][]float32
result = map[string][]float32{}
for i, v := range fetch {
result[v] = response_json.Insts[0].TensorArray[i].FloatData
}
return result
}
......@@ -18,28 +18,17 @@ public class PaddleServingClientExample {
INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {1,13};
INDArray batch_npdata = npdata.reshape(batch_shape);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("x", batch_npdata);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
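        // predict(feedData, fetch, batchFlag, log_id): batchFlag=true prepends a
        // batch dimension of 1 to each tensor shape; log_id tags the request.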
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -77,134 +66,15 @@ public class PaddleServingClientExample {
INDArray im_size = Nd4j.createFromArray(new int[]{height, width});
long[] batch_size_shape = {1,2};
INDArray batch_im_size = im_size.reshape(batch_size_shape);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("image", batch_image);
put("im_size", batch_im_size);
}};
List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
succ = client.setRpcTimeoutMs(20000); // cpu
if (succ != true) {
System.out.println("set timeout failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean batch_predict() {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>() {{
add(feed_data);
add(feed_data);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_batch, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean asyn_predict() {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
PredictFuture future = client.asyn_predict(feed_data, fetch);
Map<String, INDArray> fetch_map = future.get();
if (fetch_map == null) {
System.out.println("Get future reslut failed");
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean model_ensemble() {
long[] data = {8, 233, 52, 601};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("words", npdata);
}};
List<String> fetch = Arrays.asList("prediction");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, HashMap<String, INDArray>> fetch_map
= client.ensemble_predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, HashMap<String, INDArray>> entry : fetch_map.entrySet()) {
System.out.println("Model = " + entry.getKey());
HashMap<String, INDArray> tt = entry.getValue();
for (Map.Entry<String, INDArray> e : tt.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
}
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -213,8 +83,8 @@ public class PaddleServingClientExample {
long[] position_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] input_ids = {101, 6843, 3241, 749, 8024, 7662, 2533, 1391, 2533, 2523, 7676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] segment_ids = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("input_mask", Nd4j.createFromArray(input_mask));
put("position_ids", Nd4j.createFromArray(position_ids));
put("input_ids", Nd4j.createFromArray(input_ids));
......@@ -222,22 +92,9 @@ public class PaddleServingClientExample {
}};
List<String> fetch = Arrays.asList("pooled_output");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -271,8 +128,8 @@ public class PaddleServingClientExample {
long[] embedding_19 = {537425};
long[] embedding_0 = {737395};
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("embedding_14.tmp_0", Nd4j.createFromArray(embedding_14));
put("embedding_2.tmp_0", Nd4j.createFromArray(embedding_2));
put("embedding_10.tmp_0", Nd4j.createFromArray(embedding_10));
......@@ -302,23 +159,9 @@ public class PaddleServingClientExample {
put("embedding_0.tmp_0", Nd4j.createFromArray(embedding_0));
}};
List<String> fetch = Arrays.asList("prob");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -330,7 +173,7 @@ public class PaddleServingClientExample {
if (args.length < 1) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample <test-type>.");
System.out.println("<test-type>: fit_a_line bert model_ensemble asyn_predict batch_predict cube_local cube_quant yolov4");
System.out.println("<test-type>: fit_a_line bert cube_local yolov4");
return;
}
String testType = args[0];
......@@ -339,16 +182,8 @@ public class PaddleServingClientExample {
succ = e.fit_a_line();
} else if ("bert".equals(testType)) {
succ = e.bert();
} else if ("model_ensemble".equals(testType)) {
succ = e.model_ensemble();
} else if ("asyn_predict".equals(testType)) {
succ = e.asyn_predict();
} else if ("batch_predict".equals(testType)) {
succ = e.batch_predict();
} else if ("cube_local".equals(testType)) {
succ = e.cube_local();
} else if ("cube_quant".equals(testType)) {
succ = e.cube_local();
} else if ("yolov4".equals(testType)) {
if (args.length < 2) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample yolov4 <image-filepath>.");
......
......@@ -145,6 +145,11 @@
<artifactId>json</artifactId>
<version>20190722</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
......
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.stream.Collectors;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map.Entry;
import java.nio.file.*;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.factory.Nd4j;
import java.lang.reflect.*;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.json.*;
import io.paddle.serving.configure.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
enum ElementType
{
Int64_type, Float32_type, Int32_type, Bytes_type;
}
class Profiler {
int pid_;
String print_head_ = null;
List<String> time_record_ = null;
boolean enable_ = false;
Profiler() {
RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean();
pid_ = Integer.valueOf(runtimeMXBean.getName().split("@")[0]).intValue();
print_head_ = "\nPROFILE\tpid:" + pid_ + "\t";
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
void record(String name) {
if (enable_) {
long ctime = System.currentTimeMillis() * 1000;
time_record_.add(name + ":" + String.valueOf(ctime) + " ");
}
}
void printProfile() {
if (enable_) {
            String profile_str = String.join("", time_record_);
            System.out.println(profile_str);   // actually emit the collected profile
            time_record_ = new ArrayList<String>();
            time_record_.add(print_head_);
}
}
void enable(boolean flag) {
enable_ = flag;
}
}
public class HttpClient {
private int httpTimeoutS_;
private List<String> feedNames_;
private Map<String, String> feedRealNames_;
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private Map<String, Integer> feedNameToIndex_;
private Map<Integer, String> feedTypeToDataKey_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
private Map<String, Integer> feedTensorLen_;
private Profiler profiler_;
private String ip;
private String serverPort;
private String port;
private String serviceName;
private boolean request_compress_flag;
private boolean response_compress_flag;
public HttpClient() {
feedNames_ = null;
feedRealNames_ = null;
feedTypes_ = null;
feedShapes_ = null;
fetchNames_ = null;
fetchTypes_ = null;
lodTensorSet_ = null;
feedTensorLen_ = null;
feedNameToIndex_ = null;
httpTimeoutS_ = 200000;
ip = "127.0.0.1";
port = "9393";
serverPort = "9393";
serviceName = "/GeneralModelService/inference";
request_compress_flag = false;
response_compress_flag = false;
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(0, "int64_data");
feedTypeToDataKey_.put(1, "float_data");
feedTypeToDataKey_.put(2, "int_data");
feedTypeToDataKey_.put(3, "data");
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
if (FLAGS_profile_client != null && FLAGS_profile_client.equals("1")) {
is_profile = true;
}
profiler_.enable(is_profile);
}
public void setTimeOut(int httpTimeoutS_) {
this.httpTimeoutS_ = httpTimeoutS_;
}
public void setIP(String ip) {
this.ip = ip;
}
public void setPort(String port) {
this.port = port;
this.serverPort = port;
}
public void setServiceName(String serviceName){
this.serviceName = serviceName;
}
public void loadClientConfig(String model_config_path) {
GeneralModelConfig.Builder model_conf_builder = GeneralModelConfig.newBuilder();
try {
String model_config_str = Files.readString(Paths.get(model_config_path));
com.google.protobuf.TextFormat.getParser().merge(model_config_str, model_conf_builder);
} catch (com.google.protobuf.TextFormat.ParseException e) {
System.out.format("Parse client config failed: %s\n", e.toString());
}
GeneralModelConfig model_conf = model_conf_builder.build();
feedNames_ = new ArrayList<String>();
feedRealNames_ = new HashMap<String, String>();
feedTypes_ = new HashMap<String, Integer>();
feedShapes_ = new HashMap<String, List<Integer>>();
lodTensorSet_ = new HashSet<String>();
feedTensorLen_ = new HashMap<String, Integer>();
feedNameToIndex_ = new HashMap<String, Integer>();
fetchNames_ = new ArrayList<String>();
fetchTypes_ = new HashMap<String, Integer>();
List<FeedVar> feed_var_list = model_conf.getFeedVarList();
for (int i = 0; i < feed_var_list.size(); ++i) {
FeedVar feed_var = feed_var_list.get(i);
String var_name = feed_var.getAliasName();
feedNames_.add(var_name);
feedRealNames_.put(var_name, feed_var.getName());
feedTypes_.put(var_name, feed_var.getFeedType());
feedShapes_.put(var_name, feed_var.getShapeList());
feedNameToIndex_.put(var_name, i);
if (feed_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
} else {
int counter = 1;
for (int dim : feedShapes_.get(var_name)) {
counter *= dim;
}
feedTensorLen_.put(var_name, counter);
}
}
List<FetchVar> fetch_var_list = model_conf.getFetchVarList();
for (int i = 0; i < fetch_var_list.size(); i++) {
FetchVar fetch_var = fetch_var_list.get(i);
String var_name = fetch_var.getAliasName();
fetchNames_.add(var_name);
fetchTypes_.put(var_name, fetch_var.getFetchType());
}
}
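    // Encrypted-model handshake: use_key POSTs the key file to the base
    // endpoint; the server replies with the real serving port in
    // `endpoint_list`, which then replaces serverPort for later predict calls.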
public void use_key(String keyFilePath) {
String key_str = null;
String encrypt_url = "http://" + this.ip + ":" +this.port;
try {
key_str = Files.readString(Paths.get(keyFilePath));
} catch (Exception e) {
System.out.format("Open key file failed: %s\n", e.toString());
}
JSONObject jsonKey = new JSONObject();
if( key_str != null) {
jsonKey.put("key", key_str);
}else{
jsonKey.put("key", "");
}
String result = doPost(encrypt_url, jsonKey.toString());
try {
JSONObject jsonObject = new JSONObject(result);
JSONArray jsonArray = jsonObject.getJSONArray("endpoint_list");
this.serverPort = jsonArray.getString(0);
System.out.format("Real ServerPort is: %s\n", this.serverPort);
}catch (JSONException err) {
System.out.format("Parse serverPort failed: %s\n", err.toString());
}
}
public void set_request_compress(boolean request_compress_flag) {
// need to be done.
this.request_compress_flag = request_compress_flag;
}
public void set_response_compress(boolean response_compress_flag) {
// need to be done.
this.response_compress_flag = response_compress_flag;
}
public static String compress(String str,String inEncoding) throws IOException {
if (str == null || str.length() == 0) {
return str;
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(str.getBytes(inEncoding));
gzip.close();
return out.toString("ISO-8859-1");
}
    // Helper methods that assemble the HTTP request; the caller only needs to
    // pass the feed data, lod and fetch list. The JSON body is built from the
    // proto definition, and the interface largely mirrors the Python client.
    // Four overloads are provided: at minimum feedData and fetch, with optional
    // lod and batchFlag.
public String predict(Map<String, Object> feedData,
List<String> fetch,
int log_id) {
return predict(feedData,null,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
List<String> fetch,
boolean batchFlag,
int log_id) {
return predict(feedData,null,fetch,batchFlag,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
int log_id) {
return predict(feedData,feedLod,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
boolean batchFlag,
int log_id) {
String server_url = "http://" + this.ip + ":" + this.serverPort + this.serviceName;
        // Assemble the fetch list
JSONArray jsonFetchList = new JSONArray();
Iterator<String> fetchIterator = fetch.iterator();
while (fetchIterator.hasNext()) {
jsonFetchList.put(fetchIterator.next());
}
        // Assemble the tensor array
JSONArray jsonTensorArray = new JSONArray();
try{
if (null != feedData && feedData.size() > 0) {
                // Get the entry set of the feed map
Set<Entry<String, Object>> entrySet = feedData.entrySet();
                // Iterate over the feed entries
Iterator<Entry<String, Object>> iterator = entrySet.iterator();
while (iterator.hasNext()) {
JSONObject jsonTensor = new JSONObject();
Entry<String, Object> mapEntry = iterator.next();
Object objectValue = mapEntry.getValue();
String feed_alias_name = mapEntry.getKey();
String feed_real_name = feedRealNames_.get(feed_alias_name);
List<Integer> shape = feedShapes_.get(feed_alias_name);
int element_type = feedTypes_.get(feed_alias_name);
String protoDataKey = feedTypeToDataKey_.get(element_type);
                    // feedLod may be null when an overload without lod is used.
                    Object feedLodValue = (feedLod == null) ? null : feedLod.get(feed_alias_name);
                    // If the value is an INDArray, flatten it to 1-D first and record its real shape;
                    // String and List values are serialized as-is.
if(objectValue.getClass().equals(INDArray.class)){
long[] flattened_shape = {-1};
Class<?> classLongArray = flattened_shape.getClass();
Method methodReshape = mapEntry.getValue().getClass().getMethod("reshape", classLongArray);
Method methodShape = mapEntry.getValue().getClass().getMethod("shape");
long[] indarrayShape = (long[])methodShape.invoke(objectValue);
shape.clear();
for(long dim:indarrayShape){
shape.add((int)dim);
}
objectValue = methodReshape.invoke(objectValue,flattened_shape);
}
if(batchFlag){
                        // Prepend a batch dimension of 1 at index 0
shape.add(0, 1);
}
jsonTensor.put("alias_name", feed_alias_name);
jsonTensor.put("name", feed_real_name);
jsonTensor.put("shape", shape);
jsonTensor.put("elem_type", element_type);
jsonTensor.put(protoDataKey,objectValue);
if(feedLodValue != null) {
jsonTensor.put("lod", feedLodValue);
}
jsonTensorArray.put(jsonTensor);
}
}
}catch (Exception e) {
e.printStackTrace();
}
JSONObject jsonRequest = new JSONObject();
jsonRequest.put("log_id",log_id);
jsonRequest.put("fetch_var_names", jsonFetchList);
jsonRequest.put("tensor",jsonTensorArray);
return doPost(server_url, jsonRequest.toString());
}
public String doPost(String url, String strPostData) {
CloseableHttpClient httpClient = null;
CloseableHttpResponse httpResponse = null;
String result = "";
        // Create the httpClient instance
httpClient = HttpClients.createDefault();
        // Create the HttpPost instance for the remote connection
HttpPost httpPost = new HttpPost(url);
        // Build the request config (all timeouts are in milliseconds)
        RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(httpTimeoutS_)// connect timeout
                .setConnectionRequestTimeout(httpTimeoutS_)// connection-request timeout
                .setSocketTimeout(httpTimeoutS_)// socket read timeout
.build();
        // Apply the config to the httpPost instance
httpPost.setConfig(requestConfig);
        // Set request headers
httpPost.addHeader("Content-Type", "application/json");
if(response_compress_flag){
httpPost.addHeader("Accept-encoding", "gzip");
}
if(request_compress_flag && strPostData.length()>512){
try{
strPostData = compress(strPostData,"UTF-8");
httpPost.addHeader("Content-Encoding", "gzip");
} catch (IOException e) {
e.printStackTrace();
}
}
try {
httpPost.setEntity(new StringEntity(strPostData, "UTF-8"));
            // Execute the POST request and obtain the response
httpResponse = httpClient.execute(httpPost);
            // Extract the response body
HttpEntity entity = httpResponse.getEntity();
Header header = entity.getContentEncoding();
            if(header != null && header.getValue().equalsIgnoreCase("gzip")){ // response body is gzip-compressed
GzipDecompressingEntity gzipEntity = new GzipDecompressingEntity(entity);
result = EntityUtils.toString(gzipEntity);
}else{
result = EntityUtils.toString(entity);
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
            // Release resources
if (null != httpResponse) {
try {
httpResponse.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != httpClient) {
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
}
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import io.grpc.StatusRuntimeException;
import com.google.common.util.concurrent.ListenableFuture;
import org.nd4j.linalg.api.ndarray.INDArray;
import io.paddle.serving.client.Client;
import io.paddle.serving.grpc.*;
public class PredictFuture {
private ListenableFuture<InferenceResponse> callFuture_;
private Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> callBackFunc_;
PredictFuture(ListenableFuture<InferenceResponse> call_future,
Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> call_back_func) {
callFuture_ = call_future;
callBackFunc_ = call_back_func;
}
public Map<String, INDArray> get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
Map<String, HashMap<String, INDArray>> ensemble_result
= callBackFunc_.apply(resp);
List<Map.Entry<String, HashMap<String, INDArray>>> list
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet());
if (list.size() != 1) {
System.out.format("predict failed: please use get_ensemble impl.\n");
return null;
}
return list.get(0).getValue();
}
public Map<String, HashMap<String, INDArray>> ensemble_get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
return callBackFunc_.apply(resp);
}
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
......@@ -35,11 +35,11 @@ client-side configuration file are stored in the `encrypt_client` directory.
## Start Encryption Service
CPU Service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
```
GPU Service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
```
## Prediction
......
......@@ -36,14 +36,14 @@ def serving_encryption():
## Start the Encrypted Prediction Service
CPU prediction service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
```
GPU prediction service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
```
## Prediction
```
python test_client.py encrypt_client/
python test_client.py encrypt_client/serving_client_conf.prototxt
```
......@@ -19,7 +19,8 @@ import sys
client = Client()
client.load_client_config(sys.argv[1])
client.use_key("./key")
client.connect(["127.0.0.1:9300"], encryption=True)
client.connect(["0.0.0.0:9393"], encryption=True)
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
......@@ -28,5 +29,5 @@ test_reader = paddle.batch(
batch_size=1)
for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=fetch_list)
print(fetch_map)
......@@ -20,7 +20,7 @@ import numpy as np
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -31,6 +31,5 @@ for data in test_reader():
new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0]
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
feed={"x": new_data}, fetch=fetch_list, batch=True)
print(fetch_map)
......@@ -13,29 +13,30 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_client.httpclient import HttpClient
import sys
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
"""
import time
client = HttpClient()
client.load_client_config(sys.argv[1])
client.use_key("./key")
client.set_response_compress(True)
client.set_request_compress(True)
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1)
for data in test_reader():
new_data = np.zeros((1, 1, 13)).astype("float32")
new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0]
lst_data = []
for i in range(200):
lst_data.append(data[0][0])
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
feed={"x": lst_data}, fetch=fetch_list, batch=True)
print(fetch_map)
"""
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
break
# Linear Regression Prediction Service Example
## Get Data
```shell
sh get_data.sh
```
## Start the gRPC Server
``` shell
python test_server.py uci_housing_model/
```
You can also start the default gRPC service with the following one-liner:
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang
```
## Client Prediction
### Synchronous Prediction
``` shell
python test_sync_client.py
```
### Asynchronous Prediction
``` shell
python test_asyn_client.py
```
### Batch Prediction
``` shell
python test_batch_client.py
```
### Prediction Timeout
``` shell
python test_timeout_client.py
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import functools
import time
import threading
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
complete_task_count = [0]
lock = threading.Lock()
def call_back(call_future):
try:
fetch_map = call_future.result()
print(fetch_map)
except grpc.RpcError as e:
print(e.code())
finally:
with lock:
complete_task_count[0] += 1
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
task_count = 0
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
future = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False, asyn=True)
task_count += 1
future.add_done_callback(functools.partial(call_back))
while complete_task_count[0] != task_count:
time.sleep(0.1)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
batch_size = 2
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 1, 13))
batch_data = np.concatenate([new_data, new_data, new_data], axis=0)
print(batch_data.shape)
fetch_map = client.predict(
feed={"x": batch_data}, fetch=["price"], batch=True)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.prepare_server(workdir="work_dir1", port=9393, device="cpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.set_gpuid("0")
server.prepare_server(workdir="work_dir1", port=9393, device="gpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
client.set_rpc_timeout_ms(40)
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
elif fetch_map["serving_status_code"] == grpc.StatusCode.DEADLINE_EXCEEDED:
print('timeout')
else:
print(fetch_map["serving_status_code"])
## IMDB comment sentiment inference service
([简体中文](./README_CN.md)|English)
### Get model files and sample data
```
sh get_data.sh
```
The downloaded package contains the cnn, lstm and bow model configs along with their test_data and train_data.
### Start RPC inference service
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### RPC Infer
The `paddlepaddle` package is used in `test_client.py`; you may need to install it (`pip install paddlepaddle`).
```
head test_data/part-0 | python test_client.py
```
It returns prediction results for the first 10 test cases.
## IMDB Comment Sentiment Prediction Service
(简体中文|[English](./README.md))
### Get Model Files and Sample Data
```
sh get_data.sh
```
The script downloads and extracts the config files for the cnn, lstm and bow models, together with test_data and train_data.
### Start the RPC Prediction Service
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### Run Prediction
`test_client.py` uses the `paddlepaddle` package; you may need to install it (`pip install paddlepaddle`).
```
head test_data/part-0 | python test_client.py
```
This predicts the first ten samples of test_data/part-0.
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
tar -zxvf text_classification_data.tar.gz
tar -zxvf imdb_model.tar.gz
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class IMDBDataset(dg.MultiSlotDataGenerator):
def load_resource(self, dictfile):
self._vocab = {}
wid = 0
if py_version == 2:
with open(dictfile) as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas
def get_words_and_label(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
" ").strip()
label = [int(line.split('|')[-1])]
words = [x for x in self._pattern.split(send) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size):
def local_iter():
for fname in infer_filelist:
with open(fname, "r") as fin:
for line in fin:
feas, label = self.get_words_and_label(line)
yield feas, label
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter():
feas, label = self.get_words_and_label(line)
yield ("words", feas), ("label", label)
return data_iter
if __name__ == "__main__":
imdb = IMDBDataset()
imdb.load_resource("imdb.vocab")
imdb.run_from_stdin()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import sys
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
# you can define any english sentence or dataset here
# This example reuses imdb reader in training, you
# can define your own data preprocessing easily.
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource('imdb.vocab')
for line in sys.stdin:
word_ids, label = imdb_dataset.get_words_and_label(line)
word_len = len(word_ids)
feed = {
"words": np.array(word_ids).reshape(word_len, 1),
"words.lod": [0, word_len]
}
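    # Above, "words.lod" carries the LoD offsets of the variable-length
    # `words` tensor: a single sequence covering rows [0, word_len).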
fetch = ["prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
#print("{} {}".format(fetch_map["prediction"][0], label[0]))
# Yolov4 Detection Service
([简体中文](README_CN.md)|English)
## Get Model
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## Start RPC Service
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
```
python test_client.py 000000570688.jpg
```
After the prediction completes, a json file with the prediction results and a picture annotated with the detection boxes will be generated in the `./output` folder.
# Yolov4 Detection Service
(简体中文|[English](README.md))
## Get Model
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## Start RPC Service
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
```
python test_client.py 000000570688.jpg
```
After prediction completes, a json file with the prediction results and an image annotated with the detection boxes are generated in the `./output` folder.
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([
File2Image(), BGR2RGB(), Resize(
(608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
(2, 0, 1))
])
postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
client = Client()
client.connect(['127.0.0.1:9393'])
client.set_rpc_timeout_ms(100000)
im = preprocess(sys.argv[1])
fetch_map = client.predict(
feed={
"image": im,
"im_size": np.array(list(im.shape[1:])),
},
fetch=["save_infer_model/scale_0.tmp_0"],
batch=False)
print(fetch_map)
fetch_map.pop("serving_status_code")
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
......@@ -16,5 +16,6 @@
from . import version
from . import client
from .client import *
from .httpclient import *
__version__ = version.version_tag
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests
import json
import numpy as np
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import gzip
from collections.abc import Iterable  # moved out of collections in Python 3.3, removed in 3.10
import base64
# The 'type' field of a feed_var/fetch_var selects the data type:
# 0 -> int64, 1 -> float32, 2 -> int32, 3 -> string (bytes in the proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
# Indexed by elem_type; each entry names the matching proto data field.
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
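# e.g. elem_type == float32_type (1) selects proto_data_key_list[1], i.e. "float_data"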
def list_flatten(items, ignore_types=(str, bytes)):
for x in items:
if isinstance(x, Iterable) and not isinstance(x, ignore_types):
yield from list_flatten(x)
else:
yield x
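# Example: list(list_flatten([[1, 2], [3, [4]]])) -> [1, 2, 3, 4]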
class HttpClient(object):
def __init__(self,
ip="0.0.0.0",
port="9393",
service_name="/GeneralModelService/inference"):
self.feed_names_ = []
self.feed_real_names = []
self.fetch_names_ = []
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.http_timeout_ms = 200000
self.ip = ip
self.port = port
self.server_port = port
self.service_name = service_name
self.key = None
self.try_request_gzip = False
self.try_response_gzip = False
def load_client_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig()
with open(file_path_list[0], 'r') as f:
model_conf = google.protobuf.text_format.Merge(f.read(), model_conf)
# Load the configuration: collect feed/fetch vars, their shapes
# and types, and map feed names to indices.
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_real_names = [var.name for var in model_conf.feed_var]
self.feed_names_to_idx_ = {}  # maps alias_name -> index of the feed_var in the proto
self.lod_tensor_set = set()
self.feed_tensor_len = {}  # only used for shape checks
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = [dim for dim in var.shape]
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
with open(file_path_list[-1], 'r') as f:
model_conf = google.protobuf.text_format.Merge(f.read(), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def set_http_timeout_ms(self, http_timeout_ms):
if not isinstance(http_timeout_ms, int):
raise ValueError("http_timeout_ms must be int type.")
else:
self.http_timeout_ms = http_timeout_ms
def set_request_compress(self, try_request_gzip):
self.try_request_gzip = try_request_gzip
def set_response_compress(self, try_response_gzip):
self.try_response_gzip = try_response_gzip
# use_key enables encrypted serving: load the key, then ask the server for the real inference endpoint.
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
self.get_serving_port()
def get_serving_port(self):
encrypt_url = "http://" + str(self.ip) + ":" + str(self.port)
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post(encrypt_url, data=req)
result = r.json()
print(result)
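# Expected reply shape (inferred from the check below), e.g.:
# {"endpoint_list": [9494]}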
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
self.server_port = str(result["endpoint_list"][0])
print("rpc port is ", self.server_port)
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
# feed accepts numpy arrays, JSON strings, and plain lists or tuples.
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, (list, tuple)):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, (list, str, tuple)):
# If the input is a list, str or tuple and the model has exactly one
# feed_var, wrap it as {feed_var_name: value} and append it to feed_batch.
if len(self.feed_names_) != 1:
raise ValueError(
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else:
raise ValueError("Feed only accepts dict and list of dict")
# batch_size must be 1, because the batch dimension is already carried inside each Tensor.
if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.")
fetch_names = []
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of the saved fetch list.")
feed_i = feed_batch[0]
Request = {}
Request["fetch_var_names"] = fetch_list
Request["log_id"] = int(log_id)
Request["tensor"] = []
index = 0
total_data_number = 0
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
Request["tensor"].append('')
Request["tensor"][index] = {}
lod = []
if "{}.lod".format(key) in feed_i:
lod = feed_i["{}.lod".format(key)]
shape = self.feed_shapes_[key].copy()
elem_type = self.feed_types_[key]
data_value = feed_i[key]
data_key = proto_data_key_list[elem_type]
# The input is not a string type.
if self.feed_types_[key] != bytes_type:
# feed_i[key] may be an np.ndarray,
# or a string, list or tuple;
# an np.ndarray must be converted to a flat list.
if isinstance(feed_i[key], np.ndarray):
shape_lst = []
# A 0-dim numpy array needs an extra wrapping list.
if feed_i[key].ndim == 0:
data_value = [feed_i[key].tolist()]
shape_lst.append(1)
else:
shape_lst.extend(list(feed_i[key].shape))
shape = shape_lst
data_value = feed_i[key].flatten().tolist()
# When batch is False, prepend 1 to shape as the batch dimension.
# When batch is True, numpy.shape already carries the batch dimension.
if not batch:
shape.insert(0, 1)
# For a list or tuple, flatten any nested structure.
if isinstance(feed_i[key], (list, tuple)):
# When batch is False, prepend 1 to shape as the batch dimension.
# When batch is True, a list (unlike numpy) has no regular shape,
# so take the length of the first dimension as the batch size
# and prepend it to the feed_var shape.
if not batch:
shape.insert(0, 1)
else:
shape.insert(0, len(feed_i[key]))
feed_i[key] = [x for x in list_flatten(feed_i[key])]
data_value = feed_i[key]
'''
Explanatory note: when the input is a string (bytes_type),
it is used as-is rather than flattened, i.e. effectively:
else:
shape = self.feed_shapes_[key]
data_value = feed_i[key]
'''
total_data_number = total_data_number + len(data_value)
Request["tensor"][index]["elem_type"] = elem_type
Request["tensor"][index]["shape"] = shape
Request["tensor"][index][data_key] = data_value
proto_index = self.feed_names_to_idx_[key]
Request["tensor"][index]["name"] = self.feed_real_names[proto_index]
Request["tensor"][index]["alias_name"] = key
if len(lod) > 0:
Request["tensor"][index]["lod"] = lod
index = index + 1
result = None
# request
web_url = "http://" + self.ip + ":" + self.server_port + self.service_name
postData = json.dumps(Request)
headers = {}
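# Compress only reasonably large payloads; 512 elements is the built-in threshold.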
if self.try_request_gzip and total_data_number > 512:
postData = gzip.compress(bytes(postData, 'utf-8'))
headers["Content-Encoding"] = "gzip"
if self.try_response_gzip:
headers["Accept-encoding"] = "gzip"
# requests支持自动识别解压
# Apply the configured timeout (requests expects seconds).
result = requests.post(url=web_url, headers=headers, data=postData, timeout=self.http_timeout_ms / 1000.0)
if result is None:
return None
if result.status_code == 200:
return result.json()
return result
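A minimal usage sketch for the new HttpClient (not part of the commit; it assumes a general model server on 127.0.0.1:9393, and the client config path plus the "x"/"price" feed/fetch names are illustrative):
```
from paddle_serving_client import HttpClient
import numpy as np

client = HttpClient(ip="127.0.0.1", port="9393")
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.set_request_compress(True)   # gzip large request bodies
client.set_response_compress(True)  # ask the server for gzip replies

# "x" and "price" are illustrative feed/fetch names.
data = np.random.rand(1, 13).astype("float32")
result = client.predict(feed={"x": data}, fetch=["price"], batch=True)
print(result)
```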
......@@ -14,18 +14,16 @@
# pylint: disable=doc-string-missing
from . import monitor
from . import rpc_service
from . import serve
from . import version
__all__ = ["version", "server", "serve", "monitor", "rpc_service", "dag"]
__all__ = ["version", "server", "serve", "monitor", "dag"]
from paddle_serving_server import (
version,
server,
serve,
monitor,
rpc_service,
dag, )
from .dag import *
......
This diff is collapsed.
......@@ -14,6 +14,7 @@
#!flask/bin/python
# pylint: disable=doc-string-missing
# Now, this is only for Pipeline.
from flask import Flask, request, abort
from contextlib import closing
from multiprocessing import Pool, Process, Queue
......
......@@ -16,7 +16,7 @@ from time import time as _time
import time
import threading
import multiprocessing
from paddle_serving_client import MultiLangClient, Client
from paddle_serving_client import Client
from concurrent import futures
import logging
import func_timeout
......@@ -330,8 +330,9 @@ class Op(object):
if self.client_type == 'brpc':
client = Client()
client.load_client_config(client_config)
elif self.client_type == 'grpc':
client = MultiLangClient()
# After testing completes, replace this with the brpc-http client.
# elif self.client_type == 'grpc':
# client = MultiLangClient()
elif self.client_type == 'local_predictor':
if self.local_predictor is None:
raise ValueError("local predictor not yet created")
......@@ -474,10 +475,13 @@ class Op(object):
fetch=self._fetch_names,
batch=True,
log_id=typical_logid)
# To be replaced with HttpClient later.
'''
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
'''
return call_result
def postprocess(self, input_data, fetch_data, log_id=0):
......
......@@ -21,17 +21,33 @@ option cc_generic_services = true;
message Tensor {
repeated bytes data = 1;
optional int32 elem_type = 2;
repeated int32 shape = 3;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
};
message Request { repeated FeedInst insts = 1; };
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
};
message Response { repeated FetchInst insts = 1; };
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
service GeneralModelService {
rpc inference(Request) returns (Response);
......
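For reference, the JSON body that HttpClient posts for a single float32 feed var maps onto the new Tensor message like this (a sketch; names and values are illustrative):
```
request_body = {
    "tensor": [{
        "name": "x",            # feed_var name from the model prototxt
        "alias_name": "input",  # alias_name from the model prototxt
        "elem_type": 1,         # 1 -> float32 -> "float_data"
        "shape": [1, 13],       # includes the batch dimension
        "float_data": [0.1] * 13,
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
```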