From 261ba120efaec07880cb0e72f3830a453d55b58a Mon Sep 17 00:00:00 2001
From: barrierye
Date: Mon, 13 Apr 2020 18:10:08 +0800
Subject: [PATCH] recover code

---
 core/general-server/op/general_copy_op.cpp    | 102 +++++++++
 core/general-server/op/general_copy_op.h      |  47 ++++
 .../op/general_dist_kv_infer_op.cpp           | 173 +++++++++++++++
 .../op/general_dist_kv_infer_op.h             |  46 ++++
 .../op/general_dist_kv_quant_infer_op.cpp     | 204 ++++++++++++++++++
 .../op/general_dist_kv_quant_infer_op.h       |  46 ++++
 ensemble-demo/client.py                       |  41 ++++
 ensemble-demo/server.py                       |  43 ++++
 8 files changed, 702 insertions(+)
 create mode 100644 core/general-server/op/general_copy_op.cpp
 create mode 100644 core/general-server/op/general_copy_op.h
 create mode 100755 core/general-server/op/general_dist_kv_infer_op.cpp
 create mode 100644 core/general-server/op/general_dist_kv_infer_op.h
 create mode 100644 core/general-server/op/general_dist_kv_quant_infer_op.cpp
 create mode 100644 core/general-server/op/general_dist_kv_quant_infer_op.h
 create mode 100644 ensemble-demo/client.py
 create mode 100644 ensemble-demo/server.py

diff --git a/core/general-server/op/general_copy_op.cpp b/core/general-server/op/general_copy_op.cpp
new file mode 100644
index 00000000..314fca19
--- /dev/null
+++ b/core/general-server/op/general_copy_op.cpp
@@ -0,0 +1,102 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/general_copy_op.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include "core/general-server/op/general_infer_helper.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/util/include/timer.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FeedInst;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralCopyOp::inference() {
+  // read request from the client
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  VLOG(2) << "precedent name: " << pre_name;
+  const TensorVector *in = &input_blob->tensor_vector;
+  VLOG(2) << "input size: " << in->size();
+  int batch_size = input_blob->GetBatchSize();
+  int input_var_num = 0;
+
+  GeneralBlob *res = mutable_data<GeneralBlob>();
+  if (!res) {
+    LOG(ERROR) << "Failed get op tls reader object output";
+  }
+  TensorVector *out = &res->tensor_vector;
+
+  VLOG(2) << "input batch size: " << batch_size;
+  res->SetBatchSize(batch_size);
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+
+  VLOG(2) << "Going to init lod tensor";
+  for (int i = 0; i < in->size(); ++i) {
+    paddle::PaddleTensor lod_tensor;
+    CopyLod(&in->at(i), &lod_tensor);
+    lod_tensor.dtype = in->at(i).dtype;
+    lod_tensor.name = in->at(i).name;
+    VLOG(2) << "lod tensor [" << i << "].name = " << lod_tensor.name;
+    out->push_back(lod_tensor);
+  }
+
+  VLOG(2) << "pack done.";
+
+  for (int i = 0; i < out->size(); ++i) {
+    int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
+    out->at(i).data.Resize(out->at(i).lod[0].back() * sizeof(int64_t));
+    out->at(i).shape = {out->at(i).lod[0].back(), 1};
+    int64_t *tgt_ptr = static_cast<int64_t *>(out->at(i).data.data());
+    for (int j = 0; j < out->at(i).lod[0].back(); ++j) {
+      tgt_ptr[j] = src_ptr[j];
+    }
+  }
+
+  VLOG(2) << "output done.";
+
+  timeline.Pause();
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, res);
+  AddBlobInfo(res, start);
+  AddBlobInfo(res, end);
+
+  VLOG(2) << "read data from client success";
+  return 0;
+}
+
+DEFINE_OP(GeneralCopyOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_copy_op.h b/core/general-server/op/general_copy_op.h
new file mode 100644
index 00000000..89627ffb
--- /dev/null
+++ b/core/general-server/op/general_copy_op.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include <vector>
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+#include "core/predictor/framework/resource.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+class GeneralCopyOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralCopyOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_dist_kv_infer_op.cpp b/core/general-server/op/general_dist_kv_infer_op.cpp
new file mode 100755
index 00000000..d6e5b2bd
--- /dev/null
+++ b/core/general-server/op/general_dist_kv_infer_op.cpp
@@ -0,0 +1,173 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/general_dist_kv_infer_op.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <unordered_map>
+#include <utility>
+#include "core/cube/cube-api/include/cube_api.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralDistKVInferOp::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  VLOG(2) << "Get precedent op name: " << pre_name;
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+
+  if (!input_blob) {
+    LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+    return -1;
+  }
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  int batch_size = input_blob->GetBatchSize();
+  VLOG(2) << "input batch size: " << batch_size;
+  std::vector<uint64_t> keys;
+  std::vector<rec::mcube::CubeValue> values;
+  int sparse_count = 0;
+  int dense_count = 0;
+  std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
+  size_t key_len = 0;
+  for (size_t i = 0; i < in->size(); ++i) {
+    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
+      ++dense_count;
+      continue;
+    }
+    ++sparse_count;
+    size_t elem_num = 1;
+    for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
+      elem_num *= in->at(i).shape[s];
+    }
+    key_len += elem_num;
+    int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
+    dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
+  }
+  keys.resize(key_len);
+  int key_idx = 0;
+  for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
+    std::copy(dataptr_size_pairs[i].first,
+              dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
+              keys.begin() + key_idx);
+    key_idx += dataptr_size_pairs[i].second;
+  }
+  rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
+  std::vector<std::string> table_names = cube->get_table_names();
+  if (table_names.size() == 0) {
+    LOG(ERROR) << "cube init error or cube config not given.";
+    return -1;
+  }
+  int ret = cube->seek(table_names[0], keys, &values);
+
+  if (values.size() != keys.size() || values[0].buff.size() == 0) {
+    LOG(ERROR) << "cube value return null";
+  }
+  size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
+  TensorVector sparse_out;
+  sparse_out.resize(sparse_count);
+  TensorVector dense_out;
+  dense_out.resize(dense_count);
+  int cube_val_idx = 0;
+  int sparse_idx = 0;
+  int dense_idx = 0;
+  std::unordered_map<int, int> in_out_map;
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+  for (size_t i = 0; i < in->size(); ++i) {
+    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
+      dense_out[dense_idx] = in->at(i);
+      ++dense_idx;
+      continue;
+    }
+
+    sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
+    for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
+      sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
+      std::copy(in->at(i).lod[x].begin(),
+                in->at(i).lod[x].end(),
+                sparse_out[sparse_idx].lod[x].begin());
+    }
+    sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
+    sparse_out[sparse_idx].shape.push_back(
+        sparse_out[sparse_idx].lod[0].back());
+    sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
+    sparse_out[sparse_idx].name = model_config->_feed_name[i];
+    sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
+                                       EMBEDDING_SIZE * sizeof(float));
+    float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
+    for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
+      float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
+      memcpy(data_ptr,
+             values[cube_val_idx].buff.data(),
+             values[cube_val_idx].buff.size());
+      cube_val_idx++;
+    }
+    ++sparse_idx;
+  }
+  TensorVector infer_in;
+  infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
+  infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
+
+  output_blob->SetBatchSize(batch_size);
+
+  VLOG(2) << "infer batch size: " << batch_size;
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+  timeline.Start();
+
+  if (InferManager::instance().infer(
+          GENERAL_MODEL_NAME, &infer_in, out, batch_size)) {
+    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
+    return -1;
+  }
+
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
+  return 0;
+}
+DEFINE_OP(GeneralDistKVInferOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_dist_kv_infer_op.h b/core/general-server/op/general_dist_kv_infer_op.h
new file mode 100644
index 00000000..2dee5bca
--- /dev/null
+++ b/core/general-server/op/general_dist_kv_infer_op.h
@@ -0,0 +1,46 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+class GeneralDistKVInferOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralDistKVInferOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_dist_kv_quant_infer_op.cpp b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
new file mode 100644
index 00000000..44bca2f9
--- /dev/null
+++ b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
@@ -0,0 +1,204 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/general_dist_kv_quant_infer_op.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <unordered_map>
+#include <utility>
+#include "core/cube/cube-api/include/cube_api.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/predictor/tools/quant.h"
+#include "core/util/include/timer.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralDistKVQuantInferOp::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  VLOG(2) << "Get precedent op name: " << pre_name;
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+
+  if (!input_blob) {
+    LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+    return -1;
+  }
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  int batch_size = input_blob->GetBatchSize();
+  VLOG(2) << "input batch size: " << batch_size;
+  std::vector<uint64_t> keys;
+  std::vector<rec::mcube::CubeValue> values;
+  int sparse_count = 0;
+  int dense_count = 0;
+  std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
+  size_t key_len = 0;
+  for (size_t i = 0; i < in->size(); ++i) {
+    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
+      ++dense_count;
+      continue;
+    }
+    ++sparse_count;
+    size_t elem_num = 1;
+    for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
+      elem_num *= in->at(i).shape[s];
+    }
+    key_len += elem_num;
+    int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
+    dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
+  }
+  keys.resize(key_len);
+  int key_idx = 0;
+  for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
+    std::copy(dataptr_size_pairs[i].first,
+              dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
+              keys.begin() + key_idx);
+    key_idx += dataptr_size_pairs[i].second;
+  }
+  rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
+  std::vector<std::string> table_names = cube->get_table_names();
+  if (table_names.size() == 0) {
+    LOG(ERROR) << "cube init error or cube config not given.";
+    return -1;
+  }
+  int ret = cube->seek(table_names[0], keys, &values);
+
+  if (values.size() != keys.size() || values[0].buff.size() == 0) {
+    LOG(ERROR) << "cube value return null";
+  }
+
+  TensorVector sparse_out;
+  sparse_out.resize(sparse_count);
+  TensorVector dense_out;
+  dense_out.resize(dense_count);
+  int cube_val_idx = 0;
+  int sparse_idx = 0;
+  int dense_idx = 0;
+  std::unordered_map<int, int> in_out_map;
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+  int cube_quant_bits = resource.get_cube_quant_bits();
+  size_t EMBEDDING_SIZE = 0;
+  if (cube_quant_bits == 0) {
+    EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
+  } else {
+    EMBEDDING_SIZE = values[0].buff.size() - 2 * sizeof(float);
+  }
+
+  for (size_t i = 0; i < in->size(); ++i) {
+    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
+      dense_out[dense_idx] = in->at(i);
+      ++dense_idx;
+      continue;
+    }
+
+    sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
+    for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
+      sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
+      std::copy(in->at(i).lod[x].begin(),
+                in->at(i).lod[x].end(),
+                sparse_out[sparse_idx].lod[x].begin());
+    }
+    sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
+    sparse_out[sparse_idx].shape.push_back(
+        sparse_out[sparse_idx].lod[0].back());
+    sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
+    sparse_out[sparse_idx].name = model_config->_feed_name[i];
+    sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
+                                       EMBEDDING_SIZE * sizeof(float));
+    // END HERE
+    float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
+    for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
+      float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
+      if (cube_quant_bits == 0) {
+        memcpy(data_ptr,
+               values[cube_val_idx].buff.data(),
+               values[cube_val_idx].buff.size());
+      } else {
+        // min (float), max (float), num, num, num... (Byte)
+        size_t num_of_float =
+            values[cube_val_idx].buff.size() - 2 * sizeof(float);
+        float *float_ptr = new float[num_of_float];
+        char *src_ptr = new char[values[cube_val_idx].buff.size()];
+        memcpy(src_ptr,
+               values[cube_val_idx].buff.data(),
+               values[cube_val_idx].buff.size());
+        float *minmax = reinterpret_cast<float *>(src_ptr);
+        dequant(src_ptr + 2 * sizeof(float),
+                float_ptr,
+                minmax[0],
+                minmax[1],
+                num_of_float,
+                cube_quant_bits);
+        memcpy(data_ptr, float_ptr, sizeof(float) * num_of_float);
+        delete[] float_ptr;
+        delete[] src_ptr;
+      }
+      cube_val_idx++;
+    }
+    ++sparse_idx;
+  }
+  TensorVector infer_in;
+  infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
+  infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
+
+  output_blob->SetBatchSize(batch_size);
+
+  VLOG(2) << "infer batch size: " << batch_size;
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+  timeline.Start();
+
+  if (InferManager::instance().infer(
+          GENERAL_MODEL_NAME, &infer_in, out, batch_size)) {
+    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
+    return -1;
+  }
+
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
+  return 0;
+}
+DEFINE_OP(GeneralDistKVQuantInferOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_dist_kv_quant_infer_op.h b/core/general-server/op/general_dist_kv_quant_infer_op.h
new file mode 100644
index 00000000..e153311a
--- /dev/null
+++ b/core/general-server/op/general_dist_kv_quant_infer_op.h
@@ -0,0 +1,46 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+class GeneralDistKVQuantInferOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralDistKVQuantInferOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/ensemble-demo/client.py b/ensemble-demo/client.py
new file mode 100644
index 00000000..9a9467dc
--- /dev/null
+++ b/ensemble-demo/client.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from paddle_serving_client import Client
+from imdb_reader import IMDBDataset
+import sys
+
+client = Client()
+client.load_client_config('imdb_bow_client_conf/serving_client_conf.prototxt')
+client.connect(["127.0.0.1:9393"])
+
+# You can feed any English sentence or dataset here.
+# This example reuses the IMDB reader from training; you
+# can easily define your own data preprocessing instead.
+imdb_dataset = IMDBDataset()
+imdb_dataset.load_resource('imdb.vocab')
+
+for i in range(400):
+    line = 'i am very sad | 0'
+    word_ids, label = imdb_dataset.get_words_and_label(line)
+    feed = {"words": word_ids}
+    fetch = ["acc", "cost", "prediction"]
+    fetch_maps = client.predict(feed=feed, fetch=fetch)
+    if len(fetch_maps) == 1:
+        print("step: {}, res: {}".format(i, fetch_maps['prediction'][1]))
+    else:
+        for mi, fetch_map in enumerate(fetch_maps):
+            print("step: {}, model: {}, res: {}".format(
+                i, mi, fetch_map['prediction'][1]))
+    # print('bow: 0.633530199528, cnn: 0.560272455215')
+    # exit(0)
diff --git a/ensemble-demo/server.py b/ensemble-demo/server.py
new file mode 100644
index 00000000..2e643054
--- /dev/null
+++ b/ensemble-demo/server.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+import os
+import sys
+from paddle_serving_server import OpMaker
+from paddle_serving_server import OpSeqMaker
+from paddle_serving_server import Server
+
+op_maker = OpMaker()
+read_op = op_maker.create('general_reader')
+g1_infer_op = op_maker.create('general_infer', node_name='g1')
+g2_infer_op = op_maker.create('general_infer', node_name='g2')
+# add_op = op_maker.create('general_add')
+response_op = op_maker.create('general_response')
+
+op_seq_maker = OpSeqMaker()
+op_seq_maker.add_op(read_op)
+op_seq_maker.add_op(g1_infer_op, dependent_nodes=[read_op])
+op_seq_maker.add_op(g2_infer_op, dependent_nodes=[read_op])
+# op_seq_maker.add_op(add_op, dependent_nodes=[g1_infer_op, g2_infer_op])
+# op_seq_maker.add_op(response_op, dependent_nodes=[add_op])
+op_seq_maker.add_op(response_op, dependent_nodes=[g1_infer_op, g2_infer_op])
+
+server = Server()
+server.set_op_sequence(op_seq_maker.get_op_sequence())
+# server.load_model_config(sys.argv[1])
+model_configs = {'g1': 'imdb_cnn_model', 'g2': 'imdb_bow_model'}
+server.load_model_config(model_configs)
+server.prepare_server(workdir="work_dir1", port=9393, device="cpu")
+server.run_server()
-- 
GitLab