Unverified commit a7064391, authored by Zhang Yulong, committed by GitHub

Merge branch 'develop' into ci-test

File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
@@ -33,9 +33,7 @@ if (WITH_PYTHON)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)
if (CLIENT)
py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
@@ -53,11 +51,7 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (APP)
@@ -84,11 +78,6 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
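For orientation, here is a minimal, hypothetical sketch of how a client could fill an InferenceRequest before calling the Inference RPC, assuming the C++ classes that protoc generates from the messages above (the feed/fetch names and values are illustrative only; elem_type 1 corresponds to P_FLOAT32 in the ProtoDataType enum used later in this diff):

#include "multi_lang_general_model_service.pb.h"  // assumed name of the protoc-generated header

int main() {
  baidu::paddle_serving::multi_lang::InferenceRequest req;
  req.set_is_python(false);
  req.set_log_id(0);
  req.add_feed_var_names("x");       // illustrative feed variable
  req.add_fetch_var_names("price");  // illustrative fetch variable
  // One FeedInst carries one Tensor per feed variable.
  auto *inst = req.add_insts();
  auto *tensor = inst->add_tensor_array();
  tensor->set_elem_type(1);  // float32
  tensor->add_shape(1);
  tensor->add_shape(13);
  for (int i = 0; i < 13; ++i) {
    tensor->add_float_data(0.5f);
  }
  // req can now be sent through a MultiLangGeneralModelService::Inference stub.
  return 0;
}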
File mode changed from 100755 to 100644
@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
#execute_process(COMMAND go env -w GO111MODULE=off)
add_subdirectory(cube-server)
add_subdirectory(cube-api)
add_subdirectory(cube-builder)
-#add_subdirectory(cube-transfer)
+add_subdirectory(cube-transfer)
-#add_subdirectory(cube-agent)
+add_subdirectory(cube-agent)
@@ -15,7 +15,6 @@
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
project(cube-agent Go)
include(cmake/golang.cmake)
ExternalGoProject_Add(agent-docopt-go github.com/docopt/docopt-go)
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
@@ -22,7 +22,7 @@ include_directories(SYSTEM ${CMAKE_CURRENT_BINARY_DIR}/../)
add_executable(cube-builder src/main.cpp include/cube-builder/util.h src/util.cpp src/builder_job.cpp include/cube-builder/builder_job.h include/cube-builder/define.h src/seqfile_reader.cpp include/cube-builder/seqfile_reader.h include/cube-builder/raw_reader.h include/cube-builder/vtext.h src/crovl_builder_increment.cpp include/cube-builder/crovl_builder_increment.h src/curl_simple.cpp include/cube-builder/curl_simple.h)
-add_dependencies(cube-builder jsoncpp boost)
+add_dependencies(cube-builder jsoncpp boost brpc)
set(DYNAMIC_LIB
gflags
@@ -39,4 +39,8 @@ target_link_libraries(cube-builder ${DYNAMIC_LIB})
# install
install(TARGETS cube-builder RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin)
-install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/tool DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/tool/kvtool.py DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
install(FILES ${CMAKE_CURRENT_LIST_DIR}/tool/kv_to_seqfile.py DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/tool/source DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
@@ -212,7 +212,7 @@ class slim_hash_map {
int copy_data_from(const slim_hash_map& rhs) {
destroy();
LOG(INFO) << "start copy data, rhs info, mHashSize: " << rhs.m_nHashSize;
if (rhs.m_nHashSize > 0) {
m_hashTable = new (std::nothrow) uint32_t[rhs.m_nHashSize];
if (!m_hashTable) {
@@ -231,7 +231,7 @@ class slim_hash_map {
<< sizeof(hash_node_t) * BLOCK_SIZE;
return -1;
}
LOG(INFO) << "copy data, m_nBlockNum: " << m_nBlockNum << " , copy size:" << sizeof(hash_node_t) * BLOCK_SIZE;
memcpy(m_blockAddr[m_nBlockNum],
rhs.m_blockAddr[m_nBlockNum],
sizeof(hash_node_t) * BLOCK_SIZE);
@@ -265,11 +265,13 @@ class slim_hash_map {
}
size_type index = key % m_nHashSize;
hash_node_t* node = get_node(m_hashTable[index]);
int node_cnt = 0;
while (node != NULL && node->data.first != key) {
LOG(INFO) << "node link get:" << node->data.first;
node_cnt++;
node = get_node(node->next);
}
LOG(INFO) << "key: " << key << " , found count: " << node_cnt;
if (node == NULL) {
return end();
}
@@ -390,7 +392,6 @@ class slim_hash_map {
if (node != NULL) {
return node->data.second;
}
return add_node(index, key)->data.second;
}
void clear() {
@@ -399,16 +400,16 @@ class slim_hash_map {
m_nFreeEntries = 0;
m_nSize = 0;
}
-bool load(const char* file) {
+bool load(const char* file, uint32_t block_id) {
// clear();
// bias = 0 means base mode, bias = K means patch mode, and base dict has size K
int size = sizeof(key_t) + sizeof(value_t);
FILE* fp = fopen(file, "rb");
char* buf = reinterpret_cast<char*>(malloc(size * 100000));
LOG(INFO) << "current block id: " << block_id;
if (fp == NULL || buf == NULL) {
return false;
}
size_t read_count;
bool err = false;
key_t key;
@@ -423,6 +424,8 @@ class slim_hash_map {
for (int i = 0; i < static_cast<int>(read_count); ++i) {
key = *(reinterpret_cast<key_t*>(buf + i * size));
value = *(reinterpret_cast<value_t*>(buf + i * size + sizeof(key_t)));
value = ((uint64_t)block_id << 32) | value;
LOG(INFO) << "slim map key: " << key << " , value: " << value;
(*this)[key] = value;
}
}
@@ -557,7 +560,6 @@ class slim_hash_map {
}
hash_node_t* add_node(uint32_t index, const key_type& key) {
++m_nSize;
if (m_nFreeEntries) {
uint32_t addr = m_nFreeEntries;
hash_node_t* node = get_node(addr);
@@ -569,7 +571,7 @@ class slim_hash_map {
}
uint32_t block = ((m_nNextEntry & 0xFF800000) >> 23);
//LOG(INFO) << "key: " << key << " here. index: " << index << " , m_nNextEntry: "<< m_nNextEntry << " , block:" << block<< ", m_nBlockNum:" << m_nBlockNum;
if (block >= m_nBlockNum) {
try {
m_blockAddr[m_nBlockNum++] = new hash_node_t[BLOCK_SIZE];
@@ -581,7 +583,6 @@ class slim_hash_map {
return NULL;
}
}
uint32_t addr = m_nNextEntry;
++m_nNextEntry;
hash_node_t* node = get_node(addr);
......
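The new block_id argument threads a simple packing convention through the index: in patch mode the base dict's block count is stored in the upper 32 bits of each value, while the lower 32 bits keep the offset inside that data block; Dict::seek() in the next file splits the value back apart the same way. A minimal standalone sketch of that convention (pack_value/unpack_value are hypothetical helper names, not repo code):

#include <cassert>
#include <cstdint>

// Mirror of `value = ((uint64_t)block_id << 32) | value;` in load() above.
static inline uint64_t pack_value(uint32_t block_id, uint32_t offset) {
  return (static_cast<uint64_t>(block_id) << 32) | offset;
}

// Mirror of the unpacking in Dict::seek(): id = flag >> 32, addr = (uint32_t)flag.
static inline void unpack_value(uint64_t flag, uint32_t* block_id, uint32_t* offset) {
  *block_id = static_cast<uint32_t>(flag >> 32);
  *offset = static_cast<uint32_t>(flag);
}

int main() {
  uint64_t v = pack_value(3, 0x1234);  // illustrative values
  uint32_t id = 0;
  uint32_t off = 0;
  unpack_value(v, &id, &off);
  assert(id == 3 && off == 0x1234);
  return 0;
}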
@@ -51,13 +51,12 @@ int Dict::load(const std::string& dict_path,
bool in_mem,
const std::string& v_path) {
TIME_FLAG(load_start);
int ret = load_index(dict_path, v_path);
if (ret != E_OK) {
LOG(WARNING) << "load index failed";
return ret;
}
LOG(INFO) << "load index in mem mode: " << in_mem ;
if (in_mem) {
ret = load_data(dict_path, v_path);
if (ret != E_OK) {
@@ -81,8 +80,11 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
std::string index_n_path(dict_path);
index_n_path.append(v_path);
index_n_path.append("/index.n");
uint32_t cur_block_id = 0;
if (_base_dict) cur_block_id = _base_dict->_block_set.size();
LOG(INFO) << "index file path: " << index_n_path;
//ERR HERE
std::unique_ptr<FILE, decltype(&fclose)> pf(fopen(index_n_path.c_str(), "rb"),
&fclose);
if (pf.get() == NULL) {
@@ -150,12 +152,16 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
} else {
if (_slim_table.copy_data_from(_base_dict->_slim_table) != 0) {
LOG(ERROR) << "copy data from old index failed in patch mode";
return E_DATA_ERROR;
}
file_idx = 0;
LOG(INFO)
-<< "index check file len failed in patch mode, set file_idx to 0";
+<< "index check fail, direct copy";
}
}
LOG(INFO) << "resize slim table, new count: " << count/2;
_slim_table.resize(count / 2);
char file[1024];
@@ -167,6 +173,7 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
dict_path.c_str(),
v_path.c_str(),
file_idx);
LOG(INFO) << "load file str: " << file;
if (stat(file, &fstat) < 0) {
if (errno == ENOENT) {
LOG(WARNING) << "index." << file_idx << " not exist";
@@ -181,8 +188,8 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
<< (uint64_t)fstat.st_size;
return E_DATA_ERROR;
}
-LOG(INFO) << "loading from index." << file_idx;
+LOG(INFO) << "loading from index." << file_idx << " . table size: " << _slim_table.size();
-if (!_slim_table.load(file) || _slim_table.size() > count) {
+if (!_slim_table.load(file, cur_block_id)) {
return E_DATA_ERROR;
}
@@ -193,8 +200,15 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
}
int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
std::vector<uint32_t> block_size;
uint64_t total_data_size = 0;
if (_base_dict) {
_block_set = _base_dict->_block_set;
LOG(INFO)<< "load data base dict block set size: " << _block_set[0].size;
for (size_t i = 0; i < _block_set.size(); ++i) {
block_size.push_back(_block_set[i].size);
total_data_size += _block_set[i].size;
}
}
std::string data_n_path(dict_path);
@@ -212,8 +226,6 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
std::vector<uint32_t> block_size;
uint64_t total_data_size = 0;
for (uint32_t i = 0; i < count; ++i) {
uint32_t size = 0;
if (fread(reinterpret_cast<void*>(&size), sizeof(uint32_t), 1, pf) != 1) {
@@ -222,6 +234,7 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
block_size.push_back(size);
LOG(INFO) << "new block size: " << size;
total_data_size += size;
}
g_data_size << (total_data_size / 1024 / 1024);
@@ -229,36 +242,35 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
pf = NULL;
uint32_t old_size = _block_set.size();
LOG(INFO) << "load data old size: " << old_size;
for (size_t i = 0; i < old_size; ++i) {
if (_block_set[i].size != block_size[i]) {
old_size = 0;
break;
}
}
-_block_set.resize(count);
+LOG(INFO) << "load data block set count: " << count << " , old size: " << old_size;
_block_set.resize(count + old_size);
for (size_t i = old_size; i < _block_set.size(); ++i) {
char data_path[1024];
LOG(INFO) << "load from data." << i;
-snprintf(
-data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i);
+//snprintf(
+// data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i);
snprintf(data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i - old_size);
FILE* data_file = fopen(data_path, "rb");
if (data_file == NULL) {
-LOG(WARNING) << "open data file [" << data_path << " failed";
+LOG(WARNING) << "open data file [" << data_path << " ]failed";
_block_set[i].s_data.reset();
_block_set[i].size = 0;
continue;
}
_block_set[i].s_data.reset(reinterpret_cast<char*>(malloc(block_size[i] * sizeof(char))));
_block_set[i].s_data.reset(
reinterpret_cast<char*>(malloc(block_size[i] * sizeof(char))));
if (_block_set[i].s_data.get() == NULL) {
LOG(ERROR) << "malloc data failed";
fclose(data_file);
return E_OOM;
}
_block_set[i].size = block_size[i];
if (fread(reinterpret_cast<void*>(_block_set[i].s_data.get()),
sizeof(char),
_block_set[i].size,
@@ -267,7 +279,10 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
fclose(data_file);
return E_DATA_ERROR;
}
LOG(INFO) << "load new data to BlockSet succ";
for (size_t ii = 0; ii < 20; ++ii) {
LOG(INFO) << "data ptr: " << (int)(_block_set[i].s_data.get()[ii]);
}
fclose(data_file);
}
@@ -386,12 +401,11 @@ bool Dict::seek(uint64_t key, char* buff, uint64_t* buff_size) {
uint64_t flag = it->second;
uint32_t id = (uint32_t)(flag >> 32);
uint64_t addr = (uint32_t)(flag);
LOG(INFO) << "search key: " << id << " , addr: " << addr;
if (_block_set.size() > id) {
uint32_t block_size = _block_set[id].size;
char* block_data = NULL;
block_data = _block_set[id].s_data.get();
if (block_data && addr + sizeof(uint32_t) <= block_size) {
uint32_t len = *(reinterpret_cast<uint32_t*>(block_data + addr));
if (addr + len <= block_size && len >= sizeof(uint32_t)) {
@@ -405,6 +419,7 @@ bool Dict::seek(uint64_t key, char* buff, uint64_t* buff_size) {
<< default_buffer_size;
return false;
}
LOG(INFO) << "seek key: " << key << " , addr: " << addr;
memcpy(buff,
(block_data + addr + sizeof(uint32_t)),
len - sizeof(uint32_t));
......
@@ -18,11 +18,9 @@ project(cube-transfer Go)
include(cmake/golang.cmake)
-ExternalGoProject_Add(rfw github.com/mipearson/rfw)
+ExternalGoProject_Add(transfer-rfw github.com/mipearson/rfw)
-ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
+ExternalGoProject_Add(transfer-docopt-go github.com/docopt/docopt-go)
-add_custom_target(logex
+ExternalGoProject_Add(transfer-logex github.com/Badangel/logex)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get github.com/Badangel/logex
DEPENDS rfw)
add_subdirectory(src)
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
File mode changed from 100755 to 100644
@@ -14,6 +14,6 @@
set(SOURCE_FILE cube-transfer.go)
add_go_executable(cube-transfer ${SOURCE_FILE})
-add_dependencies(cube-transfer docopt-go)
+add_dependencies(cube-transfer transfer-docopt-go)
-add_dependencies(cube-transfer rfw)
+add_dependencies(cube-transfer transfer-rfw)
-add_dependencies(cube-transfer logex)
+add_dependencies(cube-transfer transfer-logex)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
@@ -17,68 +17,56 @@ package transfer
import (
"fmt"
"github.com/Badangel/logex"
"os"
"time"
"transfer/dict"
)
func Start() {
-go BackupTransfer()
+BackupTransfer()
logex.Notice(">>> starting server...")
addr := ":" + Port
err := startHttp(addr)
if err != nil {
logex.Fatalf("start http(addr=%v) failed: %v", addr, err)
os.Exit(255)
}
logex.Notice(">>> start server succ")
}
func BackupTransfer() {
-for {
-//trigger
-version, err := TriggerStart(Dict.DonefileAddress)
-if err != nil {
-logex.Fatalf("[trigger err]trigger err:%v ", err)
-fmt.Printf("[error]trigger err:%v \n", err)
-break
-}
-logex.Noticef("[trigger] get version:%v \n", version)
-if version.Id == 0 {
+//trigger
+version, err := TriggerStart(Dict.DonefileAddress)
+if err != nil {
+logex.Fatalf("[trigger err]trigger err:%v ", err)
+fmt.Printf("[error]trigger err:%v \n", err)
+fmt.Print("transfer over!")
+logex.Noticef("[transfer]status machine exit!")
+return
+}
+logex.Noticef("[trigger] get version:%v \n", version)
logex.Noticef("[sleep]no new version, sleep 5 min")
fmt.Printf("[sleep]no new version, wait 5 min\n")
time.Sleep(5 * time.Minute)
continue
}
Dict.WaitVersionInfo = version
logex.Noticef("[trigger finish] WaitVersionInfo version:%v \n", Dict.WaitVersionInfo)
WriteWaitVersionInfoToFile()
//builder
Dict.WaitVersionInfo.Status = dict.Dict_Status_Building
Dict.WaitVersionInfo.MetaInfos = make(map[int]string)
WriteWaitVersionInfoToFile()
if err = BuilderStart(Dict.WaitVersionInfo); err != nil {
logex.Fatalf("builder err:%v \n", err)
}
if Dict.WaitVersionInfo.Mode == dict.BASE {
var newCurrentVersion []dict.DictVersionInfo
Dict.CurrentVersionInfo = newCurrentVersion
WriteCurrentVersionInfoToFile()
}
-logex.Noticef("[builder finish] WaitVersionInfo version:%v \n", Dict.WaitVersionInfo)
+if Dict.WaitVersionInfo.Mode == dict.DELTA {
var newCurrentVersion []dict.DictVersionInfo
Dict.CurrentVersionInfo = newCurrentVersion
WriteCurrentVersionInfoToFile()
}
logex.Noticef("[builder finish] WaitVersionInfo version:%v \n", Dict.WaitVersionInfo)
//deployer
Dict.WaitVersionInfo.Status = dict.Dict_Status_Deploying
WriteWaitVersionInfoToFile()
if err = DeployStart(Dict.WaitVersionInfo); err != nil {
logex.Fatalf("deploy err:%v \n", err)
}
logex.Noticef("[deploy finish]current version: %v\n",Dict.CurrentVersionInfo)
}
logex.Noticef("[deploy finish]current version: %v\n",Dict.CurrentVersionInfo)
fmt.Print("transfer over!")
logex.Noticef("[transfer]status machine exit!")
}
@@ -38,18 +38,19 @@ func GetDoneFileInfo(addr string) (version dict.DictVersionInfo, err error) {
Wget(addr, donefileAddr)
addr = donefileAddr
}
baseDonefile := addr + "/base.txt"
fmt.Printf("[trigrer]donefile path:%v \n", baseDonefile)
logex.Noticef("[trigrer]base donefile path:%v", baseDonefile)
contents, err := ioutil.ReadFile(baseDonefile)
VersionLen := len(Dict.CurrentVersionInfo)
version.DictName = Dict.DictName
-if err != nil {
-fmt.Printf("[trigrer]read files err:%v \n", err)
-logex.Fatalf("[trigrer]read files err:%v ", err)
+fmt.Printf("get into mode check here\n")
+if Dict.DictMode == dict.BASE_ONLY {
+baseDonefile := addr + "/base.txt"
fmt.Printf("[trigrer]donefile path:%v \n", baseDonefile)
logex.Noticef("[trigrer]base donefile path:%v", baseDonefile)
contents, err_0 := ioutil.ReadFile(baseDonefile)
if err_0 != nil {
fmt.Printf("[trigrer]read files err:%v \n", err_0)
logex.Fatalf("[trigrer]read files err:%v ", err_0)
return
} else {
contentss := string(contents)
lines := strings.Split(contentss, "\n")
index := len(lines) - 1
@@ -80,19 +81,21 @@ func GetDoneFileInfo(addr string) (version dict.DictVersionInfo, err error) {
version.Mode = dict.BASE
return
}
}
-if Dict.DictMode == dict.BASR_DELTA && VersionLen > 0 {
+}
if Dict.DictMode == dict.BASR_DELTA {
patchDonefile := addr + "/patch.txt"
fmt.Printf("[trigrer]patchDonefile path:%v \n", patchDonefile)
logex.Noticef("[trigrer]patch donefile path:%v", patchDonefile)
-contents, err = ioutil.ReadFile(patchDonefile)
+contents, err_0 := ioutil.ReadFile(patchDonefile)
-if err != nil {
+if err_0 != nil {
-fmt.Printf("read files err:%v \n", err)
+fmt.Printf("[trigrer]read files err:%v \n", err_0)
logex.Fatalf("[trigrer]read files err:%v ", err_0)
return
} else {
contentss := string(contents)
lines := strings.Split(contentss, "\n")
fmt.Printf("[trigger]get patch lines here\n")
for index := 0; index < len(lines)-1; index++ {
if len(lines[index]) < 3 {
logex.Noticef("[trigrer]get patch donfile info error")
@@ -106,14 +109,15 @@ func GetDoneFileInfo(addr string) (version dict.DictVersionInfo, err error) {
logex.Noticef("[trigrer]donfile info:%v", donefileInfo)
newId, _ := strconv.Atoi(donefileInfo.Id)
newKey, _ := strconv.Atoi(donefileInfo.Key)
-if newId > Dict.CurrentVersionInfo[VersionLen-1].Id && newKey == Dict.CurrentVersionInfo[VersionLen-1].Key {
+fmt.Printf("[trigger]read patch id: %d, key: %d\n", newId, newKey)
if VersionLen == 0 || newId > Dict.CurrentVersionInfo[VersionLen-1].Id {
version.Id = newId
version.Key, _ = strconv.Atoi(donefileInfo.Key)
version.Input = donefileInfo.Input
deployVersion := int(time.Now().Unix())
version.CreateTime = deployVersion
version.Version = deployVersion
-version.Depend = Dict.CurrentVersionInfo[VersionLen-1].Depend
+version.Depend = deployVersion
version.Mode = dict.DELTA
return
}
......
@@ -96,7 +96,8 @@ func ExeCommad(files string, params []string) (err error) {
func Wget(ftpPath string, downPath string) {
var params []string
-params = append(params, "-P")
+params = append(params, "--limit-rate=100m")
params = append(params, "-P")
params = append(params, downPath)
params = append(params, "-r")
params = append(params, "-N")
@@ -110,4 +111,4 @@ func Wget(ftpPath string, downPath string) {
if err != nil {
fmt.Printf("wget exe: %v\n", err)
}
}
\ No newline at end of file
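In effect, the helper now spawns something along the lines of wget --limit-rate=100m -P <downPath> -r -N ... (the remaining arguments, including the ftp path, are appended in the collapsed lines), which caps each mirror download at roughly 100 MB/s.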
@@ -207,7 +207,7 @@ class PredictorClient {
void init_gflags(std::vector<std::string> argv);
-int init(const std::vector<std::string> &client_conf);
+int init(const std::vector<std::string>& client_conf);
void set_predictor_conf(const std::string& conf_path,
const std::string& conf_file);
@@ -218,23 +218,22 @@ class PredictorClient {
int destroy_predictor();
-int numpy_predict(
-const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
-const std::vector<std::string>& float_feed_name,
-const std::vector<std::vector<int>>& float_shape,
-const std::vector<std::vector<int>>& float_lod_slot_batch,
-const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
-const std::vector<std::string>& int_feed_name,
-const std::vector<std::vector<int>>& int_shape,
-const std::vector<std::vector<int>>& int_lod_slot_batch,
-const std::vector<std::vector<std::string>>& string_feed_batch,
-const std::vector<std::string>& string_feed_name,
-const std::vector<std::vector<int>>& string_shape,
-const std::vector<std::vector<int>>& string_lod_slot_batch,
-const std::vector<std::string>& fetch_name,
-PredictorRes& predict_res_batch,  // NOLINT
-const int& pid,
-const uint64_t log_id);
+int numpy_predict(const std::vector<py::array_t<float>>& float_feed,
+const std::vector<std::string>& float_feed_name,
+const std::vector<std::vector<int>>& float_shape,
+const std::vector<std::vector<int>>& float_lod_slot_batch,
+const std::vector<py::array_t<int64_t>>& int_feed,
+const std::vector<std::string>& int_feed_name,
+const std::vector<std::vector<int>>& int_shape,
+const std::vector<std::vector<int>>& int_lod_slot_batch,
+const std::vector<std::string>& string_feed,
+const std::vector<std::string>& string_feed_name,
+const std::vector<std::vector<int>>& string_shape,
+const std::vector<std::vector<int>>& string_lod_slot_batch,
+const std::vector<std::string>& fetch_name,
+PredictorRes& predict_res_batch,  // NOLINT
+const int& pid,
+const uint64_t log_id);
private:
PredictorApi _api;
@@ -243,6 +242,7 @@ class PredictorClient {
std::string _predictor_path;
std::string _conf_file;
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
......
@@ -25,8 +25,6 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
std::once_flag gflags_init_flag;
namespace py = pybind11;
@@ -68,9 +66,13 @@ int PredictorClient::init(const std::vector<std::string> &conf_file) {
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
@@ -146,15 +148,15 @@ int PredictorClient::create_predictor() {
}
int PredictorClient::numpy_predict(
-const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
+const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
-const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
+const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
-const std::vector<std::vector<std::string>> &string_feed_batch,
+const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
const std::vector<std::vector<int>> &string_lod_slot_batch,
@@ -162,12 +164,6 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
batch_size = batch_size > string_feed_batch.size() ? batch_size
: string_feed_batch.size();
VLOG(2) << "batch size: " << batch_size;
// batch_size must be 1, cause batch is already in Tensor.
// I suggest to remove the outside vector<>.
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
@@ -190,136 +186,122 @@ int PredictorClient::numpy_predict(
}
int vec_idx = 0;
-// batch_size can only be 1, cause batch is already in Tensor.
+// batch is already in Tensor.
-// if batch_size is not 1, error will occur in C++ part.
+std::vector<Tensor *> tensor_vec;
for (int bi = 0; bi < batch_size; bi++) {
VLOG(2) << "prepare batch " << bi;
std::vector<Tensor *> tensor_vec;
FeedInst *inst = req.add_insts();
std::vector<py::array_t<float>> float_feed = float_feed_batch[bi];
std::vector<py::array_t<int64_t>> int_feed = int_feed_batch[bi];
std::vector<std::string> string_feed = string_feed_batch[bi];
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
-for (auto &name : string_feed_name) {
+for (auto &name : float_feed_name) {
-tensor_vec.push_back(inst->add_tensor_array());
+tensor_vec.push_back(req.add_tensor());
}
-VLOG(2) << "batch [" << bi << "] "
+for (auto &name : int_feed_name) {
-<< "prepared";
+tensor_vec.push_back(req.add_tensor());
}
-vec_idx = 0;
+for (auto &name : string_feed_name) {
-for (auto &name : float_feed_name) {
+tensor_vec.push_back(req.add_tensor());
-int idx = _feed_name_to_idx[name];
+}
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
-tensor->mutable_float_data()->Resize(total_number, 0);
+vec_idx = 0;
-memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
+for (auto &name : float_feed_name) {
-vec_idx++;
+int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
-VLOG(2) << "batch [" << bi << "] "
+tensor->set_name(_feed_name[idx]);
-<< "float feed value prepared";
+tensor->set_alias_name(name);
-vec_idx = 0;
+tensor->mutable_float_data()->Resize(total_number, 0);
-for (auto &name : int_feed_name) {
+memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
-int idx = _feed_name_to_idx[name];
+vec_idx++;
-if (idx >= tensor_vec.size()) {
+}
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
-for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
+vec_idx = 0;
-tensor->add_shape(int_shape[vec_idx][j]);
+for (auto &name : int_feed_name) {
-}
+int idx = _feed_name_to_idx[name];
-for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
+if (idx >= tensor_vec.size()) {
-tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
+LOG(ERROR) << "idx > tensor_vec.size()";
-}
+return -1;
tensor->set_elem_type(_type[idx]);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(
tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
-VLOG(2) << "batch [" << bi << "] "
+for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
-<< "int feed value prepared";
+tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
}
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]);
-const int string_shape_size = string_shape[vec_idx].size();
+tensor->set_alias_name(name);
// string_shape[vec_idx] = [1];cause numpy has no datatype of string.
-// we pass string via vector<vector<string> >.
-if (string_shape_size != 1) {
-LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
-<< string_shape_size;
-return -1;
-}
-switch (string_shape_size) {
-case 1: {
-tensor->add_data(string_feed[vec_idx]);
-break;
-}
+const int string_shape_size = string_shape[vec_idx].size();
+// string_shape[vec_idx] = [1];cause numpy has no datatype of string.
+// we pass string via vector<vector<string> >.
+if (string_shape_size != 1) {
+LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
+<< string_shape_size;
+return -1;
+}
+switch (string_shape_size) {
+case 1: {
+tensor->add_data(string_feed[vec_idx]);
break;
}
vec_idx++;
}
vec_idx++;
VLOG(2) << "batch [" << bi << "] "
<< "string feed value prepared";
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
@@ -351,19 +333,18 @@ int PredictorClient::numpy_predict(
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
-int shape_size = output.insts(0).tensor_array(idx).shape_size();
+int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
model._shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
-model._shape_map[name][i] =
+model._shape_map[name][i] = output.tensor(idx).shape(i);
output.insts(0).tensor_array(idx).shape(i);
}
-int lod_size = output.insts(0).tensor_array(idx).lod_size();
+int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
model._lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
-model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
+model._lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
@@ -375,22 +356,22 @@ int PredictorClient::numpy_predict(
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64";
-int size = output.insts(0).tensor_array(idx).int64_data_size();
+int size = output.tensor(idx).int64_data_size();
model._int64_value_map[name] = std::vector<int64_t>(
-output.insts(0).tensor_array(idx).int64_data().begin(),
+output.tensor(idx).int64_data().begin(),
-output.insts(0).tensor_array(idx).int64_data().begin() + size);
+output.tensor(idx).int64_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
-int size = output.insts(0).tensor_array(idx).float_data_size();
+int size = output.tensor(idx).float_data_size();
model._float_value_map[name] = std::vector<float>(
-output.insts(0).tensor_array(idx).float_data().begin(),
+output.tensor(idx).float_data().begin(),
-output.insts(0).tensor_array(idx).float_data().begin() + size);
+output.tensor(idx).float_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
-int size = output.insts(0).tensor_array(idx).int_data_size();
+int size = output.tensor(idx).int_data_size();
model._int32_value_map[name] = std::vector<int32_t>(
-output.insts(0).tensor_array(idx).int_data().begin(),
+output.tensor(idx).int_data().begin(),
-output.insts(0).tensor_array(idx).int_data().begin() + size);
+output.tensor(idx).int_data().begin() + size);
}
idx += 1;
}
......
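To make the new fetch layout concrete, here is a condensed, hypothetical sketch of pulling one float fetch variable out of a ModelOutput after this change, assuming `output` and `idx` are obtained exactly as in the loop above:

// Old path went through output.insts(0).tensor_array(idx); the new path reads the tensor directly.
const auto &t = output.tensor(idx);
std::vector<int> fetched_shape(t.shape().begin(), t.shape().end());
std::vector<float> fetched_values(t.float_data().begin(), t.float_data().end());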
@@ -97,33 +97,31 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.destroy_predictor(); })
.def("numpy_predict",
[](PredictorClient &self,
-const std::vector<std::vector<py::array_t<float>>>
-&float_feed_batch,
+const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
-const std::vector<std::vector<py::array_t<int64_t>>>
-&int_feed_batch,
+const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
-const std::vector<std::vector<std::string>>& string_feed_batch,
+const std::vector<std::string> &string_feed,
-const std::vector<std::string>& string_feed_name,
+const std::vector<std::string> &string_feed_name,
-const std::vector<std::vector<int>>& string_shape,
+const std::vector<std::vector<int>> &string_shape,
-const std::vector<std::vector<int>>& string_lod_slot_batch,
+const std::vector<std::vector<int>> &string_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
-return self.numpy_predict(float_feed_batch,
+return self.numpy_predict(float_feed,
float_feed_name,
float_shape,
float_lod_slot_batch,
-int_feed_batch,
+int_feed,
int_feed_name,
int_shape,
int_lod_slot_batch,
-string_feed_batch,
+string_feed,
string_feed_name,
string_shape,
string_lod_slot_batch,
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_copy_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralCopyOp::inference() {
// read request from client
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
const TensorVector *in = &input_blob->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
int batch_size = input_blob->GetBatchSize();
int input_var_num = 0;
GeneralBlob *res = mutable_data<GeneralBlob>();
res->SetLogId(log_id);
TensorVector *out = &res->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
res->SetBatchSize(batch_size);
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
for (int i = 0; i < in->size(); ++i) {
paddle::PaddleTensor lod_tensor;
CopyLod(&in->at(i), &lod_tensor);
lod_tensor.dtype = in->at(i).dtype;
lod_tensor.name = in->at(i).name;
VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
<< "].name = " << lod_tensor.name;
out->push_back(lod_tensor);
}
VLOG(2) << "(logid=" << log_id << ") pack done.";
for (int i = 0; i < out->size(); ++i) {
int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
out->at(i).data.Resize(out->at(i).lod[0].back() * sizeof(int64_t));
out->at(i).shape = {out->at(i).lod[0].back(), 1};
int64_t *tgt_ptr = static_cast<int64_t *>(out->at(i).data.data());
for (int j = 0; j < out->at(i).lod[0].back(); ++j) {
tgt_ptr[j] = src_ptr[j];
}
}
VLOG(2) << "(logid=" << log_id << ") output done.";
timeline.Pause();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, res);
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralCopyOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralCopyOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralCopyOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
...@@ -36,7 +36,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -36,7 +36,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......
File mode changed from 100755 to 100644
...@@ -34,11 +34,152 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -34,11 +34,152 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralDistKVInferOp::inference() { return 0; } // DistKV Infer Op: seek cube and then call paddle inference
// op seq: general_reader-> dist_kv_infer -> general_response
int GeneralDistKVInferOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "(logid=" << log_id << ") output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
std::vector<uint64_t> keys;
std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0;  // number of sparse inputs; sparse inputs are looked up in cube
int dense_count = 0;   // number of dense inputs; dense inputs go directly to paddle inference
std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
size_t key_len = 0;
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
++dense_count;
continue;
}
++sparse_count;
size_t elem_num = 1;
for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
elem_num *= in->at(i).shape[s];
}
key_len += elem_num;
int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
}
keys.resize(key_len);
VLOG(3) << "(logid=" << log_id << ") cube number of keys to look up: " << key_len;
int key_idx = 0;
for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
std::copy(dataptr_size_pairs[i].first,
dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second;
}
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) {
LOG(ERROR) << "cube init error or cube config not given.";
return -1;
}
// gather keys and seek cube servers, put results in values
int ret = cube->seek(table_names[0], keys, &values);
VLOG(3) << "(logid=" << log_id << ") cube seek status: " << ret;
if (values.size() != keys.size() || values.size() == 0 ||
    values[0].buff.size() == 0) {
LOG(ERROR) << "cube value return null";
return -1;
}
// EMBEDDING_SIZE is the length of the sparse embedding vector, derived from the cube value size.
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
TensorVector sparse_out;
sparse_out.resize(sparse_count);
TensorVector dense_out;
dense_out.resize(dense_count);
int cube_val_idx = 0;
int sparse_idx = 0;
int dense_idx = 0;
std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
std::shared_ptr<PaddleGeneralModelConfig> model_config = resource.get_general_model_config().front();
// copy data to tensor
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
dense_out[dense_idx] = in->at(i);
++dense_idx;
continue;
}
sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
std::copy(in->at(i).lod[x].begin(),
in->at(i).lod[x].end(),
sparse_out[sparse_idx].lod[x].begin());
}
sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
sparse_out[sparse_idx].shape.push_back(sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float));
float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
memcpy(data_ptr,
values[cube_val_idx].buff.data(),
values[cube_val_idx].buff.size());
cube_val_idx++;
}
++sparse_idx;
}
VLOG(3) << "(logid=" << log_id << ") sparse tensor load success.";
TensorVector infer_in;
infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
// call paddle inference here
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id << ") Failed do infer in fluid model: " << engine_name();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralDistKVInferOp); DEFINE_OP(GeneralDistKVInferOp);
} // namespace serving } // namespace serving
......
...@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
...@@ -117,9 +116,6 @@ int GeneralDistKVQuantInferOp::inference() { ...@@ -117,9 +116,6 @@ int GeneralDistKVQuantInferOp::inference() {
std::unordered_map<int, int> in_out_map; std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
//TODO:Temporary addition, specific details to be studied by HexToString
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
int cube_quant_bits = resource.get_cube_quant_bits(); int cube_quant_bits = resource.get_cube_quant_bits();
size_t EMBEDDING_SIZE = 0; size_t EMBEDDING_SIZE = 0;
if (cube_quant_bits == 0) { if (cube_quant_bits == 0) {
...@@ -146,7 +142,7 @@ int GeneralDistKVQuantInferOp::inference() { ...@@ -146,7 +142,7 @@ int GeneralDistKVQuantInferOp::inference() {
sparse_out[sparse_idx].shape.push_back( sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back()); sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE); sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i]; sparse_out[sparse_idx].name = in->at(i).name;
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() * sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float)); EMBEDDING_SIZE * sizeof(float));
// END HERE // END HERE
......
...@@ -31,7 +31,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -31,7 +31,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
...@@ -49,7 +48,7 @@ int GeneralInferOp::inference() { ...@@ -49,7 +48,7 @@ int GeneralInferOp::inference() {
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name); const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) { if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error"; LOG(ERROR) << "input_blob is nullptr,error";
return -1; return -1;
} }
uint64_t log_id = input_blob->GetLogId(); uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name; VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
...@@ -57,7 +56,7 @@ int GeneralInferOp::inference() { ...@@ -57,7 +56,7 @@ int GeneralInferOp::inference() {
GeneralBlob *output_blob = mutable_data<GeneralBlob>(); GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) { if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error"; LOG(ERROR) << "output_blob is nullptr,error";
return -1; return -1;
} }
output_blob->SetLogId(log_id); output_blob->SetLogId(log_id);
......
...@@ -30,42 +30,8 @@ using baidu::paddle_serving::Timer; ...@@ -30,42 +30,8 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
LOG(ERROR) << "feed var number not match: model config["
<< model_config->_feed_type.size() << "] vs. actual[" << var_num
<< "]";
return -1;
}
VLOG(2) << "fetch var num in reader op: " << req->fetch_var_names_size();
for (int i = 0; i < var_num; ++i) {
const Tensor &tensor = req->insts(0).tensor_array(i);
if (model_config->_feed_type[i] != tensor.elem_type()) {
LOG(ERROR) << "feed type not match.";
return -1;
}
if (model_config->_feed_shape[i].size() == tensor.shape_size()) {
for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
tensor.shape(j);
if (model_config->_feed_shape[i][j] != tensor.shape(j)) {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
} else {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
return 0;
}
int GeneralReaderOp::inference() { int GeneralReaderOp::inference() {
// read request from client // read request from client
...@@ -93,10 +59,8 @@ int GeneralReaderOp::inference() { ...@@ -93,10 +59,8 @@ int GeneralReaderOp::inference() {
res->SetLogId(log_id); res->SetLogId(log_id);
Timer timeline; Timer timeline;
int64_t start = timeline.TimeStampUS(); int64_t start = timeline.TimeStampUS();
// only get insts(0), cause batch is already in Tensor.
// req can only include 1 inst.
// var_num means the number of feed_var. // var_num means the number of feed_var.
int var_num = req->insts(0).tensor_array_size(); int var_num = req->tensor_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num VLOG(2) << "(logid=" << log_id << ") var num: " << var_num
<< ") start to call load general model_conf op"; << ") start to call load general model_conf op";
...@@ -105,19 +69,7 @@ int GeneralReaderOp::inference() { ...@@ -105,19 +69,7 @@ int GeneralReaderOp::inference() {
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done."; VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
// get the first InferOP's model_config as ReaderOp's model_config by default.
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().front();
// TODO(guru4elephant): how to do conditional check?
/*
int ret = conf_check(req, model_config);
if (ret != 0) {
LOG(ERROR) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
*/
// package tensor // package tensor
// prepare basic information for input // prepare basic information for input
// specify the memory needed for output tensor_vector // specify the memory needed for output tensor_vector
...@@ -128,7 +80,7 @@ int GeneralReaderOp::inference() { ...@@ -128,7 +80,7 @@ int GeneralReaderOp::inference() {
int64_t databuf_size = 0; int64_t databuf_size = 0;
for (int i = 0; i < var_num; ++i) { for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor; paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->insts(0).tensor_array(i); const Tensor &tensor = req->tensor(i);
data_len = 0; data_len = 0;
elem_type = 0; elem_type = 0;
elem_size = 0; elem_size = 0;
...@@ -175,7 +127,7 @@ int GeneralReaderOp::inference() { ...@@ -175,7 +127,7 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim; VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim;
paddleTensor.shape.push_back(dim); paddleTensor.shape.push_back(dim);
} }
paddleTensor.name = model_config->_feed_name[i]; paddleTensor.name = tensor.name();
out->push_back(paddleTensor); out->push_back(paddleTensor);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
......
...@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput; using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
...@@ -49,7 +48,6 @@ int GeneralResponseOp::inference() { ...@@ -49,7 +48,6 @@ int GeneralResponseOp::inference() {
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId(); get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message()); const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
Response *res = mutable_data<Response>(); Response *res = mutable_data<Response>();
Timer timeline; Timer timeline;
...@@ -63,7 +61,8 @@ int GeneralResponseOp::inference() { ...@@ -63,7 +61,8 @@ int GeneralResponseOp::inference() {
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done."; VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
//get the last InferOP's model_config as ResponseOp's model_config by default. // get the last InferOP's model_config as ResponseOp's model_config by
// default.
std::shared_ptr<PaddleGeneralModelConfig> model_config = std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back(); resource.get_general_model_config().back();
...@@ -71,6 +70,10 @@ int GeneralResponseOp::inference() { ...@@ -71,6 +70,10 @@ int GeneralResponseOp::inference() {
<< ") max body size : " << brpc::fLU64::FLAGS_max_body_size; << ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
std::vector<int> fetch_index; std::vector<int> fetch_index;
// this is based on GetOutPutNames() is ordered map.
// and the order of Output is the same as the prototxt FetchVar.
// otherwise, you can only get the Output by the corresponding of
// Name -- Alias_name.
fetch_index.resize(req->fetch_var_names_size()); fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) { for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] = fetch_index[i] =
...@@ -95,40 +98,41 @@ int GeneralResponseOp::inference() { ...@@ -95,40 +98,41 @@ int GeneralResponseOp::inference() {
ModelOutput *output = res->add_outputs(); ModelOutput *output = res->add_outputs();
// To get the order of model return values // To get the order of model return values
output->set_engine_name(pre_name); output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
var_idx = 0;
// idx is the real index of FetchVar.
// idx is not the index of FetchList.
// fetch_index is the real index in FetchVar of Fetchlist
// for example, FetchVar = {0:A, 1:B, 2:C}
// FetchList = {0:C,1:A}, at this situation.
// fetch_index = [2,0], C`index = 2 and A`index = 0
for (auto &idx : fetch_index) { for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array(); Tensor *tensor = output->add_tensor();
//tensor->set_elem_type(1); tensor->set_name(in->at(idx).name);
if (model_config->_is_lod_fetch[idx]) { tensor->set_alias_name(model_config->_fetch_alias_name[idx]);
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] " for (int k = 0; k < in->at(idx).shape.size(); ++k) {
<< model_config->_fetch_name[idx] << " is lod_tensor"; VLOG(2) << "(logid=" << log_id << ") shape[" << k
for (int k = 0; k < in->at(idx).shape.size(); ++k) { << "]: " << in->at(idx).shape[k];
VLOG(2) << "(logid=" << log_id << ") shape[" << k tensor->add_shape(in->at(idx).shape[k]);
<< "]: " << in->at(idx).shape[k]; }
tensor->add_shape(in->at(idx).shape[k]); std::string str_tensor_type = "is tensor";
} if (model_config->_is_lod_fetch[idx] && in->at(idx).lod.size() > 0) {
} else { str_tensor_type = "is lod_tensor";
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] " for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
<< model_config->_fetch_name[idx] << " is tensor"; tensor->add_lod(in->at(idx).lod[0][j]);
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
} }
} }
} VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << str_tensor_type;
var_idx = 0;
for (auto &idx : fetch_index) {
cap = 1; cap = 1;
for (int j = 0; j < in->at(idx).shape.size(); ++j) { for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j]; cap *= in->at(idx).shape[j];
} }
FetchInst *fetch_p = output->mutable_insts(0);
auto dtype = in->at(idx).dtype; auto dtype = in->at(idx).dtype;
if (dtype == paddle::PaddleDType::INT64) { if (dtype == paddle::PaddleDType::INT64) {
tensor->set_elem_type(0);
VLOG(2) << "(logid=" << log_id << ") Prepare int64 var [" VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
<< model_config->_fetch_name[idx] << "]."; << model_config->_fetch_name[idx] << "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data()); int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
...@@ -137,35 +141,24 @@ int GeneralResponseOp::inference() { ...@@ -137,35 +141,24 @@ int GeneralResponseOp::inference() {
// `Swap` method is faster than `{}` method. // `Swap` method is faster than `{}` method.
google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr, google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap( output->mutable_tensor(var_idx)->mutable_int64_data()->Swap(&tmp_data);
&tmp_data);
} else if (dtype == paddle::PaddleDType::FLOAT32) { } else if (dtype == paddle::PaddleDType::FLOAT32) {
tensor->set_elem_type(1);
VLOG(2) << "(logid=" << log_id << ") Prepare float var [" VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
<< model_config->_fetch_name[idx] << "]."; << model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data()); float *data_ptr = static_cast<float *>(in->at(idx).data.data());
google::protobuf::RepeatedField<float> tmp_data(data_ptr, google::protobuf::RepeatedField<float> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap( output->mutable_tensor(var_idx)->mutable_float_data()->Swap(&tmp_data);
&tmp_data);
} else if (dtype == paddle::PaddleDType::INT32) { } else if (dtype == paddle::PaddleDType::INT32) {
tensor->set_elem_type(2);
VLOG(2) << "(logid=" << log_id << ")Prepare int32 var [" VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
<< model_config->_fetch_name[idx] << "]."; << model_config->_fetch_name[idx] << "].";
int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data()); int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr, google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap( output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
&tmp_data);
}
if (model_config->_is_lod_fetch[idx]) {
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
} }
VLOG(2) << "(logid=" << log_id << ") fetch var [" VLOG(2) << "(logid=" << log_id << ") fetch var ["
...@@ -205,4 +198,4 @@ DEFINE_OP(GeneralResponseOp); ...@@ -205,4 +198,4 @@ DEFINE_OP(GeneralResponseOp);
} // namespace serving } // namespace serving
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
\ No newline at end of file
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) {
LOG(ERROR) << "(logid=" << log_id
           << ") Failed get op tls reader object output";
return -1;
}
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (batch_size <= 0) {
LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
return -1;
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
VLOG(2) << "(logid=" << log_id << ") print general model config done.";
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.int_data_size();
int cur_len = out->at(i).lod[0].back();
out->at(i).lod[0].push_back(cur_len + data_len);
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).float_data_size();
     ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
}
}
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralTextReaderOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextReaderOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextReaderOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_response_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextResponseOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
const GeneralBlob *input_blob;
uint64_t log_id =
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back();
std::vector<int> fetch_index;
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
<< " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
ModelOutput *output = res->add_outputs();
output->set_engine_name(
pre_name); // To get the order of model return values
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
int cap = 1;
for (int j = 1; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < batch_size; ++j) {
for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
k++) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
}
var_idx++;
}
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextResponseOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextResponseOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
...@@ -24,17 +24,16 @@ message Tensor { ...@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2; repeated int32 int_data = 2;
repeated int64 int64_data = 3; repeated int64 int64_data = 3;
repeated float float_data = 4; repeated float float_data = 4;
optional int32 elem_type = 5; optional int32 elem_type =
repeated int32 shape = 6; 5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
}; };
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request { message Request {
repeated FeedInst insts = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ]; required uint64 log_id = 4 [ default = 0 ];
...@@ -46,7 +45,7 @@ message Response { ...@@ -46,7 +45,7 @@ message Response {
}; };
message ModelOutput { message ModelOutput {
repeated FetchInst insts = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; optional string engine_name = 2;
} }
......
...@@ -280,6 +280,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -280,6 +280,7 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n" " baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n" " baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n" " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n" " uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
...@@ -322,6 +323,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -322,6 +323,7 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n" " baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n" " baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n" " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = equest->log_id();\n" " uint64_t log_id = equest->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
...@@ -1023,6 +1025,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1023,6 +1025,7 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n" " brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n" " brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n" " static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n" " uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
...@@ -1067,6 +1070,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1067,6 +1070,7 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n" " brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n" " brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n" " static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n" " uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -2,3 +2,16 @@ set(seq_gen_src ${CMAKE_CURRENT_LIST_DIR}/seq_generator.cpp ${CMAKE_CURRENT_LIS ...@@ -2,3 +2,16 @@ set(seq_gen_src ${CMAKE_CURRENT_LIST_DIR}/seq_generator.cpp ${CMAKE_CURRENT_LIS
LIST(APPEND seq_gen_src ${PROTO_SRCS}) LIST(APPEND seq_gen_src ${PROTO_SRCS})
add_executable(seq_generator ${seq_gen_src}) add_executable(seq_generator ${seq_gen_src})
target_link_libraries(seq_generator protobuf -lpthread) target_link_libraries(seq_generator protobuf -lpthread)
set(seq_reader_src ${CMAKE_CURRENT_LIST_DIR}/seq_reader.cpp ${CMAKE_CURRENT_LIST_DIR}/../../cube/cube-builder/src/seqfile_reader.cpp)
add_executable(seq_reader ${seq_reader_src})
add_dependencies(seq_reader brpc)
install(TARGETS seq_reader
RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
LIBRARY DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/so
)
install(TARGETS seq_reader RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
install(TARGETS seq_generator RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/time.h>
#include <limits.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <thread>
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
std::string string_to_hex(const std::string& input) {
static const char* const lut = "0123456789ABCDEF";
size_t len = input.length();
std::string output;
output.reserve(2 * len);
for (size_t i = 0; i < len; ++i) {
const unsigned char c = input[i];
output.push_back(lut[c >> 4]);
output.push_back(lut[c & 15]);
}
return output;
}
void printSeq(std::string file, int limit) {
SequenceFileRecordReader reader(file.c_str());
if (reader.open() != 0) {
std::cerr << "open file failed! " << file;
return;
}
if (reader.read_header() != 0) {
std::cerr << "read header error! " << file;
reader.close();
return;
}
Record record(reader.get_header());
int total_count = 0;
while (reader.next(&record) == 0) {
uint64_t key =
*reinterpret_cast<uint64_t *>(const_cast<char *>(record.key.data()));
total_count++;
int64_t value_length = record.record_len - record.key_len;
std::cout << "key: " << key << " , value: " << string_to_hex(record.value.c_str()) << std::endl;
if (total_count >= limit) {
break;
}
}
if (reader.close() != 0) {
std::cerr << "close file failed! " << file;
return;
}
}
int main(int argc, char **argv) {
if (argc != 3 && argc != 2) {
std::cout << "Seq Reader Usage:" << std::endl;
std::cout << "get all keys: ./seq_reader $FILENAME " << std::endl;
std::cout << "get some keys: ./seq_reader $FILENAME $KEY_NUM" << std::endl;
return -1;
}
if (argc == 3 || argc == 2) {
const char* filename_str = argv[1];
std::cout << "cstr filename is " << filename_str << std::endl;
std::string filename = filename_str;
std::cout << "filename is " << filename << std::endl;
if (argc == 3) {
const char* key_num_str = argv[2];
int key_limit = std::stoi(key_num_str);
printSeq(filename, key_limit);
} else {
printSeq(filename, INT_MAX);
}
}
return 0;
}
...@@ -24,17 +24,16 @@ message Tensor { ...@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2; repeated int32 int_data = 2;
repeated int64 int64_data = 3; repeated int64 int64_data = 3;
repeated float float_data = 4; repeated float float_data = 4;
optional int32 elem_type = 5; optional int32 elem_type =
repeated int32 shape = 6; 5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
}; };
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request { message Request {
repeated FeedInst insts = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ]; required uint64 log_id = 4 [ default = 0 ];
...@@ -46,7 +45,7 @@ message Response { ...@@ -46,7 +45,7 @@ message Response {
}; };
message ModelOutput { message ModelOutput {
repeated FetchInst insts = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; optional string engine_name = 2;
} }
......
File mode changed from 100755 to 100644
...@@ -87,6 +87,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2 ...@@ -87,6 +87,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2 go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3 go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0 go get -u google.golang.org/grpc@v1.33.0
go env -w GO111MODULE=auto
``` ```
......
...@@ -86,6 +86,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2 ...@@ -86,6 +86,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2 go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3 go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0 go get -u google.golang.org/grpc@v1.33.0
go env -w GO111MODULE=auto
``` ```
......
## How to Obtain the Model Input Required by the Sparse Parameter Index Service (Cube)
### Background
Recommendation systems need a large-scale sparse parameter index to support distributed deployment; see `python/example/criteo_ctr_with_cube` or [PaddleRec](https://github.com/paddlepaddle/paddlerec) to learn more about recommendation models.
The sparse parameter index uses the SequenceFile format, a key-value file format from the Hadoop ecosystem.
To make debugging easier, we provide a tool that converts readable text files in a specific format into SequenceFile files, and a tool that converts SequenceFile files back into readable text.
When debugging the Cube service, you can generate SequenceFile files from your own key-value pairs.
When verifying that Cube delivers data correctly, you can convert SequenceFile files back into readable text for comparison.
### Prerequisites
- You need to be able to compile Paddle Serving; see the [compilation guide](./COMPILE.md).
### Usage
The install output produced by the build contains `seq_reader` and `kv_to_seqfile.py`.
#### Generating a SequenceFile
Under `output/tool/`, edit `output/tool/source/file.txt`. Each line of this file holds one key-value pair, with a colon `:` separating the key from the value.
For example:
```
1676869128226002114:48241 37064 91 -539 114 51 -122 269 229 -134 -282
1657749292782759014:167 40 98 27 117 10 -29 15 74 67 -54
```
Run:
```
python kv_to_seqfile.py
```
This generates a `data` directory with the following structure:
```
.
├── 20210805095422
│   └── base
│   └── feature
└── donefile
└── base.txt
```
Here `20210805095422/base/feature` is the SequenceFile, and the donefile is saved at `donefile/base.txt`.
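If you prefer to generate the input file from a script, the following minimal Python sketch writes key-value pairs in the `key:value` text format consumed by `kv_to_seqfile.py` (the output path and the sample embedding values are taken from the example above and are only illustrative; adjust them to your data):

```python
# Sketch: write key:value pairs in the text format expected by kv_to_seqfile.py.
# The path and the sample values below are illustrative assumptions.
kv_pairs = {
    1676869128226002114: "48241 37064 91 -539 114 51 -122 269 229 -134 -282",
    1657749292782759014: "167 40 98 27 117 10 -29 15 74 67 -54",
}

with open("output/tool/source/file.txt", "w") as f:
    for key, value in kv_pairs.items():
        # one pair per line, key and value separated by a colon
        f.write("{}:{}\n".format(key, value))
```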
#### Inspecting a SequenceFile
Use the `seq_reader` tool to read a SequenceFile.
```
./seq_reader 20210805095422/base/feature 10 # read the first 10 KV pairs
./seq_reader 20210805095422/base/feature # read all KV pairs
```
The output looks like:
```
key: 1676869128226002114 , value: 343832343109333730363409093931092D35333909313134093531092D3132320932363909323239092D313334092D323832
key: 1657749292782759014 , value: 3136370934300909393809323709313137093130092D3239093135093734093637092D3534
```
Values are currently printed in hexadecimal.
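Because the values are printed as hex, you can turn a printed value back into readable bytes with a short Python snippet (a sketch; paste in the hex string you want to inspect):

```python
# Sketch: decode a hex-encoded value printed by seq_reader back into bytes.
hex_value = ("3136370934300909393809323709313137093130"
             "092D3239093135093734093637092D3534")
raw = bytes.fromhex(hex_value)
print(raw)                                    # raw bytes
print(raw.decode("utf-8", errors="replace"))  # best-effort text view
```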
...@@ -324,6 +324,15 @@ GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999 ...@@ -324,6 +324,15 @@ GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999
**A:** The log_id defaults to 0 (automatic log_id generation is planned; the current version is 0.4.0). The client passes it through the log_id parameter of the predict function. **A:** The log_id defaults to 0 (automatic log_id generation is planned; the current version is 0.4.0). The client passes it through the log_id parameter of the predict function.
#### Q: How do I debug and locate problems in the C++ Server?
**A:** We recommend gdb for locating and debugging problems. If you use Docker, start the container with the docker run --privileged flag to enable privileged mode; otherwise gdb cannot be used inside the container.
If the C++ side core dumps, a core file is normally generated. If it is not, enable core file generation with ulimit -c unlimited.
To debug the core file with gdb, run gdb <executable> <core file>, then type bt; this usually shows the line where the error occurred.
Note: the executable is the C++ binary, not the python command. It is usually a path like /usr/local/lib/python3.6/site-packages/paddle_serving_server/serving-gpu-102-0.6.2/serving.
## Performance Optimization ## Performance Optimization
# Accessing the Server over HTTP
The Paddle Serving server now supports direct access over HTTP. This document describes the details.
## How It Works
The BRPC server can be reached over HTTP. Since every language has libraries for issuing HTTP requests, languages with limited BRPC support such as Java, Python, and Go can send prediction requests to the server directly over HTTP.
### Plain HTTP
Basic flow: the client packages the data in the format defined by the proto (see [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto)) and places it in the body of the HTTP request.
The BRPC server then deserializes the proto-format data from the JSON string in the request body and continues with normal processing.
### HTTP + protobuf
Every language also supports ProtoBuf. If you are familiar with it, you can serialize the data with ProtoBuf first, put the serialized bytes in the HTTP request body, and set Content-Type: application/proto, so that the service is accessed with an http/h2+protobuf binary payload.
In our tests, the data size and deserialization time of the JSON-over-HTTP approach grow sharply with the payload, so HTTP + protobuf is recommended for large payloads. We will add this capability to the framework's HttpClient later; it is not supported there yet.
**In theory, serialization/deserialization performance ranks, from fastest to slowest: protobuf > http/h2+protobuf > http.**
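As a rough illustration of the HTTP + protobuf path, the sketch below serializes a Request and posts it with Content-Type: application/proto. It assumes you have compiled general_model_service.proto with protoc into a Python module named `general_model_service_pb2` (that module name, and the use of the third-party `requests` library, are assumptions; the field values match the fit_a_line example used later in this document):

```python
import requests                          # third-party HTTP client library
import general_model_service_pb2 as pb  # assumed name of the protoc-generated module

req = pb.Request()
req.log_id = 0                           # required field in the proto
req.fetch_var_names.append("price")
tensor = req.tensor.add()
tensor.name = "x"
tensor.alias_name = "x"
tensor.elem_type = 1                     # 1 means float32
tensor.shape.extend([1, 13])             # shape includes the batch dimension
tensor.float_data.extend([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                          -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332])

resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=req.SerializeToString(),
    headers={"Content-Type": "application/proto"})
print(resp.content)                      # serialized Response; parse with pb.Response() if needed
```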
## Example
Using python/examples/fit_a_line as an example, this section shows how to access the server over HTTP.
### Getting the model
```shell
sh get_data.sh
```
## Starting the server
```shell
python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```
The server needs no modification at all; it supports both BRPC and HTTP access.
## Client access
### Sending HTTP requests with HttpClient (Python/Java)
To make it easy to call the prediction service over HTTP, common functionality such as building the HTTP request body, compression, and request encryption has been wrapped into an HttpClient class.
In the simplest case, using HttpClient takes three steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example, uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the Predict function to request the prediction service over HTTP. A sketch of these steps follows at the end of this section.
You can also configure the server IP, port, and service name (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable compression of the request body, allow compressed responses, use encrypted-model prediction (the server must be configured for model encryption), set a response timeout, and so on.
A Python HttpClient example is in [`python/examples/fit_a_line/test_httpclient.py`](../python/examples/fit_a_line/test_httpclient.py); see [`python/paddle_serving_client/httpclient.py`](../python/paddle_serving_client/httpclient.py) for the API.
A Java HttpClient example is in [`java/examples/src/main/java/PaddleServingClientExample.java`](../java/examples/src/main/java/PaddleServingClientExample.java); see [`java/src/main/java/io/paddle/serving/client/HttpClient.java`](../java/src/main/java/io/paddle/serving/client/HttpClient.java) for the API.
If these do not meet your needs, you can extend them with additional functionality.
To support HTTPS or custom response status codes, some secondary development of the C++ brpc server is required; see https://github.com/apache/incubator-brpc/blob/master/docs/cn/http_service.md. If demand is strong, we will add these features to the server later.
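Below is a minimal sketch of the three steps described above. The constructor and method names are assumptions written to mirror the regular Client API; treat `python/examples/fit_a_line/test_httpclient.py` and `python/paddle_serving_client/httpclient.py` as the authoritative reference:

```python
# Sketch only: method names are assumed to mirror the regular Client API.
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()                                    # 1. create an HttpClient object
client.load_client_config(
    "uci_housing_client/serving_client_conf.prototxt")   # 2. load the client prototxt
client.connect(["127.0.0.1:9393"])

data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
        -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = client.predict(feed={"x": data}, fetch=["price"])  # 3. predict over HTTP
print(fetch_map)
```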
### Sending HTTP requests with curl (basic mechanism)
```shell
curl -XPOST http://0.0.0.0:9393/GeneralModelService/inference -d ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}'
```
Here the IP and port (`127.0.0.1:9393` in the examples) should be set to the IP and port your server is listening on.
`GeneralModelService` and `inference` are the Service name and rpc method name in the proto file; see [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto).
The argument after -d is the request body. The JSON must contain the required fields of the proto above, otherwise the conversion fails and the request is rejected.
Note that the shape field in the data is the shape the model actually needs, including the batch dimension, which may differ from the shape in the proto file.
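The same request can be issued from Python; here is a small sketch using the third-party `requests` library, with the endpoint and field values copied from the curl command above:

```python
import requests  # third-party HTTP client library

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,          # 1 means float32
        "name": "x",
        "alias_name": "x",
        "shape": [1, 13],        # includes the batch dimension
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,                 # required field in the proto
}

resp = requests.post("http://127.0.0.1:9393/GeneralModelService/inference", json=payload)
print(resp.json())
```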
#### message
Corresponds to a rapidjson Object, enclosed in braces; its elements are parsed recursively.
```protobuf
// protobuf
message Foo {
required string field1 = 1;
required int32 field2 = 2;
}
message Bar {
required Foo foo = 1;
optional bool flag = 2;
required string name = 3;
}
// rapidjson
{"foo":{"field1":"hello", "field2":3},"name":"Tom" }
```
#### repeated field
Corresponds to a rapidjson Array, enclosed in brackets; its elements are parsed recursively. Unlike a message, all elements have the same type.
```protobuf
// protobuf
repeated int32 numbers = 1;
// rapidjson
{"numbers" : [12, 17, 1, 24] }
```
#### elem_type
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes (string).
#### fetch_var_names
The names of the variables to fetch in the result; see `alias_name` under the `fetch_var` field in the model file serving_client_conf.prototxt.
### HTTP compression
gzip compression is supported, but gzip is not especially fast at compressing or decompressing. For small payloads it costs more than it saves; consider it only when the data exceeds 512 bytes. In our tests, compression brings little benefit for payloads under 50 KB.
#### Compressing the client request body
Using the fit_a_line example with the same request body as above. This is only to demonstrate usage; for a payload this small, compression is actually not worth it.
```shell
echo ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}' | gzip -c > data.txt.gz
```
```shell
curl --data-binary @data.txt.gz -H'Content-Encoding: gzip' -XPOST http://127.0.0.1:9393/GeneralModelService/inference
```
**Note: when the request body is compressed, the request header must include Content-Encoding: gzip.**
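The equivalent compressed request can also be sent from Python; a sketch using the standard `gzip` module together with the third-party `requests` library (mirroring the curl command above):

```python
import gzip
import json
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}

body = gzip.compress(json.dumps(payload).encode("utf-8"))  # gzip the JSON body
resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=body,
    headers={"Content-Encoding": "gzip"})  # tell the server the body is gzipped
print(resp.text)
```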
#### Compressing the server response
When the HTTP request header sets Accept-encoding: gzip, the server tries to gzip-compress the response data. "Tries" means the compression may not happen when:
- the request does not set Accept-encoding: gzip;
- the body is smaller than the byte count given by -http_body_compress_threshold (default 512). gzip is not a fast algorithm, and for small bodies the added compression latency may exceed the network time it saves, so skipping compression for small packets can be the better choice.
In those cases the server always returns an uncompressed result.
With curl, the recommended way to enable response compression is the --compressed flag, which automatically sets Accept-encoding: gzip on the request and transparently decompresses the compressed response; to the user, the whole compression/decompression process is invisible.
```shell
curl --data-binary @data.txt.gz -H'Content-Encoding: gzip' --compressed -XPOST http://127.0.0.1:9393/GeneralModelService/inference
```
If you only set -H'Accept-encoding: gzip' in the request header yourself, you will receive the compressed Response and have to decompress it manually.
In other words, --compressed = -H'Accept-encoding: gzip' + automatic decompression, so --compressed is the recommended option; the commands below are only an illustrative example of setting the header by hand and decompressing manually.
If you want to verify that the returned data is really compressed, add only the -H'Accept-encoding: gzip' header and skip the decompression: the returned payload will then be the raw compressed bytes (generally unreadable).
```shell
curl --data-binary @data.txt.gz -H'Content-Encoding: gzip' -H'Accept-encoding: gzip' -XPOST http://127.0.0.1:9393/GeneralModelService/inference | gunzip
```
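The same behavior is available programmatically: the Java HttpClient added in this change exposes set_request_compress and set_response_compress (see the compress() case in PaddleServingClientExample.java). A minimal sketch under the same fit_a_line assumptions as above (class name and config path are illustrative; whether compression actually happens still follows the rules described in this section):
```java
import io.paddle.serving.client.HttpClient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import java.util.*;

public class CompressedHttpDemo {
    public static void main(String[] args) {
        HttpClient client = new HttpClient();
        client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(args[0]);  // client-side serving_client_conf.prototxt

        // gzip the request body (the request should then carry Content-Encoding: gzip, as described above)
        client.set_request_compress(true);
        // ask for a gzipped Response (Accept-encoding: gzip) and let the client decompress it
        client.set_response_compress(true);

        // Use a deliberately large batch: compression only pays off for big payloads.
        float[] row = {0.0137f, -0.1136f, 0.2553f, -0.0692f, 0.0582f, -0.0727f,
                       -0.1583f, -0.0584f, 0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
        long[] batch_shape = {500, 13};
        INDArray batch = Nd4j.createFromArray(row).broadcast(batch_shape);
        HashMap<String, Object> feed = new HashMap<String, Object>();
        feed.put("x", batch);
        List<String> fetch = Arrays.asList("price");

        String result = client.predict(feed, fetch, true, 0);
        System.out.println(result);
    }
}
```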
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
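// Accuracy checker: reads the tab-separated score file given as the first argument,
// where each row is "predicted probability<TAB>label", and prints the total count,
// the number of rows whose prediction falls on the same side of 0.5 as the label,
// and the resulting accuracy.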
package main
import (
"io"
"os"
"fmt"
"bufio"
"strings"
"strconv"
)
func main() {
score_file := os.Args[1]
fi, err := os.Open(score_file)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
total := int(0)
acc := int(0)
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
s := strings.Split(line, "\t")
prob_str := strings.Trim(s[0], " ")
label_str := strings.Trim(s[1], " ")
prob, err := strconv.ParseFloat(prob_str, 32)
if err != nil {
panic(err)
}
label, err := strconv.ParseFloat(label_str, 32)
if err != nil {
panic(err)
}
if (prob - 0.5) * (label - 0.5) > 0 {
acc++
}
total++
}
fmt.Println("total num: ", total)
fmt.Println("acc num: ", acc)
fmt.Println("acc: ", float32(acc) / float32(total))
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
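// Example Go client: loads the client-side model config given as the first argument,
// connects to 127.0.0.1:9292, then for every line of the test file given as the second
// argument feeds the word ids as "words" and the label as "label", and prints the
// predicted score next to the ground-truth label.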
package main
import (
"io"
"fmt"
"strings"
"bufio"
"strconv"
"os"
serving_client "github.com/PaddlePaddle/Serving/go/serving_client"
)
func main() {
var config_file_path string
config_file_path = os.Args[1]
handle := serving_client.LoadModelConfig(config_file_path)
handle = serving_client.Connect("127.0.0.1", "9292", handle)
test_file_path := os.Args[2]
fi, err := os.Open(test_file_path)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
fetch := []string{"cost", "acc", "prediction"}
var result map[string][]float32
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
var words = []int64{}
s := strings.Split(line, " ")
value, err := strconv.Atoi(s[0])
var feed_int_map map[string][]int64
for _, v := range s[1:value + 1] {
int_v, _ := strconv.Atoi(v)
words = append(words, int64(int_v))
}
label, err := strconv.Atoi(s[len(s)-1])
if err != nil {
panic(err)
}
feed_int_map = map[string][]int64{}
feed_int_map["words"] = words
feed_int_map["label"] = []int64{int64(label)}
result = serving_client.Predict(handle,
feed_int_map, fetch)
fmt.Println(result["prediction"][1], "\t", int64(label))
}
}
\ No newline at end of file
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: general_model_config.proto
package baidu_paddle_serving_configure
import (
fmt "fmt"
proto "github.com/golang/protobuf/proto"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type FeedVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
FeedType *int32 `protobuf:"varint,4,opt,name=feed_type,json=feedType,def=0" json:"feed_type,omitempty"`
Shape []int32 `protobuf:"varint,5,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FeedVar) Reset() { *m = FeedVar{} }
func (m *FeedVar) String() string { return proto.CompactTextString(m) }
func (*FeedVar) ProtoMessage() {}
func (*FeedVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{0}
}
func (m *FeedVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FeedVar.Unmarshal(m, b)
}
func (m *FeedVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FeedVar.Marshal(b, m, deterministic)
}
func (m *FeedVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FeedVar.Merge(m, src)
}
func (m *FeedVar) XXX_Size() int {
return xxx_messageInfo_FeedVar.Size(m)
}
func (m *FeedVar) XXX_DiscardUnknown() {
xxx_messageInfo_FeedVar.DiscardUnknown(m)
}
var xxx_messageInfo_FeedVar proto.InternalMessageInfo
const Default_FeedVar_IsLodTensor bool = false
const Default_FeedVar_FeedType int32 = 0
func (m *FeedVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FeedVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FeedVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FeedVar_IsLodTensor
}
func (m *FeedVar) GetFeedType() int32 {
if m != nil && m.FeedType != nil {
return *m.FeedType
}
return Default_FeedVar_FeedType
}
func (m *FeedVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type FetchVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
Shape []int32 `protobuf:"varint,4,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FetchVar) Reset() { *m = FetchVar{} }
func (m *FetchVar) String() string { return proto.CompactTextString(m) }
func (*FetchVar) ProtoMessage() {}
func (*FetchVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{1}
}
func (m *FetchVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FetchVar.Unmarshal(m, b)
}
func (m *FetchVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FetchVar.Marshal(b, m, deterministic)
}
func (m *FetchVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FetchVar.Merge(m, src)
}
func (m *FetchVar) XXX_Size() int {
return xxx_messageInfo_FetchVar.Size(m)
}
func (m *FetchVar) XXX_DiscardUnknown() {
xxx_messageInfo_FetchVar.DiscardUnknown(m)
}
var xxx_messageInfo_FetchVar proto.InternalMessageInfo
const Default_FetchVar_IsLodTensor bool = false
func (m *FetchVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FetchVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FetchVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FetchVar_IsLodTensor
}
func (m *FetchVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type GeneralModelConfig struct {
FeedVar []*FeedVar `protobuf:"bytes,1,rep,name=feed_var,json=feedVar" json:"feed_var,omitempty"`
FetchVar []*FetchVar `protobuf:"bytes,2,rep,name=fetch_var,json=fetchVar" json:"fetch_var,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *GeneralModelConfig) Reset() { *m = GeneralModelConfig{} }
func (m *GeneralModelConfig) String() string { return proto.CompactTextString(m) }
func (*GeneralModelConfig) ProtoMessage() {}
func (*GeneralModelConfig) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{2}
}
func (m *GeneralModelConfig) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GeneralModelConfig.Unmarshal(m, b)
}
func (m *GeneralModelConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GeneralModelConfig.Marshal(b, m, deterministic)
}
func (m *GeneralModelConfig) XXX_Merge(src proto.Message) {
xxx_messageInfo_GeneralModelConfig.Merge(m, src)
}
func (m *GeneralModelConfig) XXX_Size() int {
return xxx_messageInfo_GeneralModelConfig.Size(m)
}
func (m *GeneralModelConfig) XXX_DiscardUnknown() {
xxx_messageInfo_GeneralModelConfig.DiscardUnknown(m)
}
var xxx_messageInfo_GeneralModelConfig proto.InternalMessageInfo
func (m *GeneralModelConfig) GetFeedVar() []*FeedVar {
if m != nil {
return m.FeedVar
}
return nil
}
func (m *GeneralModelConfig) GetFetchVar() []*FetchVar {
if m != nil {
return m.FetchVar
}
return nil
}
func init() {
proto.RegisterType((*FeedVar)(nil), "baidu.paddle_serving.configure.FeedVar")
proto.RegisterType((*FetchVar)(nil), "baidu.paddle_serving.configure.FetchVar")
proto.RegisterType((*GeneralModelConfig)(nil), "baidu.paddle_serving.configure.GeneralModelConfig")
}
func init() { proto.RegisterFile("general_model_config.proto", fileDescriptor_efa52beffa29d37a) }
var fileDescriptor_efa52beffa29d37a = []byte{
// 283 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0xd0, 0x31, 0x4b, 0xc4, 0x30,
0x14, 0x07, 0x70, 0x72, 0x6d, 0xb9, 0xf6, 0x1d, 0x2e, 0xc1, 0xa1, 0x08, 0x1e, 0xe5, 0x16, 0xe3,
0x52, 0xc4, 0xf1, 0x46, 0xc5, 0x73, 0x51, 0x87, 0x72, 0xb8, 0x86, 0xd8, 0xbc, 0xb6, 0x81, 0x5c,
0x53, 0x92, 0xde, 0xc1, 0x2d, 0x7e, 0x13, 0xf1, 0xab, 0x4a, 0x93, 0x43, 0x9c, 0x74, 0x72, 0x7b,
0x79, 0xff, 0xf0, 0xde, 0xe3, 0x07, 0x17, 0x2d, 0xf6, 0x68, 0x85, 0xe6, 0x3b, 0x23, 0x51, 0xf3,
0xda, 0xf4, 0x8d, 0x6a, 0xcb, 0xc1, 0x9a, 0xd1, 0xd0, 0xe5, 0x9b, 0x50, 0x72, 0x5f, 0x0e, 0x42,
0x4a, 0x8d, 0xdc, 0xa1, 0x3d, 0xa8, 0xbe, 0x2d, 0xc3, 0x97, 0xbd, 0xc5, 0xd5, 0x07, 0x81, 0xf9,
0x06, 0x51, 0xbe, 0x0a, 0x4b, 0x29, 0xc4, 0xbd, 0xd8, 0x61, 0x4e, 0x0a, 0xc2, 0xb2, 0xca, 0xd7,
0xf4, 0x12, 0x40, 0x68, 0x25, 0x1c, 0xf7, 0xc9, 0xcc, 0x27, 0x99, 0xef, 0xbc, 0x4c, 0xf1, 0x35,
0x9c, 0x29, 0xc7, 0xb5, 0x91, 0x7c, 0xc4, 0xde, 0x19, 0x9b, 0x47, 0x05, 0x61, 0xe9, 0x3a, 0x69,
0x84, 0x76, 0x58, 0x2d, 0x94, 0x7b, 0x32, 0x72, 0xeb, 0x13, 0xba, 0x84, 0xac, 0x41, 0x94, 0x7c,
0x3c, 0x0e, 0x98, 0xc7, 0x05, 0x61, 0xc9, 0x9a, 0xdc, 0x54, 0xe9, 0xd4, 0xdb, 0x1e, 0x07, 0xa4,
0xe7, 0x90, 0xb8, 0x4e, 0x0c, 0x98, 0x27, 0x45, 0xc4, 0x92, 0x2a, 0x3c, 0x56, 0xef, 0x90, 0x6e,
0x70, 0xac, 0xbb, 0xff, 0xbf, 0xef, 0x7b, 0x7f, 0xfc, 0x73, 0xff, 0x27, 0x01, 0xfa, 0x18, 0x78,
0x9f, 0x27, 0xdd, 0x7b, 0x2f, 0x47, 0xef, 0xc0, 0x1f, 0xce, 0x0f, 0xc2, 0xe6, 0xa4, 0x88, 0xd8,
0xe2, 0xf6, 0xaa, 0xfc, 0x5d, 0xba, 0x3c, 0x29, 0x57, 0xf3, 0xe6, 0xc4, 0xfd, 0x30, 0x81, 0x8c,
0x75, 0xe7, 0x87, 0xcc, 0xfc, 0x10, 0xf6, 0xf7, 0x90, 0x60, 0x31, 0xb9, 0x85, 0xea, 0x2b, 0x00,
0x00, 0xff, 0xff, 0x08, 0x27, 0x9c, 0x1a, 0xfe, 0x01, 0x00, 0x00,
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
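// Package serving_client is a minimal HTTP client for Paddle Serving: LoadModelConfig
// parses the binary GeneralModelConfig protobuf, Connect records the server address, and
// Predict packs the int64 feeds into a JSON Request, POSTs it to
// /GeneralModelService/inference and returns the float data of the requested fetch vars.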
package serving_client
import (
"bytes"
"encoding/json"
"io/ioutil"
"log"
"net/http"
pb "github.com/PaddlePaddle/Serving/go/proto"
"github.com/golang/protobuf/proto"
)
type Tensor struct {
Data []byte `json:"data"`
FloatData []float32 `json:"float_data"`
IntData []int `json:"int_data"`
Int64Data []int64 `json:"int64_data"`
ElemType int `json:"elem_type"`
Shape []int `json:"shape"`
}
type FeedInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type FetchInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type Request struct {
Insts []FeedInst `json:"insts"`
FetchVarNames []string `json:"fetch_var_names"`
ProfileServer bool `json:"profile_server"`
}
type Response struct {
Insts []FetchInst `json:"insts"`
ProfileTime []int64 `json:"profile_time"`
}
type Handle struct {
Url string
Port string
FeedAliasNameMap map[string]string
FeedShapeMap map[string][]int
FeedNameMap map[string]int
FeedAliasNames []string
FetchNameMap map[string]int
FetchAliasNameMap map[string]string
}
func LoadModelConfig(config string) Handle {
in, err := ioutil.ReadFile(config)
if err != nil {
log.Fatalln("Failed to read general model: ", err)
}
general_model_config := &pb.GeneralModelConfig{}
if err := proto.Unmarshal(in, general_model_config); err != nil {
log.Fatalln("Failed to parse GeneralModelConfig: ", err)
}
log.Println("read protobuf succeed")
handle := Handle{}
handle.FeedNameMap = map[string]int{}
handle.FeedAliasNameMap = map[string]string{}
handle.FeedShapeMap = map[string][]int{}
handle.FetchNameMap = map[string]int{}
handle.FetchAliasNameMap = map[string]string{}
handle.FeedAliasNames = []string{}
for i, v := range general_model_config.FeedVar {
handle.FeedNameMap[*v.Name] = i
tmp_array := []int{}
for _, vv := range v.Shape {
tmp_array = append(tmp_array, int(vv))
}
handle.FeedShapeMap[*v.Name] = tmp_array
handle.FeedAliasNameMap[*v.AliasName] = *v.Name
handle.FeedAliasNames = append(handle.FeedAliasNames, *v.AliasName)
}
for i, v := range general_model_config.FetchVar {
handle.FetchNameMap[*v.Name] = i
handle.FetchAliasNameMap[*v.AliasName] = *v.Name
}
return handle
}
func Connect(url string, port string, handle Handle) Handle {
handle.Url = url
handle.Port = port
return handle
}
func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map[string][]float32 {
contentType := "application/json;charset=utf-8"
var tensor_array []Tensor
var inst FeedInst
tensor_array = []Tensor{}
inst = FeedInst{}
for i := 0; i < len(handle.FeedAliasNames); i++ {
key_i := handle.FeedAliasNames[i]
var tmp Tensor
tmp.IntData = []int{}
tmp.Shape = []int{}
tmp.Int64Data = int_feed_map[key_i]
tmp.ElemType = 0
tmp.Shape = handle.FeedShapeMap[key_i]
tensor_array = append(tensor_array, tmp)
}
inst.TensorArray = tensor_array
var profile_server bool
profile_server = false
req := &Request{
Insts: []FeedInst{inst},
FetchVarNames: fetch,
ProfileServer: profile_server}
b, err := json.Marshal(req)
body := bytes.NewBuffer(b)
var post_address bytes.Buffer
post_address.WriteString("http://")
post_address.WriteString(handle.Url)
post_address.WriteString(":")
post_address.WriteString(handle.Port)
post_address.WriteString("/GeneralModelService/inference")
resp, err := http.Post(post_address.String(), contentType, body)
if err != nil {
log.Println("Post failed:", err)
}
defer resp.Body.Close()
content, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Read failed:", err)
}
response_json := Response{}
err = json.Unmarshal([]byte(content), &response_json)
var result map[string][]float32
result = map[string][]float32{}
for i, v := range fetch {
result[v] = response_json.Insts[0].TensorArray[i].FloatData
}
return result
}
...@@ -11,39 +11,83 @@ import org.nd4j.linalg.factory.Nd4j; ...@@ -11,39 +11,83 @@ import org.nd4j.linalg.factory.Nd4j;
import java.util.*; import java.util.*;
public class PaddleServingClientExample { public class PaddleServingClientExample {
boolean fit_a_line() { boolean fit_a_line(String model_config_path) {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f, float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f, 0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f}; 0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data); INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {1,13}; long[] batch_shape = {1,13};
INDArray batch_npdata = npdata.reshape(batch_shape); INDArray batch_npdata = npdata.reshape(batch_shape);
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("x", batch_npdata); put("x", batch_npdata);
}}; }};
List<String> fetch = Arrays.asList("price"); List<String> fetch = Arrays.asList("price");
Client client = new Client(); HttpClient client = new HttpClient();
String target = "localhost:9393"; client.setIP("0.0.0.0");
boolean succ = client.connect(target); client.setPort("9393");
if (succ != true) { client.loadClientConfig(model_config_path);
System.out.println("connect failed."); String result = client.predict(feed_data, fetch, true, 0);
return false;
} System.out.println(result);
return true;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch); boolean encrypt(String model_config_path,String keyFilePath) {
if (fetch_map == null) { float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
return false; 0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {1,13};
INDArray batch_npdata = npdata.reshape(batch_shape);
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("x", batch_npdata);
}};
List<String> fetch = Arrays.asList("price");
HttpClient client = new HttpClient();
client.setIP("0.0.0.0");
client.setPort("9393");
client.loadClientConfig(model_config_path);
client.use_key(keyFilePath);
try {
Thread.sleep(1000*3); // sleep for 3 seconds to wait for the Server to start
} catch (Exception e) {
//TODO: handle exception
} }
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) { boolean compress(String model_config_path) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue()); float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
} 0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {500,13};
INDArray batch_npdata = npdata.broadcast(batch_shape);
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("x", batch_npdata);
}};
List<String> fetch = Arrays.asList("price");
HttpClient client = new HttpClient();
client.setIP("0.0.0.0");
client.setPort("9393");
client.loadClientConfig(model_config_path);
client.set_request_compress(true);
client.set_response_compress(true);
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true; return true;
} }
boolean yolov4(String filename) { boolean yolov4(String model_config_path,String filename) {
// https://deeplearning4j.konduit.ai/ // https://deeplearning4j.konduit.ai/
int height = 608; int height = 608;
int width = 608; int width = 608;
...@@ -77,171 +121,44 @@ public class PaddleServingClientExample { ...@@ -77,171 +121,44 @@ public class PaddleServingClientExample {
INDArray im_size = Nd4j.createFromArray(new int[]{height, width}); INDArray im_size = Nd4j.createFromArray(new int[]{height, width});
long[] batch_size_shape = {1,2}; long[] batch_size_shape = {1,2};
INDArray batch_im_size = im_size.reshape(batch_size_shape); INDArray batch_im_size = im_size.reshape(batch_size_shape);
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("image", batch_image); put("image", batch_image);
put("im_size", batch_im_size); put("im_size", batch_im_size);
}}; }};
List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0"); List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0");
HttpClient client = new HttpClient();
Client client = new Client(); client.setIP("0.0.0.0");
String target = "localhost:9393"; client.setPort("9393");
boolean succ = client.connect(target); client.loadClientConfig(model_config_path);
if (succ != true) { String result = client.predict(feed_data, fetch, true, 0);
System.out.println("connect failed."); System.out.println(result);
return false;
}
succ = client.setRpcTimeoutMs(20000); // cpu
if (succ != true) {
System.out.println("set timeout failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean batch_predict() {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>() {{
add(feed_data);
add(feed_data);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_batch, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true; return true;
} }
boolean asyn_predict() { boolean bert(String model_config_path) {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
PredictFuture future = client.asyn_predict(feed_data, fetch);
Map<String, INDArray> fetch_map = future.get();
if (fetch_map == null) {
System.out.println("Get future reslut failed");
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean model_ensemble() {
long[] data = {8, 233, 52, 601};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("words", npdata);
}};
List<String> fetch = Arrays.asList("prediction");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, HashMap<String, INDArray>> fetch_map
= client.ensemble_predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, HashMap<String, INDArray>> entry : fetch_map.entrySet()) {
System.out.println("Model = " + entry.getKey());
HashMap<String, INDArray> tt = entry.getValue();
for (Map.Entry<String, INDArray> e : tt.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
}
return true;
}
boolean bert() {
float[] input_mask = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; float[] input_mask = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
long[] position_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; long[] position_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] input_ids = {101, 6843, 3241, 749, 8024, 7662, 2533, 1391, 2533, 2523, 7676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; long[] input_ids = {101, 6843, 3241, 749, 8024, 7662, 2533, 1391, 2533, 2523, 7676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] segment_ids = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; long[] segment_ids = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("input_mask", Nd4j.createFromArray(input_mask)); put("input_mask", Nd4j.createFromArray(input_mask));
put("position_ids", Nd4j.createFromArray(position_ids)); put("position_ids", Nd4j.createFromArray(position_ids));
put("input_ids", Nd4j.createFromArray(input_ids)); put("input_ids", Nd4j.createFromArray(input_ids));
put("segment_ids", Nd4j.createFromArray(segment_ids)); put("segment_ids", Nd4j.createFromArray(segment_ids));
}}; }};
List<String> fetch = Arrays.asList("pooled_output"); List<String> fetch = Arrays.asList("pooled_output");
HttpClient client = new HttpClient();
Client client = new Client(); client.setIP("0.0.0.0");
String target = "localhost:9393"; client.setPort("9393");
boolean succ = client.connect(target); client.loadClientConfig(model_config_path);
if (succ != true) { String result = client.predict(feed_data, fetch, true, 0);
System.out.println("connect failed."); System.out.println(result);
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true; return true;
} }
boolean cube_local() { boolean cube_local(String model_config_path) {
long[] embedding_14 = {250644}; long[] embedding_14 = {250644};
long[] embedding_2 = {890346}; long[] embedding_2 = {890346};
long[] embedding_10 = {3939}; long[] embedding_10 = {3939};
...@@ -271,8 +188,8 @@ public class PaddleServingClientExample { ...@@ -271,8 +188,8 @@ public class PaddleServingClientExample {
long[] embedding_19 = {537425}; long[] embedding_19 = {537425};
long[] embedding_0 = {737395}; long[] embedding_0 = {737395};
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("embedding_14.tmp_0", Nd4j.createFromArray(embedding_14)); put("embedding_14.tmp_0", Nd4j.createFromArray(embedding_14));
put("embedding_2.tmp_0", Nd4j.createFromArray(embedding_2)); put("embedding_2.tmp_0", Nd4j.createFromArray(embedding_2));
put("embedding_10.tmp_0", Nd4j.createFromArray(embedding_10)); put("embedding_10.tmp_0", Nd4j.createFromArray(embedding_10));
...@@ -302,23 +219,12 @@ public class PaddleServingClientExample { ...@@ -302,23 +219,12 @@ public class PaddleServingClientExample {
put("embedding_0.tmp_0", Nd4j.createFromArray(embedding_0)); put("embedding_0.tmp_0", Nd4j.createFromArray(embedding_0));
}}; }};
List<String> fetch = Arrays.asList("prob"); List<String> fetch = Arrays.asList("prob");
HttpClient client = new HttpClient();
Client client = new Client(); client.setIP("0.0.0.0");
String target = "localhost:9393"; client.setPort("9393");
boolean succ = client.connect(target); client.loadClientConfig(model_config_path);
if (succ != true) { String result = client.predict(feed_data, fetch, true, 0);
System.out.println("connect failed."); System.out.println(result);
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true; return true;
} }
...@@ -328,33 +234,33 @@ public class PaddleServingClientExample { ...@@ -328,33 +234,33 @@ public class PaddleServingClientExample {
PaddleServingClientExample e = new PaddleServingClientExample(); PaddleServingClientExample e = new PaddleServingClientExample();
boolean succ = false; boolean succ = false;
if (args.length < 1) { if (args.length < 2) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample <test-type>."); System.out.println("Usage: java -cp <jar> PaddleServingClientExample <test-type> <configPath>.");
System.out.println("<test-type>: fit_a_line bert model_ensemble asyn_predict batch_predict cube_local cube_quant yolov4"); System.out.println("<test-type>: fit_a_line bert compress cube_local yolov4 encrypt");
return; return;
} }
String testType = args[0]; String testType = args[0];
System.out.format("[Example] %s\n", testType); System.out.format("[Example] %s\n", testType);
if ("fit_a_line".equals(testType)) { if ("fit_a_line".equals(testType)) {
succ = e.fit_a_line(); succ = e.fit_a_line(args[1]);
} else if ("compress".equals(testType)) {
succ = e.compress(args[1]);
} else if ("bert".equals(testType)) { } else if ("bert".equals(testType)) {
succ = e.bert(); succ = e.bert(args[1]);
} else if ("model_ensemble".equals(testType)) {
succ = e.model_ensemble();
} else if ("asyn_predict".equals(testType)) {
succ = e.asyn_predict();
} else if ("batch_predict".equals(testType)) {
succ = e.batch_predict();
} else if ("cube_local".equals(testType)) { } else if ("cube_local".equals(testType)) {
succ = e.cube_local(); succ = e.cube_local(args[1]);
} else if ("cube_quant".equals(testType)) {
succ = e.cube_local();
} else if ("yolov4".equals(testType)) { } else if ("yolov4".equals(testType)) {
if (args.length < 2) { if (args.length < 3) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample yolov4 <image-filepath>."); System.out.println("Usage: java -cp <jar> PaddleServingClientExample yolov4 <configPath> <image-filepath>.");
return;
}
succ = e.yolov4(args[1],args[2]);
} else if ("encrypt".equals(testType)) {
if (args.length < 3) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample encrypt <configPath> <keyPath>.");
return; return;
} }
succ = e.yolov4(args[1]); succ = e.encrypt(args[1],args[2]);
} else { } else {
System.out.format("test-type(%s) not match.\n", testType); System.out.format("test-type(%s) not match.\n", testType);
return; return;
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -145,6 +145,11 @@ ...@@ -145,6 +145,11 @@
<artifactId>json</artifactId> <artifactId>json</artifactId>
<version>20190722</version> <version>20190722</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
......
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.stream.Collectors;
import java.util.List;
import java.util.ArrayList;
import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import io.grpc.StatusRuntimeException;
import com.google.protobuf.ByteString;
import com.google.common.util.concurrent.ListenableFuture;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.factory.Nd4j;
import io.paddle.serving.grpc.*;
import io.paddle.serving.configure.*;
import io.paddle.serving.client.PredictFuture;
class Profiler {
int pid_;
String print_head_ = null;
List<String> time_record_ = null;
boolean enable_ = false;
Profiler() {
RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean();
pid_ = Integer.valueOf(runtimeMXBean.getName().split("@")[0]).intValue();
print_head_ = "\nPROFILE\tpid:" + pid_ + "\t";
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
void record(String name) {
if (enable_) {
long ctime = System.currentTimeMillis() * 1000;
time_record_.add(name + ":" + String.valueOf(ctime) + " ");
}
}
void printProfile() {
if (enable_) {
String profile_str = String.join("", time_record_);
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
}
void enable(boolean flag) {
enable_ = flag;
}
}
public class Client {
private ManagedChannel channel_;
private MultiLangGeneralModelServiceGrpc.MultiLangGeneralModelServiceBlockingStub blockingStub_;
private MultiLangGeneralModelServiceGrpc.MultiLangGeneralModelServiceFutureStub futureStub_;
private double rpcTimeoutS_;
private List<String> feedNames_;
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
private Map<String, Integer> feedTensorLen_;
private Profiler profiler_;
public Client() {
channel_ = null;
blockingStub_ = null;
futureStub_ = null;
rpcTimeoutS_ = 2;
feedNames_ = null;
feedTypes_ = null;
feedShapes_ = null;
fetchNames_ = null;
fetchTypes_ = null;
lodTensorSet_ = null;
feedTensorLen_ = null;
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
if (FLAGS_profile_client != null && FLAGS_profile_client.equals("1")) {
is_profile = true;
}
profiler_.enable(is_profile);
}
public boolean setRpcTimeoutMs(int rpc_timeout) {
if (futureStub_ == null || blockingStub_ == null) {
System.out.println("set timeout must be set after connect.");
return false;
}
rpcTimeoutS_ = rpc_timeout / 1000.0;
SetTimeoutRequest timeout_req = SetTimeoutRequest.newBuilder()
.setTimeoutMs(rpc_timeout)
.build();
SimpleResponse resp;
try {
resp = blockingStub_.setTimeout(timeout_req);
} catch (StatusRuntimeException e) {
System.out.format("Set RPC timeout failed: %s\n", e.toString());
return false;
}
return resp.getErrCode() == 0;
}
public boolean connect(String target) {
// TODO: target must be NameResolver-compliant URI
// https://grpc.github.io/grpc-java/javadoc/io/grpc/ManagedChannelBuilder.html
try {
channel_ = ManagedChannelBuilder.forTarget(target)
.defaultLoadBalancingPolicy("round_robin")
.maxInboundMessageSize(Integer.MAX_VALUE)
.usePlaintext()
.build();
blockingStub_ = MultiLangGeneralModelServiceGrpc.newBlockingStub(channel_);
futureStub_ = MultiLangGeneralModelServiceGrpc.newFutureStub(channel_);
} catch (Exception e) {
System.out.format("Connect failed: %s\n", e.toString());
return false;
}
GetClientConfigRequest get_client_config_req = GetClientConfigRequest.newBuilder().build();
GetClientConfigResponse resp;
try {
resp = blockingStub_.getClientConfig(get_client_config_req);
} catch (Exception e) {
System.out.format("Get Client config failed: %s\n", e.toString());
return false;
}
String model_config_str = resp.getClientConfigStr();
_parseModelConfig(model_config_str);
return true;
}
private void _parseModelConfig(String model_config_str) {
GeneralModelConfig.Builder model_conf_builder = GeneralModelConfig.newBuilder();
try {
com.google.protobuf.TextFormat.getParser().merge(model_config_str, model_conf_builder);
} catch (com.google.protobuf.TextFormat.ParseException e) {
System.out.format("Parse client config failed: %s\n", e.toString());
}
GeneralModelConfig model_conf = model_conf_builder.build();
feedNames_ = new ArrayList<String>();
fetchNames_ = new ArrayList<String>();
feedTypes_ = new HashMap<String, Integer>();
feedShapes_ = new HashMap<String, List<Integer>>();
fetchTypes_ = new HashMap<String, Integer>();
lodTensorSet_ = new HashSet<String>();
feedTensorLen_ = new HashMap<String, Integer>();
List<FeedVar> feed_var_list = model_conf.getFeedVarList();
for (FeedVar feed_var : feed_var_list) {
feedNames_.add(feed_var.getAliasName());
}
List<FetchVar> fetch_var_list = model_conf.getFetchVarList();
for (FetchVar fetch_var : fetch_var_list) {
fetchNames_.add(fetch_var.getAliasName());
}
for (int i = 0; i < feed_var_list.size(); ++i) {
FeedVar feed_var = feed_var_list.get(i);
String var_name = feed_var.getAliasName();
feedTypes_.put(var_name, feed_var.getFeedType());
feedShapes_.put(var_name, feed_var.getShapeList());
if (feed_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
} else {
int counter = 1;
for (int dim : feedShapes_.get(var_name)) {
counter *= dim;
}
feedTensorLen_.put(var_name, counter);
}
}
for (int i = 0; i < fetch_var_list.size(); i++) {
FetchVar fetch_var = fetch_var_list.get(i);
String var_name = fetch_var.getAliasName();
fetchTypes_.put(var_name, fetch_var.getFetchType());
if (fetch_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
}
}
}
private InferenceRequest _packInferenceRequest(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) throws IllegalArgumentException {
List<String> feed_var_names = new ArrayList<String>();
feed_var_names.addAll(feed_batch.get(0).keySet());
InferenceRequest.Builder req_builder = InferenceRequest.newBuilder()
.addAllFeedVarNames(feed_var_names)
.addAllFetchVarNames(fetch)
.setIsPython(false)
.setLogId(log_id);
for (HashMap<String, INDArray> feed_data: feed_batch) {
FeedInst.Builder inst_builder = FeedInst.newBuilder();
for (String name: feed_var_names) {
Tensor.Builder tensor_builder = Tensor.newBuilder();
INDArray variable = feed_data.get(name);
long[] flattened_shape = {-1};
INDArray flattened_list = variable.reshape(flattened_shape);
int v_type = feedTypes_.get(name);
NdIndexIterator iter = new NdIndexIterator(flattened_list.shape());
if (v_type == 0) { // int64
while (iter.hasNext()) {
long[] next_index = iter.next();
long x = flattened_list.getLong(next_index);
tensor_builder.addInt64Data(x);
}
} else if (v_type == 1) { // float32
while (iter.hasNext()) {
long[] next_index = iter.next();
float x = flattened_list.getFloat(next_index);
tensor_builder.addFloatData(x);
}
} else if (v_type == 2) { // int32
while (iter.hasNext()) {
long[] next_index = iter.next();
// the interface of INDArray is strange:
// https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html
int[] int_next_index = new int[next_index.length];
for(int i = 0; i < next_index.length; i++) {
int_next_index[i] = (int)next_index[i];
}
int x = flattened_list.getInt(int_next_index);
tensor_builder.addIntData(x);
}
} else {
throw new IllegalArgumentException("error tensor value type.");
}
long[] longArray = variable.shape();
int[] intArray = Arrays.stream(longArray).mapToInt(i -> (int) i).toArray();
List<Integer> indarrayShapeList = Arrays.stream(intArray).boxed().collect(Collectors.toList());
//tensor_builder.addAllShape(feedShapes_.get(name));
tensor_builder.addAllShape(indarrayShapeList);
inst_builder.addTensorArray(tensor_builder.build());
}
req_builder.addInsts(inst_builder.build());
}
return req_builder.build();
}
private Map<String, HashMap<String, INDArray>>
_unpackInferenceResponse(
InferenceResponse resp,
Iterable<String> fetch,
Boolean need_variant_tag) throws IllegalArgumentException {
return Client._staticUnpackInferenceResponse(
resp, fetch, fetchTypes_, lodTensorSet_, need_variant_tag);
}
private static Map<String, HashMap<String, INDArray>>
_staticUnpackInferenceResponse(
InferenceResponse resp,
Iterable<String> fetch,
Map<String, Integer> fetchTypes,
Set<String> lodTensorSet,
Boolean need_variant_tag) throws IllegalArgumentException {
if (resp.getErrCode() != 0) {
return null;
}
String tag = resp.getTag();
HashMap<String, HashMap<String, INDArray>> multi_result_map
= new HashMap<String, HashMap<String, INDArray>>();
for (ModelOutput model_result: resp.getOutputsList()) {
String engine_name = model_result.getEngineName();
FetchInst inst = model_result.getInsts(0);
HashMap<String, INDArray> result_map
= new HashMap<String, INDArray>();
int index = 0;
for (String name: fetch) {
Tensor variable = inst.getTensorArray(index);
int v_type = fetchTypes.get(name);
INDArray data = null;
if (v_type == 0) { // int64
List<Long> list = variable.getInt64DataList();
long[] array = new long[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
data = Nd4j.createFromArray(array);
} else if (v_type == 1) { // float32
List<Float> list = variable.getFloatDataList();
float[] array = new float[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
data = Nd4j.createFromArray(array);
} else if (v_type == 2) { // int32
List<Integer> list = variable.getIntDataList();
int[] array = new int[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
data = Nd4j.createFromArray(array);
} else {
throw new IllegalArgumentException("error tensor value type.");
}
// shape
List<Integer> shape_list = variable.getShapeList();
int[] shape_array = new int[shape_list.size()];
for (int i = 0; i < shape_list.size(); ++i) {
shape_array[i] = shape_list.get(i);
}
data = data.reshape(shape_array);
// put data to result_map
result_map.put(name, data);
// lod
if (lodTensorSet.contains(name)) {
List<Integer> list = variable.getLodList();
int[] array = new int[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
result_map.put(name + ".lod", Nd4j.createFromArray(array));
}
index += 1;
}
multi_result_map.put(engine_name, result_map);
}
// TODO: tag(ABtest not support now)
return multi_result_map;
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch) {
return predict(feed, fetch, false, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return predict(feed, fetch, false, log_id);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch) {
return ensemble_predict(feed, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed, fetch, false, log_id);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch) {
return asyn_predict(feed, fetch, false, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed, fetch, false, log_id);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag) {
return predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed);
return predict(feed_batch, fetch, need_variant_tag, log_id);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag) {
return ensemble_predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed);
return ensemble_predict(feed_batch, fetch, need_variant_tag, log_id);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag) {
return asyn_predict(feed, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed);
return asyn_predict(feed_batch, fetch, need_variant_tag, log_id);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) {
return predict(feed_batch, fetch, false, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return predict(feed_batch, fetch, false, log_id);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) {
return ensemble_predict(feed_batch, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed_batch, fetch, false, log_id);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) {
return asyn_predict(feed_batch, fetch, false, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed_batch, fetch, false, log_id);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag) {
return predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try {
profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0");
InferenceResponse resp = blockingStub_.inference(req);
profiler_.record("java_client_infer_1");
profiler_.record("java_postpro_0");
Map<String, HashMap<String, INDArray>> ensemble_result
= _unpackInferenceResponse(resp, fetch, need_variant_tag);
List<Map.Entry<String, HashMap<String, INDArray>>> list
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet());
if (list.size() != 1) {
System.out.format("Failed to predict: please use ensemble_predict impl.\n");
return null;
}
profiler_.record("java_postpro_1");
profiler_.printProfile();
return list.get(0).getValue();
} catch (StatusRuntimeException e) {
System.out.format("Failed to predict: %s\n", e.toString());
return null;
}
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag) {
return ensemble_predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try {
profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0");
InferenceResponse resp = blockingStub_.inference(req);
profiler_.record("java_client_infer_1");
profiler_.record("java_postpro_0");
Map<String, HashMap<String, INDArray>> ensemble_result
= _unpackInferenceResponse(resp, fetch, need_variant_tag);
profiler_.record("java_postpro_1");
profiler_.printProfile();
return ensemble_result;
} catch (StatusRuntimeException e) {
System.out.format("Failed to predict: %s\n", e.toString());
return null;
}
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag) {
return asyn_predict(feed_batch, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
ListenableFuture<InferenceResponse> future = futureStub_.inference(req);
PredictFuture predict_future = new PredictFuture(future,
(InferenceResponse resp) -> {
return Client._staticUnpackInferenceResponse(
resp, fetch, fetchTypes_, lodTensorSet_, need_variant_tag);
}
);
return predict_future;
}
}
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.stream.Collectors;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map.Entry;
import java.nio.file.*;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.factory.Nd4j;
import java.lang.reflect.*;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.http.entity.InputStreamEntity;
import org.json.*;
import io.paddle.serving.configure.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
enum ElementType
{
Int64_type, Float32_type, Int32_type, Bytes_type;
}
class Profiler {
int pid_;
String print_head_ = null;
List<String> time_record_ = null;
boolean enable_ = false;
Profiler() {
RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean();
pid_ = Integer.valueOf(runtimeMXBean.getName().split("@")[0]).intValue();
print_head_ = "\nPROFILE\tpid:" + pid_ + "\t";
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
void record(String name) {
if (enable_) {
long ctime = System.currentTimeMillis() * 1000;
time_record_.add(name + ":" + String.valueOf(ctime) + " ");
}
}
void printProfile() {
if (enable_) {
String profile_str = String.join("", time_record_);
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
}
void enable(boolean flag) {
enable_ = flag;
}
}
public class HttpClient {
private int httpTimeoutS_;
private List<String> feedNames_;
private Map<String, String> feedRealNames_;
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private Map<String, Integer> feedNameToIndex_;
private Map<Integer, String> feedTypeToDataKey_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
private Map<String, Integer> feedTensorLen_;
private Profiler profiler_;
private String ip;
private String serverPort;
private String port;
private String serviceName;
private boolean request_compress_flag;
private boolean response_compress_flag;
private String GLOG_v;
public HttpClient() {
feedNames_ = null;
feedRealNames_ = null;
feedTypes_ = null;
feedShapes_ = null;
fetchNames_ = null;
fetchTypes_ = null;
lodTensorSet_ = null;
feedTensorLen_ = null;
feedNameToIndex_ = null;
httpTimeoutS_ = 200000;
ip = "0.0.0.0";
port = "9393";
serverPort = "9393";
serviceName = "/GeneralModelService/inference";
request_compress_flag = false;
response_compress_flag = false;
GLOG_v = System.getenv("GLOG_v");
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(0, "int64_data");
feedTypeToDataKey_.put(1, "float_data");
feedTypeToDataKey_.put(2, "int_data");
feedTypeToDataKey_.put(3, "data");
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
if (FLAGS_profile_client != null && FLAGS_profile_client.equals("1")) {
is_profile = true;
}
profiler_.enable(is_profile);
}
public void setTimeOut(int httpTimeoutS_) {
this.httpTimeoutS_ = httpTimeoutS_;
}
public void setIP(String ip) {
this.ip = ip;
}
public void setPort(String port) {
this.port = port;
this.serverPort = port;
}
public void setServiceName(String serviceName){
this.serviceName = serviceName;
}
public void loadClientConfig(String model_config_path) {
GeneralModelConfig.Builder model_conf_builder = GeneralModelConfig.newBuilder();
try {
byte[] data = Files.readAllBytes(Paths.get(model_config_path));
String model_config_str = new String(data, "utf-8");
com.google.protobuf.TextFormat.getParser().merge(model_config_str, model_conf_builder);
} catch (com.google.protobuf.TextFormat.ParseException e) {
System.out.format("Parse client config failed: %s\n", e.toString());
} catch (Exception e) {
System.out.format("Open client config failed: %s\n", e.toString());
}
GeneralModelConfig model_conf = model_conf_builder.build();
feedNames_ = new ArrayList<String>();
feedRealNames_ = new HashMap<String, String>();
feedTypes_ = new HashMap<String, Integer>();
feedShapes_ = new HashMap<String, List<Integer>>();
lodTensorSet_ = new HashSet<String>();
feedTensorLen_ = new HashMap<String, Integer>();
feedNameToIndex_ = new HashMap<String, Integer>();
fetchNames_ = new ArrayList<String>();
fetchTypes_ = new HashMap<String, Integer>();
List<FeedVar> feed_var_list = model_conf.getFeedVarList();
for (int i = 0; i < feed_var_list.size(); ++i) {
FeedVar feed_var = feed_var_list.get(i);
String var_name = feed_var.getAliasName();
feedNames_.add(var_name);
feedRealNames_.put(var_name, feed_var.getName());
feedTypes_.put(var_name, feed_var.getFeedType());
feedShapes_.put(var_name, feed_var.getShapeList());
feedNameToIndex_.put(var_name, i);
if (feed_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
} else {
int counter = 1;
for (int dim : feedShapes_.get(var_name)) {
counter *= dim;
}
feedTensorLen_.put(var_name, counter);
}
}
List<FetchVar> fetch_var_list = model_conf.getFetchVarList();
for (int i = 0; i < fetch_var_list.size(); i++) {
FetchVar fetch_var = fetch_var_list.get(i);
String var_name = fetch_var.getAliasName();
fetchNames_.add(var_name);
fetchTypes_.put(var_name, fetch_var.getFetchType());
}
}
public void use_key(String keyFilePath) {
String key_str = null;
String encrypt_url = "http://" + this.ip + ":" +this.port;
try {
byte[] data = Files.readAllBytes(Paths.get(keyFilePath));
key_str = Base64.getEncoder().encodeToString(data);
} catch (Exception e) {
System.out.format("Open key file failed: %s\n", e.toString());
}
JSONObject jsonKey = new JSONObject();
if( key_str != null) {
jsonKey.put("key", key_str);
}else{
jsonKey.put("key", "");
}
String result = doPost(encrypt_url, jsonKey.toString());
try {
JSONObject jsonObject = new JSONObject(result);
JSONArray jsonArray = jsonObject.getJSONArray("endpoint_list");
this.serverPort = jsonArray.getString(0);
System.out.format("Real ServerPort is: %s\n", this.serverPort);
}catch (JSONException err) {
System.out.format("Parse serverPort failed: %s\n", err.toString());
}
}
public void set_request_compress(boolean request_compress_flag) {
// need to be done.
this.request_compress_flag = request_compress_flag;
}
public void set_response_compress(boolean response_compress_flag) {
// need to be done.
this.response_compress_flag = response_compress_flag;
}
public byte[] compress(String str) {
if (str == null || str.length() == 0) {
return null;
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip;
try {
gzip = new GZIPOutputStream(out);
gzip.write(str.getBytes("UTF-8"));
gzip.close();
} catch (Exception e) {
e.printStackTrace();
}
return out.toByteArray();
}
// Helper interface that wraps the HTTP request for the user: only feedData, lod and
// the fetch list need to be passed in. Assembling the JSON body according to the proto
// is done by this method, and the interface is largely consistent with the Python client.
// Four overloads are provided, taking at minimum feedData and fetch, and optionally
// lod and batchFlag. A usage sketch is given after this class.
public String predict(Map<String, Object> feedData,
List<String> fetch,
int log_id) {
return predict(feedData,null,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
List<String> fetch,
boolean batchFlag,
int log_id) {
return predict(feedData,null,fetch,batchFlag,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
int log_id) {
return predict(feedData,feedLod,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
boolean batchFlag,
int log_id) {
String server_url = "http://" + this.ip + ":" + this.serverPort + this.serviceName;
// Build the fetch list.
JSONArray jsonFetchList = new JSONArray();
Iterator<String> fetchIterator = fetch.iterator();
while (fetchIterator.hasNext()) {
jsonFetchList.put(fetchIterator.next());
}
// Build the tensor array.
JSONArray jsonTensorArray = new JSONArray();
try{
if (null != feedData && feedData.size() > 0) {
// Iterate over the entries of the feed map.
Set<Entry<String, Object>> entrySet = feedData.entrySet();
Iterator<Entry<String, Object>> iterator = entrySet.iterator();
while (iterator.hasNext()) {
JSONObject jsonTensor = new JSONObject();
Entry<String, Object> mapEntry = iterator.next();
Object objectValue = mapEntry.getValue();
String feed_alias_name = mapEntry.getKey();
String feed_real_name = feedRealNames_.get(feed_alias_name);
List<Integer> shape = new ArrayList<Integer>(feedShapes_.get(feed_alias_name));
int element_type = feedTypes_.get(feed_alias_name);
jsonTensor.put("alias_name", feed_alias_name);
jsonTensor.put("name", feed_real_name);
jsonTensor.put("elem_type", element_type);
// Fill in the data and shape fields.
String protoDataKey = feedTypeToDataKey_.get(element_type);
// If the value is an INDArray, flatten it to 1-D first;
// in that case the shape is taken from the INDArray itself.
if(objectValue instanceof INDArray){
INDArray tempIndArray = (INDArray)objectValue;
long[] indarrayShape = tempIndArray.shape();
shape.clear();
for(long dim:indarrayShape){
shape.add((int)dim);
}
objectValue = tempIndArray.data().asDouble();
}else if(objectValue.getClass().isArray()){
// Plain arrays are used as-is. Arrays are not nested here, so the batch size
// cannot be derived from the data: the batch dimension defaults to 1, or is
// assumed to be already contained in the feedVar shape.
}else if(objectValue instanceof List){
// Lists may be nested and need to be flattened. If batchFlag is true, the value
// is treated as a nested list and the outermost level is the batch dimension.
if (batchFlag) {
List<?> list = new ArrayList<>((Collection<?>)objectValue);
// Insert the batch size at index 0 of the shape.
shape.add(0, list.size());
}
objectValue = recursiveExtract(objectValue);
}else{
// Here the value is a single String, Integer, etc. No batch information can be
// derived, so the shape is left untouched. Because the proto field is repeated,
// the value is wrapped in a list.
if(objectValue instanceof String){
if(element_type != ElementType.Bytes_type.ordinal()){
throw new Exception("feedvar is not string type, feed cannot be a single string.");
}
}else{
if(element_type == ElementType.Bytes_type.ordinal()){
throw new Exception("feedvar is string type, feed cannot be a single int or other scalar.");
}
}
List<Object> list = new ArrayList<>();
list.add(objectValue);
objectValue = list;
}
jsonTensor.put(protoDataKey,objectValue);
if(!batchFlag){
// Insert batch = 1 at index 0 of the shape.
shape.add(0, 1);
}
jsonTensor.put("shape", shape);
// Fill in the lod info; INDArray, plain arrays and Iterables are supported.
Object feedLodValue = null;
if(feedLod != null){
feedLodValue = feedLod.get(feed_alias_name);
if(feedLodValue != null) {
if(feedLodValue instanceof INDArray){
INDArray tempIndArray = (INDArray)feedLodValue;
feedLodValue = tempIndArray.data().asInt();
}else if(feedLodValue.getClass().isArray()){
// Plain arrays are used as-is.
}else if(feedLodValue instanceof Iterable){
// Lists may be nested, so flatten them.
feedLodValue = recursiveExtract(feedLodValue);
}else{
throw new Exception("Lod must be INDArray or Array or Iterable.");
}
jsonTensor.put("lod", feedLodValue);
}
}
jsonTensorArray.put(jsonTensor);
}
}
}catch (Exception e) {
e.printStackTrace();
}
JSONObject jsonRequest = new JSONObject();
jsonRequest.put("log_id",log_id);
jsonRequest.put("fetch_var_names", jsonFetchList);
jsonRequest.put("tensor",jsonTensorArray);
if(GLOG_v != null){
System.out.format("------- Final jsonRequest: %s\n", jsonRequest.toString());
}
return doPost(server_url, jsonRequest.toString());
}
public String doPost(String url, String strPostData) {
CloseableHttpClient httpClient = null;
CloseableHttpResponse httpResponse = null;
String result = "";
// Create the HttpClient instance.
httpClient = HttpClients.createDefault();
// Create the HttpPost request.
HttpPost httpPost = new HttpPost(url);
// Configure the request timeouts.
RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(httpTimeoutS_)// connect timeout
.setConnectionRequestTimeout(httpTimeoutS_)// connection request timeout
.setSocketTimeout(httpTimeoutS_)// socket (read) timeout
.build();
// Apply the config to the HttpPost instance.
httpPost.setConfig(requestConfig);
httpPost.setHeader("Content-Type", "application/json;charset=utf-8");
// Set the request headers.
if(response_compress_flag){
httpPost.addHeader("Accept-encoding", "gzip");
if(GLOG_v != null){
System.out.format("------- Accept-encoding gzip: \n");
}
}
try {
if(request_compress_flag && strPostData.length()>1024){
try{
byte[] gzipEncrypt = compress(strPostData);
httpPost.setEntity(new InputStreamEntity(new ByteArrayInputStream(gzipEncrypt), gzipEncrypt.length));
httpPost.addHeader("Content-Encoding", "gzip");
} catch (Exception e) {
e.printStackTrace();
}
}else{
httpPost.setEntity(new StringEntity(strPostData, "UTF-8"));
}
// Execute the POST request and get the response.
httpResponse = httpClient.execute(httpPost);
// Read the response entity.
HttpEntity entity = httpResponse.getEntity();
Header header = entity.getContentEncoding();
if(GLOG_v != null){
System.out.format("------- response header: %s\n", header);
}
if(header != null && header.getValue().equalsIgnoreCase("gzip")){ // the response body is gzip-compressed
GzipDecompressingEntity gzipEntity = new GzipDecompressingEntity(entity);
result = EntityUtils.toString(gzipEntity);
if(GLOG_v != null){
System.out.format("------- degzip response: %s\n", result);
}
}else{
result = EntityUtils.toString(entity);
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
// Release resources.
if (null != httpResponse) {
try {
httpResponse.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != httpClient) {
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
public List<Object> recursiveExtract(Object stuff) {
List<Object> mylist = new ArrayList<Object>();
if(stuff instanceof Iterable) {
for(Object o : (Iterable< ? >)stuff) {
mylist.addAll(recursiveExtract(o));
}
} else if(stuff instanceof Map) {
for(Object o : ((Map<?, ? extends Object>) stuff).values()) {
mylist.addAll(recursiveExtract(o));
}
} else {
mylist.add(stuff);
}
return mylist;
}
}
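For reference, a minimal usage sketch of the HttpClient above (not part of the original file). It assumes the Java client package `io.paddle.serving.client` is on the classpath and reuses the feed/fetch names of the fit_a_line (uci_housing) example later in this change — feed variable `x` with shape [13] and fetch variable `price`; the config path is illustrative and must point at your own `serving_client_conf.prototxt`.

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import io.paddle.serving.client.HttpClient;  // package name assumed

public class HttpClientDemo {
    public static void main(String[] args) {
        HttpClient client = new HttpClient();
        client.setIP("127.0.0.1");   // serving address, adjust as needed
        client.setPort("9393");      // serving port, adjust as needed
        // Client-side prototxt generated when the model was saved (illustrative path).
        client.loadClientConfig("uci_housing_client/serving_client_conf.prototxt");

        // One feed variable "x" holding a float tensor of shape [1, 13]; the batch
        // dimension is already present, so batchFlag is set to true below.
        INDArray x = Nd4j.rand(1, 13);
        Map<String, Object> feedData = new HashMap<String, Object>();
        feedData.put("x", x);
        List<String> fetch = Arrays.asList("price");

        // predict(feedData, fetch, batchFlag, log_id) returns the raw JSON response body.
        String result = client.predict(feedData, fetch, true, 0);
        System.out.println(result);
    }
}
```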
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import io.grpc.StatusRuntimeException;
import com.google.common.util.concurrent.ListenableFuture;
import org.nd4j.linalg.api.ndarray.INDArray;
import io.paddle.serving.client.Client;
import io.paddle.serving.grpc.*;
public class PredictFuture {
private ListenableFuture<InferenceResponse> callFuture_;
private Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> callBackFunc_;
PredictFuture(ListenableFuture<InferenceResponse> call_future,
Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> call_back_func) {
callFuture_ = call_future;
callBackFunc_ = call_back_func;
}
public Map<String, INDArray> get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
Map<String, HashMap<String, INDArray>> ensemble_result
= callBackFunc_.apply(resp);
List<Map.Entry<String, HashMap<String, INDArray>>> list
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet());
if (list.size() != 1) {
System.out.format("predict failed: please use get_ensemble impl.\n");
return null;
}
return list.get(0).getValue();
}
public Map<String, HashMap<String, INDArray>> ensemble_get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
return callBackFunc_.apply(resp);
}
}
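As a hedged illustration (not part of the original file), this is how a caller might consume a `PredictFuture`; how the future itself is obtained from the gRPC client's asynchronous predict call is intentionally left out, since that part of the Client API is not shown here.

```java
import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import io.paddle.serving.client.PredictFuture;

public class PredictFutureDemo {
    // Blocks on the future and prints every fetched variable.
    static void printResult(PredictFuture future) {
        Map<String, INDArray> fetchMap = future.get();  // returns null on failure or ensemble results
        if (fetchMap == null) {
            System.out.println("predict failed, or the result is an ensemble: use ensemble_get()");
            return;
        }
        for (Map.Entry<String, INDArray> entry : fetchMap.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}
```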
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
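For reference, a sketch (not part of the original file) of how a request for `MultiLangGeneralModelService.Inference` could be assembled with the Java classes that protoc generates from this file (package `io.paddle.serving.grpc`, per the options above). `elem_type = 1` denotes float32, matching the type comments in the Python client; the variable names `x` and `price` are taken from the fit_a_line example.

```java
import io.paddle.serving.grpc.FeedInst;
import io.paddle.serving.grpc.InferenceRequest;
import io.paddle.serving.grpc.Tensor;

public class InferenceRequestDemo {
    public static void main(String[] args) {
        // One float32 tensor of shape [1, 13] for the feed variable "x".
        Tensor.Builder tensor = Tensor.newBuilder()
                .setElemType(1)   // 1 == float32
                .addShape(1)
                .addShape(13);
        for (int i = 0; i < 13; ++i) {
            tensor.addFloatData(0.0f);  // placeholder values
        }
        InferenceRequest request = InferenceRequest.newBuilder()
                .addFeedVarNames("x")
                .addFetchVarNames("price")
                .addInsts(FeedInst.newBuilder().addTensorArray(tensor))
                .setIsPython(false)  // required; false because data is sent via the repeated float_data field
                .setLogId(0)         // required
                .build();
        System.out.println(request);
    }
}
```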
File mode changed from 100755 to 100644
...@@ -81,7 +81,6 @@ if (SERVER) ...@@ -81,7 +81,6 @@ if (SERVER)
if(WITH_LITE) if(WITH_LITE)
set(VERSION_SUFFIX 2) set(VERSION_SUFFIX 2)
endif() endif()
add_custom_command( add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r COMMAND cp -r
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
## Criteo CTR with Sparse Parameter Indexing Service
([简体中文](./README_CN.md)|English)
### Get Sample Dataset
Go to the directory `python/examples/criteo_ctr_with_cube` and run:
```
sh get_data.sh
```
### Download Model and Sparse Parameter Sequence Files
```
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
tar xf ctr_cube_unittest.tar.gz
mv models/ctr_client_conf ./
mv models/ctr_serving_model_kv ./
mv models/data ./cube/
```
After extraction, the model and client config are in `./ctr_serving_model_kv` and `./ctr_client_conf`.
### Start Sparse Parameter Indexing Service
```
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh &
```
Here, the sparse parameters of the model are served by Cube, the sparse parameter indexing service.
### Start RPC Predictor (the number of serving threads is 4, configurable in test_server.py)
```
python test_server.py ctr_serving_model_kv
```
### Run Prediction
```
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
```
### Benchmark
CPU: Intel(R) Xeon(R) CPU 6148 @ 2.40GHz
Model: [Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr_with_cube/network_conf.py)
Server core/thread num: 4/8
Run:
```
bash benchmark.sh
```
1000 batches will be sent by every client
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | avg_latency | qps |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | ----- |
| 1 | 0.035 | 1.596 | 0.021 | 0.518 | 0.0024 | 0.0025 | 6.774 | 147.7 |
| 2 | 0.034 | 1.780 | 0.027 | 0.463 | 0.0020 | 0.0023 | 6.931 | 288.3 |
| 4 | 0.038 | 2.954 | 0.025 | 0.455 | 0.0019 | 0.0027 | 8.378 | 477.5 |
| 8 | 0.044 | 8.230 | 0.028 | 0.464 | 0.0023 | 0.0034 | 14.191 | 563.8 |
| 16 | 0.048 | 21.037 | 0.028 | 0.455 | 0.0025 | 0.0041 | 27.236 | 587.5 |
The average latency per client thread:
![avg cost](../../../doc/criteo-cube-benchmark-avgcost.png)
The QPS per client thread:
![qps](../../../doc/criteo-cube-benchmark-qps.png)
## 带稀疏参数索引服务的CTR预测服务
(简体中文|[English](./README.md))
### 获取样例数据
进入目录 `python/examples/criteo_ctr_with_cube`
```
sh get_data.sh
```
### 下载模型和稀疏参数序列文件
```
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
tar xf ctr_cube_unittest.tar.gz
mv models/ctr_client_conf ./
mv models/ctr_serving_model_kv ./
mv models/data ./cube/
```
执行脚本后会在当前目录有ctr_serving_model_kv和ctr_client_conf文件夹。
### 启动稀疏参数索引服务
```
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh &
```
此处,模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中。
### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置)
```
python test_server.py ctr_serving_model_kv
```
### 执行预测
```
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
```
### Benchmark
设备 :Intel(R) Xeon(R) CPU 6148 @ 2.40GHz
模型 :[Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr_with_cube/network_conf.py)
server core/thread num : 4/8
执行
```
bash benchmark.sh
```
客户端每个线程会发送1000个batch
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | avg_latency | qps |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | ----- |
| 1 | 0.035 | 1.596 | 0.021 | 0.518 | 0.0024 | 0.0025 | 6.774 | 147.7 |
| 2 | 0.034 | 1.780 | 0.027 | 0.463 | 0.0020 | 0.0023 | 6.931 | 288.3 |
| 4 | 0.038 | 2.954 | 0.025 | 0.455 | 0.0019 | 0.0027 | 8.378 | 477.5 |
| 8 | 0.044 | 8.230 | 0.028 | 0.464 | 0.0023 | 0.0034 | 14.191 | 563.8 |
| 16 | 0.048 | 21.037 | 0.028 | 0.455 | 0.0025 | 0.0041 | 27.236 | 587.5 |
平均每个线程耗时图如下
![avg cost](../../../doc/criteo-cube-benchmark-avgcost.png)
每个线程QPS耗时如下
![qps](../../../doc/criteo-cube-benchmark-qps.png)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,58 +14,49 @@ ...@@ -14,58 +14,49 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import sys import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class CriteoDataset(dg.MultiSlotDataGenerator):
class IMDBDataset(dg.MultiSlotDataGenerator): def setup(self, sparse_feature_dim):
def load_resource(self, dictfile): self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
self._vocab = {} self.cont_max_ = [
wid = 0 20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
if py_version == 2: ]
with open(dictfile) as f: self.cont_diff_ = [
for line in f: 20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
] ]
return feas self.hash_dim_ = sparse_feature_dim
# here, training data are lines with line_index < train_idx_
self.train_idx_ = 41256555
self.continuous_range_ = range(1, 14)
self.categorical_range_ = range(14, 40)
def get_words_and_label(self, line): def _process_line(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />", features = line.rstrip('\n').split('\t')
" ").strip() dense_feature = []
label = [int(line.split('|')[-1])] sparse_feature = []
for idx in self.continuous_range_:
if features[idx] == '':
dense_feature.append(0.0)
else:
dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \
self.cont_diff_[idx - 1])
for idx in self.categorical_range_:
sparse_feature.append(
[hash(str(idx) + features[idx]) % self.hash_dim_])
words = [x for x in self._pattern.split(send) if x and x != " "] return dense_feature, sparse_feature, [int(features[0])]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size): def infer_reader(self, filelist, batch, buf_size):
def local_iter(): def local_iter():
for fname in infer_filelist: for fname in filelist:
with open(fname, "r") as fin: with open(fname.strip(), "r") as fin:
for line in fin: for line in fin:
feas, label = self.get_words_and_label(line) dense_feature, sparse_feature, label = self._process_line(
yield feas, label line)
#yield dense_feature, sparse_feature, label
yield [dense_feature] + sparse_feature + [label]
import paddle import paddle
batch_iter = paddle.batch( batch_iter = paddle.batch(
...@@ -75,18 +66,18 @@ class IMDBDataset(dg.MultiSlotDataGenerator): ...@@ -75,18 +66,18 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
return batch_iter return batch_iter
def generate_sample(self, line): def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter(): def data_iter():
feas, label = self.get_words_and_label(line) dense_feature, sparse_feature, label = self._process_line(line)
yield ("words", feas), ("label", label) feature_name = ["dense_input"]
for idx in self.categorical_range_:
feature_name.append("C" + str(idx - 13))
feature_name.append("label")
yield zip(feature_name, [dense_feature] + sparse_feature + [label])
return data_iter return data_iter
if __name__ == "__main__": if __name__ == "__main__":
imdb = IMDBDataset() criteo_dataset = CriteoDataset()
imdb.load_resource("imdb.vocab") criteo_dataset.setup(int(sys.argv[1]))
imdb.run_from_stdin() criteo_dataset.run_from_stdin()
...@@ -12,25 +12,9 @@ ...@@ -12,25 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client #! /bin/bash
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
batch_size = 2 mkdir -p cube_model
x = [ mkdir -p cube/data
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, ./cube/cube-builder -dict_name=test_dict -job_mode=base -last_version=0 -cur_version=0 -depend_version=0 -input_path=./cube_model -output_path=${PWD}/cube/data -shard_num=1 -only_build=false
0.4919, 0.1856, 0.0795, -0.0332 cd cube && ./cube
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 1, 13))
batch_data = np.concatenate([new_data, new_data, new_data], axis=0)
print(batch_data.shape)
fetch_map = client.predict(
feed={"x": batch_data}, fetch=["price"], batch=True)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz wget --no-check-certificate https://paddle-serving.bj.bcebos.com/data/ctr_prediction/ctr_data.tar.gz
tar -xzf uci_housing.tar.gz tar -zxvf ctr_data.tar.gz
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import print_function
from args import parse_args
import os
import paddle.fluid as fluid
import paddle
import sys
from network_conf import dnn_model
dense_feature_dim = 13
paddle.enable_static()
def train():
args = parse_args()
sparse_only = args.sparse_only
if not os.path.isdir(args.model_output_dir):
os.mkdir(args.model_output_dir)
dense_input = fluid.layers.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32')
sparse_input_ids = [
fluid.layers.data(
name="C" + str(i), shape=[1], lod_level=1, dtype="int64")
for i in range(1, 27)
]
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
#nn_input = None if sparse_only else dense_input
nn_input = dense_input
predict_y, loss, auc_var, batch_auc_var, infer_vars = dnn_model(
nn_input, sparse_input_ids, label, args.embedding_size,
args.sparse_feature_dim)
optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
optimizer.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_use_var([dense_input] + sparse_input_ids + [label])
python_executable = "python3.6"
pipe_command = "{} criteo_reader.py {}".format(python_executable,
args.sparse_feature_dim)
dataset.set_pipe_command(pipe_command)
dataset.set_batch_size(128)
thread_num = 10
dataset.set_thread(thread_num)
whole_filelist = [
"raw_data/part-%d" % x for x in range(len(os.listdir("raw_data")))
]
print(whole_filelist)
dataset.set_filelist(whole_filelist[:100])
dataset.load_into_memory()
fluid.layers.Print(auc_var)
epochs = 1
for i in range(epochs):
exe.train_from_dataset(
program=fluid.default_main_program(), dataset=dataset, debug=True)
print("epoch {} finished".format(i))
import paddle_serving_client.io as server_io
feed_var_dict = {}
feed_var_dict['dense_input'] = dense_input
for i, sparse in enumerate(sparse_input_ids):
feed_var_dict["embedding_{}.tmp_0".format(i)] = sparse
fetch_var_dict = {"prob": predict_y}
feed_kv_dict = {}
feed_kv_dict['dense_input'] = dense_input
for i, emb in enumerate(infer_vars):
feed_kv_dict["embedding_{}.tmp_0".format(i)] = emb
fetch_var_dict = {"prob": predict_y}
server_io.save_model("ctr_serving_model", "ctr_client_conf", feed_var_dict,
fetch_var_dict, fluid.default_main_program())
server_io.save_model("ctr_serving_model_kv", "ctr_client_conf_kv",
feed_kv_dict, fetch_var_dict,
fluid.default_main_program())
if __name__ == '__main__':
train()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import paddle.fluid as fluid
import math
def dnn_model(dense_input, sparse_inputs, label, embedding_size,
sparse_feature_dim):
def embedding_layer(input):
emb = fluid.layers.embedding(
input=input,
is_sparse=True,
is_distributed=False,
size=[sparse_feature_dim, embedding_size],
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()))
x = fluid.layers.sequence_pool(input=emb, pool_type='sum')
return emb, x
def mlp_input_tensor(emb_sums, dense_tensor):
#if isinstance(dense_tensor, fluid.Variable):
# return fluid.layers.concat(emb_sums, axis=1)
#else:
return fluid.layers.concat(emb_sums + [dense_tensor], axis=1)
def mlp(mlp_input):
fc1 = fluid.layers.fc(input=mlp_input,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(mlp_input.shape[1]))))
fc2 = fluid.layers.fc(input=fc1,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]))))
fc3 = fluid.layers.fc(input=fc2,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]))))
pre = fluid.layers.fc(input=fc3,
size=2,
act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]))))
return pre
emb_pair_sums = list(map(embedding_layer, sparse_inputs))
emb_sums = [x[1] for x in emb_pair_sums]
infer_vars = [x[0] for x in emb_pair_sums]
mlp_in = mlp_input_tensor(emb_sums, dense_input)
predict = mlp(mlp_in)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.reduce_sum(cost)
accuracy = fluid.layers.accuracy(input=predict, label=label)
auc_var, batch_auc_var, auc_states = \
fluid.layers.auc(input=predict, label=label, num_thresholds=2 ** 12, slide_steps=20)
return predict, avg_cost, auc_var, batch_auc_var, infer_vars
...@@ -12,31 +12,45 @@ ...@@ -12,31 +12,45 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader.imdb_reader import IMDBDataset from paddle_serving_client import Client
import sys import sys
import os
import criteo as criteo
import time
from paddle_serving_client.metric import auc
import numpy as np import numpy as np
py_version = sys.version_info[0]
client = Client() client = Client()
client.connect(["127.0.0.1:9393"]) client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9292"])
# you can define any english sentence or dataset here
# This example reuses imdb reader in training, you
# can define your own data preprocessing easily.
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource('imdb.vocab')
for line in sys.stdin: batch = 1
word_ids, label = imdb_dataset.get_words_and_label(line) buf_size = 100
word_len = len(word_ids) dataset = criteo.CriteoDataset()
feed = { dataset.setup(1000001)
"words": np.array(word_ids).reshape(word_len, 1), test_filelists = ["{}/part-0".format(sys.argv[2])]
"words.lod": [0, word_len] reader = dataset.infer_reader(test_filelists, batch, buf_size)
} label_list = []
fetch = ["prediction"] prob_list = []
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True) start = time.time()
if fetch_map["serving_status_code"] == 0: for ei in range(10000):
print(fetch_map) if py_version == 2:
data = reader().next()
else: else:
print(fetch_map["serving_status_code"]) data = reader().__next__()
#print("{} {}".format(fetch_map["prediction"][0], label[0])) feed_dict = {}
feed_dict['dense_input'] = data[0][0]
for i in range(1, 27):
feed_dict["embedding_{}.tmp_0".format(i - 1)] = np.array(data[0][i]).reshape(-1)
feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, len(data[0][i])]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
print(fetch_map)
prob_list.append(fetch_map['prob'][0][1])
label_list.append(data[0][-1][0])
print(auc(label_list, prob_list))
end = time.time()
print(end - start)
...@@ -17,20 +17,25 @@ import os ...@@ -17,20 +17,25 @@ import os
import sys import sys
from paddle_serving_server import OpMaker from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server from paddle_serving_server import Server
op_maker = OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer') general_dist_kv_infer_op = op_maker.create('general_dist_kv_infer')
response_op = op_maker.create('general_response') response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker() op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op) op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_dist_kv_infer_op)
op_seq_maker.add_op(response_op) op_seq_maker.add_op(response_op)
server = Server() server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(4)
server.load_model_config(sys.argv[1]) server.load_model_config(sys.argv[1])
server.prepare_server(workdir="work_dir1", port=9393, device="cpu") server.prepare_server(
workdir="work_dir1",
port=9292,
device="cpu",
cube_conf="./cube/conf/cube.conf")
server.run_server() server.run_server()
...@@ -35,11 +35,11 @@ client-side configuration file are stored in the `encrypt_client` directory. ...@@ -35,11 +35,11 @@ client-side configuration file are stored in the `encrypt_client` directory.
## Start Encryption Service ## Start Encryption Service
CPU Service CPU Service
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
``` ```
GPU Service GPU Service
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
``` ```
## Prediction ## Prediction
......
...@@ -36,14 +36,14 @@ def serving_encryption(): ...@@ -36,14 +36,14 @@ def serving_encryption():
## 启动加密预测服务 ## 启动加密预测服务
CPU预测服务 CPU预测服务
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
``` ```
GPU预测服务 GPU预测服务
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
``` ```
## 预测 ## 预测
``` ```
python test_client.py encrypt_client/ python test_client.py encrypt_client/serving_client_conf.prototxt
``` ```
...@@ -19,7 +19,8 @@ import sys ...@@ -19,7 +19,8 @@ import sys
client = Client() client = Client()
client.load_client_config(sys.argv[1]) client.load_client_config(sys.argv[1])
client.use_key("./key") client.use_key("./key")
client.connect(["127.0.0.1:9300"], encryption=True) client.connect(["0.0.0.0:9393"], encryption=True)
fetch_list = client.get_fetch_names()
import paddle import paddle
test_reader = paddle.batch( test_reader = paddle.batch(
...@@ -28,5 +29,5 @@ test_reader = paddle.batch( ...@@ -28,5 +29,5 @@ test_reader = paddle.batch(
batch_size=1) batch_size=1)
for data in test_reader(): for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"]) fetch_map = client.predict(feed={"x": data[0][0]}, fetch=fetch_list)
print("{} {}".format(fetch_map["price"][0], data[0][1][0])) print(fetch_map)
...@@ -18,30 +18,21 @@ sh get_data.sh ...@@ -18,30 +18,21 @@ sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
### Client prediction ## Client prediction
### RPC Client
The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`). The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`).
``` shell ``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt python test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
### Http Client
## HTTP service
### Start server
Start a web service with default web service hosting modules:
``` shell ``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci python test_httpclient.py uci_housing_client/serving_client_conf.prototxt
``` ```
### Client prediction
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## Benchmark ## Benchmark
``` shell ``` shell
......
...@@ -10,15 +10,16 @@ sh get_data.sh ...@@ -10,15 +10,16 @@ sh get_data.sh
## RPC服务
### 开启服务端 ## 开启服务端
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
### 客户端预测 ## 客户端预测
### 客户端RPC
`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。 `test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
...@@ -26,23 +27,12 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -26,23 +27,12 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
python test_client.py uci_housing_client/serving_client_conf.prototxt python test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
### 客户端Http预测
## HTTP服务
### 开启服务端
通过下面的一行代码开启默认web服务:
``` shell ``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci python test_httpclient.py uci_housing_client/serving_client_conf.prototxt
``` ```
### 客户端预测
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## 性能测试 ## 性能测试
``` shell ``` shell
......
File mode changed from 100755 to 100644
...@@ -20,7 +20,7 @@ import numpy as np ...@@ -20,7 +20,7 @@ import numpy as np
client = Client() client = Client()
client.load_client_config(sys.argv[1]) client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"]) client.connect(["127.0.0.1:9393"])
fetch_list = client.get_fetch_names()
import paddle import paddle
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -31,6 +31,5 @@ for data in test_reader(): ...@@ -31,6 +31,5 @@ for data in test_reader():
new_data = np.zeros((1, 13)).astype("float32") new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0] new_data[0] = data[0][0]
fetch_map = client.predict( fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True) feed={"x": new_data}, fetch=fetch_list, batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
print(fetch_map) print(fetch_map)
...@@ -13,29 +13,28 @@ ...@@ -13,29 +13,28 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client from paddle_serving_client.httpclient import HttpClient
import sys
import numpy as np import numpy as np
client = Client() import time
client.connect(["127.0.0.1:9393"])
""" client = HttpClient()
client.load_client_config(sys.argv[1])
# if you want to enable Encrypt Module,uncommenting the following line
# client.use_key("./key")
client.set_response_compress(True)
client.set_request_compress(True)
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1)
for data in test_reader(): for data in test_reader():
new_data = np.zeros((1, 1, 13)).astype("float32") new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0] new_data[0] = data[0][0]
fetch_map = client.predict( fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True) feed={"x": new_data}, fetch=fetch_list, batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
print(fetch_map) print(fetch_map)
""" break
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
# Linear regression prediction service example
## Get data
```shell
sh get_data.sh
```
## Start the gRPC server
``` shell
python test_server.py uci_housing_model/
```
You can also start the default gRPC service with a single command:
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang
```
## Client prediction
### Synchronous prediction
``` shell
python test_sync_client.py
```
### Asynchronous prediction
``` shell
python test_asyn_client.py
```
### Batch prediction
``` shell
python test_batch_client.py
```
### Prediction with timeout
``` shell
python test_timeout_client.py
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import functools
import time
import threading
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
complete_task_count = [0]
lock = threading.Lock()
def call_back(call_future):
try:
fetch_map = call_future.result()
print(fetch_map)
except grpc.RpcError as e:
print(e.code())
finally:
with lock:
complete_task_count[0] += 1
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
task_count = 0
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
future = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False, asyn=True)
task_count += 1
future.add_done_callback(functools.partial(call_back))
while complete_task_count[0] != task_count:
time.sleep(0.1)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.set_gpuid("0")
server.prepare_server(workdir="work_dir1", port=9393, device="gpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
client.set_rpc_timeout_ms(40)
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
elif fetch_map["serving_status_code"] == grpc.StatusCode.DEADLINE_EXCEEDED:
print('timeout')
else:
print(fetch_map["serving_status_code"])
## IMDB comment sentiment inference service
([简体中文](./README_CN.md)|English)
### Get model files and sample data
```
sh get_data.sh
```
The downloaded package contains the CNN, LSTM and BOW model configs along with their test_data and train_data.
### Start RPC inference service
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### RPC Infer
The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`).
```
head test_data/part-0 | python test_client.py
```
It will print the prediction results of the first 10 test cases.
## IMDB评论情绪预测服务
(简体中文|[English](./README.md))
### 获取模型文件和样例数据
```
sh get_data.sh
```
脚本会下载和解压出cnn、lstm和bow三种模型的配置文文件以及test_data和train_data。
### 启动RPC预测服务
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### 执行预测
`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
```
head test_data/part-0 | python test_client.py
```
预测test_data/part-0的前十个样例。
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
tar -zxvf text_classification_data.tar.gz
tar -zxvf imdb_model.tar.gz
# Yolov4 Detection Service
([简体中文](README_CN.md)|English)
## Get Model
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## Start RPC Service
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
```
python test_client.py 000000570688.jpg
```
After the prediction is completed, a JSON file with the prediction results and a picture with the detection boxes drawn will be generated in the `./output` folder.
# Yolov4 检测服务
(简体中文|[English](README.md))
## 获取模型
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## 启动RPC服务
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## 预测
```
python test_client.py 000000570688.jpg
```
预测完成会在`./output`文件夹下生成保存预测结果的json文件以及标出检测结果框的图片。
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([
File2Image(), BGR2RGB(), Resize(
(608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
(2, 0, 1))
])
postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
client = Client()
client.connect(['127.0.0.1:9393'])
client.set_rpc_timeout_ms(100000)
im = preprocess(sys.argv[1])
fetch_map = client.predict(
feed={
"image": im,
"im_size": np.array(list(im.shape[1:])),
},
fetch=["save_infer_model/scale_0.tmp_0"],
batch=False)
print(fetch_map)
fetch_map.pop("serving_status_code")
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -16,5 +16,6 @@ ...@@ -16,5 +16,6 @@
from . import version from . import version
from . import client from . import client
from .client import * from .client import *
from .httpclient import *
__version__ = version.version_tag __version__ = version.version_tag
...@@ -25,11 +25,8 @@ import base64 ...@@ -25,11 +25,8 @@ import base64
import time import time
import sys import sys
import grpc
from .proto import multi_lang_general_model_service_pb2
sys.path.append( sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
...@@ -307,30 +304,40 @@ class Client(object): ...@@ -307,30 +304,40 @@ class Client(object):
if isinstance(feed, dict): if isinstance(feed, dict):
feed_batch.append(feed) feed_batch.append(feed)
elif isinstance(feed, list): elif isinstance(feed, list):
# batch_size must be 1, cause batch is already in Tensor. # if input is a list and the number of feed_var is 1.
if len(feed) != 1: # create a temp_dict { key = feed_var_name, value = list}
raise ValueError("Feed only list = [dict]") # put the temp_dict into the feed_batch.
feed_batch = feed if len(self.feed_names_) != 1:
raise ValueError(
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else: else:
raise ValueError("Feed only accepts dict and list of dict") raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = [] # batch_size must be 1, cause batch is already in Tensor.
if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.")
int_slot = []
int_feed_names = [] int_feed_names = []
int_shape = [] int_shape = []
int_lod_slot_batch = [] int_lod_slot_batch = []
float_slot_batch = []
float_slot = []
float_feed_names = [] float_feed_names = []
float_lod_slot_batch = [] float_lod_slot_batch = []
float_shape = [] float_shape = []
string_slot_batch = []
string_slot = []
string_feed_names = [] string_feed_names = []
string_lod_slot_batch = [] string_lod_slot_batch = []
string_shape = [] string_shape = []
fetch_names = [] fetch_names = []
counter = 0 counter = 0
# batch_size must be 1, cause batch is already in Tensor.
batch_size = len(feed_batch)
for key in fetch_list: for key in fetch_list:
if key in self.fetch_names_: if key in self.fetch_names_:
...@@ -340,87 +347,69 @@ class Client(object): ...@@ -340,87 +347,69 @@ class Client(object):
raise ValueError( raise ValueError(
"Fetch names should not be empty or out of saved fetch list.") "Fetch names should not be empty or out of saved fetch list.")
for i, feed_i in enumerate(feed_batch): feed_i = feed_batch[0]
int_slot = [] for key in feed_i:
int_lod_slot = [] if ".lod" not in key and key not in self.feed_names_:
float_slot = [] raise ValueError("Wrong feed name: {}.".format(key))
float_lod_slot = [] if ".lod" in key:
string_slot = [] continue
string_lod_slot = []
for key in feed_i: self.shape_check(feed_i, key)
if ".lod" not in key and key not in self.feed_names_: if self.feed_types_[key] in int_type:
raise ValueError("Wrong feed name: {}.".format(key)) int_feed_names.append(key)
if ".lod" in key: shape_lst = []
continue if batch == False:
#if not isinstance(feed_i[key], np.ndarray): feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
self.shape_check(feed_i, key) 1, axis=0)
if self.feed_types_[key] in int_type: if isinstance(feed_i[key], np.ndarray):
if i == 0: shape_lst.extend(list(feed_i[key].shape))
int_feed_names.append(key) int_shape.append(shape_lst)
shape_lst = [] else:
if batch == False: int_shape.append(self.feed_shapes_[key])
feed_i[key] = np.expand_dims(feed_i[key], 0).repeat( if "{}.lod".format(key) in feed_i:
1, axis=0) int_lod_slot_batch.append(feed_i["{}.lod".format(key)])
if isinstance(feed_i[key], np.ndarray): else:
shape_lst.extend(list(feed_i[key].shape)) int_lod_slot_batch.append([])
int_shape.append(shape_lst)
else: if isinstance(feed_i[key], np.ndarray):
int_shape.append(self.feed_shapes_[key]) int_slot.append(np.ascontiguousarray(feed_i[key]))
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(np.ascontiguousarray(feed_i[key]))
self.has_numpy_input = True
else:
int_slot.append(np.ascontiguousarray(feed_i[key]))
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
1, axis=0)
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.has_numpy_input = True
else:
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.all_numpy_input = False
#if input is string, feed is not numpy.
elif self.feed_types_[key] in string_type:
if i == 0:
string_feed_names.append(key)
string_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
string_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_i[key])
self.has_numpy_input = True self.has_numpy_input = True
int_slot_batch.append(int_slot) else:
int_lod_slot_batch.append(int_lod_slot) int_slot.append(np.ascontiguousarray(feed_i[key]))
float_slot_batch.append(float_slot) self.all_numpy_input = False
float_lod_slot_batch.append(float_lod_slot)
string_slot_batch.append(string_slot) elif self.feed_types_[key] in float_type:
string_lod_slot_batch.append(string_lod_slot) float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
1, axis=0)
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.has_numpy_input = True
else:
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.all_numpy_input = False
#if input is string, feed is not numpy.
elif self.feed_types_[key] in string_type:
string_feed_names.append(key)
string_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
string_lod_slot_batch.append(feed_i["{}.lod".format(key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_i[key])
self.has_numpy_input = True
self.profile_.record('py_prepro_1') self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0') self.profile_.record('py_client_infer_0')
...@@ -428,11 +417,11 @@ class Client(object): ...@@ -428,11 +417,11 @@ class Client(object):
result_batch_handle = self.predictorres_constructor() result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input: if self.all_numpy_input:
res = self.client_handle_.numpy_predict( res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, float_slot, float_feed_names, float_shape, float_lod_slot_batch,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape, int_slot, int_feed_names, int_shape, int_lod_slot_batch,
int_lod_slot_batch, string_slot_batch, string_feed_names, string_slot, string_feed_names, string_shape,
string_shape, string_lod_slot_batch, fetch_names, string_lod_slot_batch, fetch_names, result_batch_handle,
result_batch_handle, self.pid, log_id) self.pid, log_id)
elif self.has_numpy_input == False: elif self.has_numpy_input == False:
raise ValueError( raise ValueError(
"Please make sure all of your inputs are numpy array") "Please make sure all of your inputs are numpy array")
...@@ -520,243 +509,3 @@ class Client(object): ...@@ -520,243 +509,3 @@ class Client(object):
def release(self): def release(self):
self.client_handle_.destroy_predictor() self.client_handle_.destroy_predictor()
self.client_handle_ = None self.client_handle_ = None
class MultiLangClient(object):
def __init__(self):
self.channel_ = None
self.stub_ = None
self.rpc_timeout_s_ = 2
self.profile_ = _Profiler()
def add_variant(self, tag, cluster, variant_weight):
# TODO
raise Exception("cannot support ABtest yet")
def set_rpc_timeout_ms(self, rpc_timeout):
if self.stub_ is None:
raise Exception("set timeout must be set after connect.")
if not isinstance(rpc_timeout, int):
# for bclient
raise ValueError("rpc_timeout must be int type.")
self.rpc_timeout_s_ = rpc_timeout / 1000.0
timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest()
timeout_req.timeout_ms = rpc_timeout
resp = self.stub_.SetTimeout(timeout_req)
return resp.err_code == 0
def connect(self, endpoints):
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.lb_policy_name', 'round_robin')]
# TODO: weighted round robin
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
# get client model config
get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
)
resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str
self._parse_model_config(model_config_str)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch)
req.is_python = is_python
req.log_id = log_id
feed_var_names = []
for key in feed.keys():
if '.lod' not in key:
feed_var_names.append(key)
req.feed_var_names.extend(feed_var_names)
inst = multi_lang_general_model_service_pb2.FeedInst()
for name in req.feed_var_names:
tensor = multi_lang_general_model_service_pb2.Tensor()
var = feed[name]
v_type = self.feed_types_[name]
if is_python:
data = None
if isinstance(var, list):
if v_type == 0: # int64
data = np.array(var, dtype="int64")
elif v_type == 1: # float32
data = np.array(var, dtype="float32")
elif v_type == 2: # int32
data = np.array(var, dtype="int32")
else:
raise Exception("error tensor value type.")
elif isinstance(var, np.ndarray):
data = var
if v_type == 0:
if data.dtype != 'int64':
data = data.astype("int64")
elif v_type == 1:
if data.dtype != 'float32':
data = data.astype("float32")
elif v_type == 2:
if data.dtype != 'int32':
data = data.astype("int32")
else:
raise Exception("error tensor value type.")
else:
raise Exception("var must be list or ndarray.")
data = np.ascontiguousarray(data)
tensor.data = data.tobytes()
tensor.shape.extend(list(var.shape))
if "{}.lod".format(name) in feed.keys():
tensor.lod.extend(feed["{}.lod".format(name)])
inst.tensor_array.append(tensor)
req.insts.append(inst)
return req
def _unpack_inference_response(self, resp, fetch, is_python,
need_variant_tag):
if resp.err_code != 0:
return None
tag = resp.tag
multi_result_map = {}
for model_result in resp.outputs:
inst = model_result.insts[0]
result_map = {}
for i, name in enumerate(fetch):
var = inst.tensor_array[i]
v_type = self.fetch_types_[name]
if is_python:
if v_type == 0: # int64
result_map[name] = np.frombuffer(
var.data, dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.frombuffer(
var.data, dtype="float32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
result_map[name] = np.array(
list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.array(
list(var.float_data), dtype="float32")
else:
raise Exception("error type.")
result_map[name].shape = list(var.shape)
if name in self.lod_tensor_set_:
result_map["{}.lod".format(name)] = np.array(list(var.lod))
multi_result_map[model_result.engine_name] = result_map
ret = None
if len(resp.outputs) == 1:
ret = list(multi_result_map.values())[0]
else:
ret = multi_result_map
ret["serving_status_code"] = 0
return ret if not need_variant_tag else [ret, tag]
def _done_callback_func(self, fetch, is_python, need_variant_tag):
def unpack_resp(resp):
return self._unpack_inference_response(resp, fetch, is_python,
need_variant_tag)
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
batch=True,
need_variant_tag=False,
asyn=False,
is_python=True,
log_id=0):
if isinstance(feed, dict) is False:
raise ValueError("Type Error. grpc feed must be dict.")
if batch is False:
for key in feed:
if ".lod" not in key:
feed[key] = np.expand_dims(feed[key], 0).repeat(1, axis=0)
if not asyn:
try:
self.profile_.record('py_prepro_0')
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
ret = self._unpack_inference_response(
resp,
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag)
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
return ret
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
else:
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture(
call_future,
self._done_callback_func(
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag))
class MultiLangPredictFuture(object):
def __init__(self, call_future, callback_func):
self.call_future_ = call_future
self.callback_func_ = callback_func
def result(self):
try:
resp = self.call_future_.result()
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
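# A minimal usage sketch of MultiLangClient (illustrative only; the endpoint,
# the feed name "x" and the fetch name "price" are placeholders, not taken
# from this repository):
#     client = MultiLangClient()
#     client.connect(["127.0.0.1:9393"])
#     ret = client.predict(feed={"x": np.ones((1, 13), dtype="float32")},
#                          fetch=["price"], batch=True)
#     if ret.get("serving_status_code", 0) == 0:
#         print(ret["price"])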
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests
import json
import numpy as np
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import gzip
# Iterable moved to collections.abc (it was removed from collections in Python 3.10).
from collections.abc import Iterable
import base64
# param 'type' (in feed_var or fetch_var) = 0 means dataType is int64
# param 'type' (in feed_var or fetch_var) = 1 means dataType is float32
# param 'type' (in feed_var or fetch_var) = 2 means dataType is int32
# param 'type' (in feed_var or fetch_var) = 3 means dataType is string (called bytes in the proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
# these keys correspond to the data fields in the proto
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
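# Illustrative note (added for clarity, not in the original source): a feedvar's
# elem_type code doubles as an index into proto_data_key_list, selecting which
# field of the HTTP request body carries the flattened values, e.g.
#     proto_data_key_list[float32_type]   # -> "float_data"
#     proto_data_key_list[bytes_type]     # -> "data"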
def list_flatten(items, ignore_types=(str, bytes)):
for x in items:
if isinstance(x, Iterable) and not isinstance(x, ignore_types):
yield from list_flatten(x)
else:
yield x
def data_bytes_number(datalist):
total_bytes_number = 0
if isinstance(datalist, list):
if len(datalist) == 0:
return total_bytes_number
else:
for data in datalist:
if isinstance(data, str):
total_bytes_number = total_bytes_number + len(data)
else:
total_bytes_number = total_bytes_number + 4 * len(datalist)
break
else:
raise ValueError(
"In the Function data_bytes_number(), data must be list.")
return total_bytes_number
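# Hypothetical usage sketch (added for clarity, not in the original source):
# data_bytes_number() gives a rough payload size so predict() can decide whether
# gzip compression is worthwhile; non-string elements are counted as 4 bytes each.
#     assert data_bytes_number([0.1, 0.2, 0.3]) == 12
#     assert data_bytes_number(["abc", "de"]) == 5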
class HttpClient(object):
def __init__(self,
ip="0.0.0.0",
port="9393",
service_name="/GeneralModelService/inference"):
self.feed_names_ = []
self.feed_real_names = []
self.fetch_names_ = []
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.http_timeout_ms = 200000
self.ip = ip
self.port = port
self.server_port = port
self.service_name = service_name
self.key = None
self.try_request_gzip = False
self.try_response_gzip = False
def load_client_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
# load configuration here
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_real_names = [var.name for var in model_conf.feed_var]
self.feed_names_to_idx_ = {}  # maps alias_name to its index in feed_var
self.lod_tensor_set = set()
self.feed_tensor_len = {} #this is only used for shape check
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = [dim for dim in var.shape]
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def set_http_timeout_ms(self, http_timeout_ms):
if not isinstance(http_timeout_ms, int):
raise ValueError("http_timeout_ms must be int type.")
else:
self.http_timeout_ms = http_timeout_ms
def set_ip(self, ip):
self.ip = ip
def set_service_name(self, service_name):
self.service_name = service_name
def set_port(self, port):
self.port = port
def set_request_compress(self, try_request_gzip):
self.try_request_gzip = try_request_gzip
def set_response_compress(self, try_response_gzip):
self.try_response_gzip = try_response_gzip
# use_key enables the encryption feature by loading the client-side key.
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
self.get_serving_port()
def get_serving_port(self):
encrypt_url = "http://" + str(self.ip) + ":" + str(self.port)
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post(encrypt_url, req)
result = r.json()
print(result)
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
self.server_port = str(result["endpoint_list"][0])
print("rpc port is ", self.server_port)
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
# feed supports numpy arrays as well as plain list/tuple values.
# A bare str is not supported, because the field is repeated in the proto.
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, (list, tuple)):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, (list, str, tuple)):
# if input is a list or str or tuple, and the number of feed_var is 1.
# create a temp_dict { key = feed_var_name, value = list}
# put the temp_dict into the feed_batch.
if len(self.feed_names_) != 1:
raise ValueError(
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else:
raise ValueError("Feed only accepts dict and list of dict")
# batch_size must be 1, because the batch is already packed inside the Tensor.
if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.")
fetch_names = []
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
return {}
feed_i = feed_batch[0]
Request = {}
Request["fetch_var_names"] = fetch_list
Request["log_id"] = int(log_id)
Request["tensor"] = []
index = 0
total_data_number = 0
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
Request["tensor"].append('')
Request["tensor"][index] = {}
lod = []
if "{}.lod".format(key) in feed_i:
lod = feed_i["{}.lod".format(key)]
shape = self.feed_shapes_[key].copy()
elem_type = self.feed_types_[key]
data_value = feed_i[key]
data_key = proto_data_key_list[elem_type]
# feed_i[key] may be an np.ndarray, or a list/tuple.
# An np.ndarray has to be converted into a flat list.
if isinstance(feed_i[key], np.ndarray):
shape_lst = []
# a 0-dim numpy value needs an extra enclosing []
if feed_i[key].ndim == 0:
data_value = [feed_i[key].tolist()]
shape_lst.append(1)
else:
shape_lst.extend(list(feed_i[key].shape))
shape = shape_lst
data_value = feed_i[key].flatten().tolist()
# When batch is False, insert a leading 1 into shape as the batch dim.
# When batch is True, numpy's own shape already carries the batch dim.
if batch == False:
shape.insert(0, 1)
# For list/tuple input, flatten any nested structure.
elif isinstance(feed_i[key], (list, tuple)):
# When batch is False, insert a leading 1 into shape as the batch dim.
# When batch is True, a list is not as regular as a numpy array, so its
# shape cannot be derived; use the length of the first dimension as the
# batch dim and insert it in front of feedVar.shape.
if batch == False:
shape.insert(0, 1)
else:
shape.insert(0, len(feed_i[key]))
feed_i[key] = [x for x in list_flatten(feed_i[key])]
data_value = feed_i[key]
else:
# The input may be a single str or int value, etc.
# Normalize it into a one-element list first.
# Since this input is special, keep the shape from the original feedvar.
data_value = []
data_value.append(feed_i[key])
if isinstance(feed_i[key], str):
if self.feed_types_[key] != bytes_type:
raise ValueError(
"feedvar is not string-type,feed can`t be a single string."
)
else:
if self.feed_types_[key] == bytes_type:
raise ValueError(
"feedvar is string-type,feed, feed can`t be a single int or others."
)
# If compression is disabled, there is no need to count the payload size.
if self.try_request_gzip:
total_data_number = total_data_number + data_bytes_number(
data_value)
Request["tensor"][index]["elem_type"] = elem_type
Request["tensor"][index]["shape"] = shape
Request["tensor"][index][data_key] = data_value
proto_index = self.feed_names_to_idx_[key]
Request["tensor"][index]["name"] = self.feed_real_names[proto_index]
Request["tensor"][index]["alias_name"] = key
if len(lod) > 0:
Request["tensor"][index]["lod"] = lod
index = index + 1
result = None
# request
web_url = "http://" + self.ip + ":" + self.server_port + self.service_name
postData = json.dumps(Request)
headers = {}
# Compress only when the data payload is larger than 512 bytes.
if self.try_request_gzip and total_data_number > 512:
postData = gzip.compress(bytes(postData, 'utf-8'))
headers["Content-Encoding"] = "gzip"
if self.try_response_gzip:
headers["Accept-encoding"] = "gzip"
# requests detects and decompresses gzip responses automatically
result = requests.post(url=web_url, headers=headers, data=postData)
if result is None:
return None
if result.status_code == 200:
return result.json()
return result
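# A minimal usage sketch of HttpClient (illustrative only; the config path,
# the feed name "x" and the fetch name "price" are placeholders, not taken
# from this repository):
#     client = HttpClient(ip="127.0.0.1", port="9393")
#     client.load_client_config("serving_client/serving_client_conf.prototxt")
#     client.set_request_compress(True)
#     ret = client.predict(feed={"x": np.ones((13, ), dtype="float32")},
#                          fetch=["price"], batch=False)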
...@@ -14,18 +14,16 @@ ...@@ -14,18 +14,16 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from . import monitor from . import monitor
from . import rpc_service
from . import serve from . import serve
from . import version from . import version
__all__ = ["version", "server", "serve", "monitor", "rpc_service", "dag"] __all__ = ["version", "server", "serve", "monitor", "dag"]
from paddle_serving_server import ( from paddle_serving_server import (
version, version,
server, server,
serve, serve,
monitor, monitor,
rpc_service,
dag, ) dag, )
from .dag import * from .dag import *
......
File mode changed from 100755 to 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import numpy as np
import google.protobuf.text_format
from .proto import general_model_config_pb2 as m_config
from .proto import multi_lang_general_model_service_pb2
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
MultiLangGeneralModelServiceServicer):
def __init__(self, model_config_path_list, is_multi_model, endpoints):
self.is_multi_model_ = is_multi_model
self.model_config_path_list = model_config_path_list
self.endpoints_ = endpoints
self._init_bclient(self.model_config_path_list, self.endpoints_)
self._parse_model_config(self.model_config_path_list)
def _init_bclient(self, model_config_path_list, endpoints, timeout_ms=None):
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
from paddle_serving_client import Client
self.bclient_ = Client()
if timeout_ms is not None:
self.bclient_.set_rpc_timeout_ms(timeout_ms)
self.bclient_.load_client_config(file_path_list)
self.bclient_.connect(endpoints)
def _parse_model_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _unpack_inference_request(self, request):
feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names)
is_python = request.is_python
log_id = request.log_id
feed_batch = []
for feed_inst in request.insts:
feed_dict = {}
for idx, name in enumerate(feed_names):
var = feed_inst.tensor_array[idx]
v_type = self.feed_types_[name]
data = None
if is_python:
if v_type == 0: # int64
data = np.frombuffer(var.data, dtype="int64")
elif v_type == 1: # float32
data = np.frombuffer(var.data, dtype="float32")
elif v_type == 2: # int32
data = np.frombuffer(var.data, dtype="int32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
data = np.array(list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
data = np.array(list(var.float_data), dtype="float32")
elif v_type == 2: # int32
data = np.array(list(var.int_data), dtype="int32")
else:
raise Exception("error type.")
data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = np.ascontiguousarray(data)
if len(var.lod) > 0:
feed_dict["{}.lod".format(name)] = var.lod
feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse()
if ret is None:
resp.err_code = 1
return resp
results, tag = ret
resp.tag = tag
resp.err_code = 0
if not self.is_multi_model_:
results = {'general_infer_0': results}
for model_name, model_result in results.items():
model_output = multi_lang_general_model_service_pb2.ModelOutput()
inst = multi_lang_general_model_service_pb2.FetchInst()
for idx, name in enumerate(fetch_names):
tensor = multi_lang_general_model_service_pb2.Tensor()
v_type = self.fetch_types_[name]
if is_python:
tensor.data = model_result[name].tobytes()
else:
if v_type == 0: # int64
tensor.int64_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 1: # float32
tensor.float_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 2: # int32
tensor.int_data.extend(model_result[name].reshape(-1)
.tolist())
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
model_output.insts.append(inst)
model_output.engine_name = model_name
resp.outputs.append(model_output)
return resp
def SetTimeout(self, request, context):
# This process and the Inference process cannot run at the same time.
# For performance reasons, no thread lock is added for now.
timeout_ms = request.timeout_ms
self._init_bclient(self.model_config_path_list, self.endpoints_,
timeout_ms)
resp = multi_lang_general_model_service_pb2.SimpleResponse()
resp.err_code = 0
return resp
def Inference(self, request, context):
feed_batch, fetch_names, is_python, log_id \
= self._unpack_inference_request(request)
ret = self.bclient_.predict(
feed=feed_batch,
fetch=fetch_names,
batch=True,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context):
# model_config_path_list is a list right now.
# dict support should be added when graphMaker is used.
resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
model_config_str = []
for single_model_config in self.model_config_path_list:
if os.path.isdir(single_model_config):
with open("{}/serving_server_conf.prototxt".format(
single_model_config)) as f:
model_config_str.append(str(f.read()))
elif os.path.isfile(single_model_config):
with open(single_model_config) as f:
model_config_str.append(str(f.read()))
resp.client_config_str = model_config_str[0]
return resp
...@@ -23,13 +23,26 @@ import json ...@@ -23,13 +23,26 @@ import json
import base64 import base64
import time import time
from multiprocessing import Process from multiprocessing import Process
from flask import Flask, request
import sys import sys
if sys.version_info.major == 2: if sys.version_info.major == 2:
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
elif sys.version_info.major == 3: elif sys.version_info.major == 3:
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
from contextlib import closing
import socket
# web_service.py is still used by Pipeline.
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
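# Brief illustration (added note, not in the original file): port_is_available()
# returns True when nothing is listening on the port, so callers can guard startup:
#     if not port_is_available(9393):
#         raise SystemExit("Port 9393 is already used")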
def format_gpu_to_strlist(unformatted_gpus): def format_gpu_to_strlist(unformatted_gpus):
gpus_strlist = [] gpus_strlist = []
...@@ -117,8 +130,6 @@ def serve_args(): ...@@ -117,8 +130,6 @@ def serve_args():
type=str, type=str,
default="workdir", default="workdir",
help="Working dir of current service") help="Working dir of current service")
parser.add_argument(
"--name", type=str, default="None", help="Default service name")
parser.add_argument( parser.add_argument(
"--use_mkl", default=False, action="store_true", help="Use MKL") "--use_mkl", default=False, action="store_true", help="Use MKL")
parser.add_argument( parser.add_argument(
...@@ -148,11 +159,6 @@ def serve_args(): ...@@ -148,11 +159,6 @@ def serve_args():
default=False, default=False,
action="store_true", action="store_true",
help="Use encryption model") help="Use encryption model")
parser.add_argument(
"--use_multilang",
default=False,
action="store_true",
help="Use Multi-language-service")
parser.add_argument( parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT") "--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument( parser.add_argument(
...@@ -189,7 +195,6 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi ...@@ -189,7 +195,6 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
ir_optim = args.ir_optim ir_optim = args.ir_optim
use_mkl = args.use_mkl use_mkl = args.use_mkl
max_body_size = args.max_body_size max_body_size = args.max_body_size
use_multilang = args.use_multilang
workdir = "{}_{}".format(args.workdir, port) workdir = "{}_{}".format(args.workdir, port)
if model == "": if model == "":
...@@ -222,10 +227,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi ...@@ -222,10 +227,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
general_response_op = op_maker.create('general_response') general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op) op_seq_maker.add_op(general_response_op)
if use_multilang: server = serving.Server()
server = serving.MultiLangServer()
else:
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num) server.set_num_threads(thread_num)
server.use_mkl(use_mkl) server.use_mkl(use_mkl)
...@@ -372,54 +374,14 @@ if __name__ == "__main__": ...@@ -372,54 +374,14 @@ if __name__ == "__main__":
elif os.path.isfile(single_model_config): elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.") raise ValueError("The input of --model should be a dir not file.")
if args.name == "None": if args.use_encryption_model:
from .web_service import port_is_available p_flag = False
if args.use_encryption_model: p = None
p_flag = False serving_port = 0
p = None server = HTTPServer(('0.0.0.0', int(args.port)), MainService)
serving_port = 0 print(
server = HTTPServer(('localhost', int(args.port)), MainService) 'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop'
print( )
'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop' server.serve_forever()
)
server.serve_forever()
else:
start_multi_card(args)
else: else:
from .web_service import WebService start_multi_card(args)
web_service = WebService(name=args.name)
web_service.load_model_config(args.model)
workdir = "{}_{}".format(args.workdir, args.port)
web_service.prepare_server(
workdir=workdir,
port=args.port,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim,
thread_num=args.thread,
precision=args.precision,
use_calib=args.use_calib,
use_trt=args.use_trt,
gpu_multi_stream=args.gpu_multi_stream,
op_num=args.op_num,
op_max_batch=args.op_max_batch,
gpuid=args.gpu_ids)
web_service.run_rpc_service()
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
web_service._launch_web_service()
service_name = "/" + web_service.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return web_service.get_prediction(request)
app_instance.run(host="0.0.0.0",
port=web_service.port,
threaded=False,
processes=4)
...@@ -16,11 +16,9 @@ import os ...@@ -16,11 +16,9 @@ import os
import tarfile import tarfile
import socket import socket
import paddle_serving_server as paddle_serving_server import paddle_serving_server as paddle_serving_server
from paddle_serving_server.rpc_service import MultiLangServerServiceServicer
from paddle_serving_server.serve import format_gpu_to_strlist from paddle_serving_server.serve import format_gpu_to_strlist
from .proto import server_configure_pb2 as server_sdk from .proto import server_configure_pb2 as server_sdk
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
from .proto import multi_lang_general_model_service_pb2_grpc
import google.protobuf.text_format import google.protobuf.text_format
import time import time
from .version import version_tag, version_suffix, device_type from .version import version_tag, version_suffix, device_type
...@@ -33,7 +31,6 @@ if sys.platform.startswith('win') is False: ...@@ -33,7 +31,6 @@ if sys.platform.startswith('win') is False:
import shutil import shutil
import platform import platform
import numpy as np import numpy as np
import grpc
import sys import sys
import collections import collections
import subprocess import subprocess
...@@ -595,195 +592,3 @@ class Server(object): ...@@ -595,195 +592,3 @@ class Server(object):
print(command) print(command)
os.system(command) os.system(command)
class MultiLangServer(object):
def __init__(self):
self.bserver_ = Server()
self.worker_num_ = 4
self.body_size_ = 64 * 1024 * 1024
self.concurrency_ = 100000
self.is_multi_model_ = False # for model ensemble, which is not supported right now.
self.device = "cpu" # this is the default value for multilang `device`.
def set_max_concurrency(self, concurrency):
self.concurrency_ = concurrency
self.bserver_.set_max_concurrency(concurrency)
def set_device(self, device="cpu"):
self.device = device
self.bserver_.set_device(device)
def set_num_threads(self, threads):
self.worker_num_ = threads
self.bserver_.set_num_threads(threads)
def set_max_body_size(self, body_size):
self.bserver_.set_max_body_size(body_size)
if body_size >= self.body_size_:
self.body_size_ = body_size
else:
print(
"max_body_size is less than default value, will use default value in service."
)
def use_encryption_model(self, flag=False):
self.encryption_model = flag
def set_port(self, port):
self.gport_ = port
def set_precision(self, precision="fp32"):
self.precision = precision
def set_use_calib(self, use_calib=False):
self.use_calib = use_calib
def set_reload_interval(self, interval):
self.bserver_.set_reload_interval(interval)
def set_op_sequence(self, op_seq):
self.bserver_.set_op_sequence(op_seq)
def set_op_graph(self, op_graph):
self.bserver_.set_op_graph(op_graph)
def use_mkl(self, flag):
self.bserver_.use_mkl(flag)
def set_memory_optimize(self, flag=False):
self.bserver_.set_memory_optimize(flag)
def set_ir_optimize(self, flag=False):
self.bserver_.set_ir_optimize(flag)
def set_gpuid(self, gpuid):
self.bserver_.set_gpuid(gpuid)
def set_op_num(self, op_num):
self.bserver_.set_op_num(op_num)
def set_op_max_batch(self, op_max_batch):
self.bserver_.set_op_max_batch(op_max_batch)
def set_trt(self):
self.bserver_.set_trt()
def set_gpu_multi_stream(self):
self.bserver_.set_gpu_multi_stream()
def set_lite(self):
self.bserver_.set_lite()
def set_xpu(self):
self.bserver_.set_xpu()
def load_model_config(self,
server_config_dir_paths,
client_config_path=None):
if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
pass
else:
raise Exception("The type of model_config_paths must be str or list"
", not {}.".format(type(server_config_dir_paths)))
for single_model_config in server_config_dir_paths:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError(
"The input of --model should be a dir not file.")
self.bserver_.load_model_config(server_config_dir_paths)
if client_config_path is None:
# dict is not supported right now.
if isinstance(server_config_dir_paths, dict):
self.is_multi_model_ = True
client_config_path = []
for server_config_path_items in list(
server_config_dir_paths.items()):
client_config_path.append(server_config_path_items[1])
elif isinstance(server_config_dir_paths, list):
self.is_multi_model_ = False
client_config_path = server_config_dir_paths
else:
raise Exception(
"The type of model_config_paths must be str or list or "
"dict({op: model_path}), not {}.".format(
type(server_config_dir_paths)))
if isinstance(client_config_path, str):
client_config_path = [client_config_path]
elif isinstance(client_config_path, list):
pass
else: # dict is not support right now.
raise Exception(
"The type of client_config_path must be str or list or "
"dict({op: model_path}), not {}.".format(
type(client_config_path)))
if len(client_config_path) != len(server_config_dir_paths):
raise Warning(
"The len(client_config_path) is {}, != len(server_config_dir_paths) {}."
.format(len(client_config_path), len(server_config_dir_paths)))
self.bclient_config_path_list = client_config_path
def prepare_server(self,
workdir=None,
port=9292,
device=None,
use_encryption_model=False,
cube_conf=None):
# If `device` is not given, fall back to self.device, which is either the
# default value or a value previously set via set_device().
if device == None:
device = self.device
# if `device` is set, let self.device = device.
else:
self.device = device
if not self._port_is_available(port):
raise SystemExit("Port {} is already used".format(port))
default_port = 12000
self.port_list_ = []
for i in range(1000):
if default_port + i != port and self._port_is_available(default_port
+ i):
self.port_list_.append(default_port + i)
break
self.bserver_.prepare_server(
workdir=workdir,
port=self.port_list_[0],
device=device,
use_encryption_model=use_encryption_model,
cube_conf=cube_conf)
self.set_port(port)
def _launch_brpc_service(self, bserver):
bserver.run_server()
def _port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
return result != 0
def run_server(self):
p_bserver = Process(
target=self._launch_brpc_service, args=(self.bserver_, ))
p_bserver.start()
options = [('grpc.max_send_message_length', self.body_size_),
('grpc.max_receive_message_length', self.body_size_)]
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=self.worker_num_),
options=options,
maximum_concurrent_rpcs=self.concurrency_)
multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server(
MultiLangServerServiceServicer(
self.bclient_config_path_list, self.is_multi_model_,
["0.0.0.0:{}".format(self.port_list_[0])]), server)
server.add_insecure_port('[::]:{}'.format(self.gport_))
server.start()
p_bserver.join()
server.wait_for_termination()
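# A minimal usage sketch of MultiLangServer (illustrative only; the model dir,
# workdir and port are placeholders, and an OpSeqMaker is assumed to have been
# built elsewhere, as in start_gpu_card_model above):
#     server = MultiLangServer()
#     server.set_op_sequence(op_seq_maker.get_op_sequence())
#     server.load_model_config("uci_housing_model")
#     server.prepare_server(workdir="workdir", port=9393, device="cpu")
#     server.run_server()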
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#!flask/bin/python #!flask/bin/python
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
# Now, this is only for Pipeline.
from flask import Flask, request, abort from flask import Flask, request, abort
from contextlib import closing from contextlib import closing
from multiprocessing import Pool, Process, Queue from multiprocessing import Pool, Process, Queue
......
...@@ -16,7 +16,7 @@ from time import time as _time ...@@ -16,7 +16,7 @@ from time import time as _time
import time import time
import threading import threading
import multiprocessing import multiprocessing
from paddle_serving_client import MultiLangClient, Client from paddle_serving_client import Client
from concurrent import futures from concurrent import futures
import logging import logging
import func_timeout import func_timeout
...@@ -330,8 +330,9 @@ class Op(object): ...@@ -330,8 +330,9 @@ class Op(object):
if self.client_type == 'brpc': if self.client_type == 'brpc':
client = Client() client = Client()
client.load_client_config(client_config) client.load_client_config(client_config)
elif self.client_type == 'grpc': # After testing is complete, replace this with brpc-http.
client = MultiLangClient() # elif self.client_type == 'grpc':
# client = MultiLangClient()
elif self.client_type == 'local_predictor': elif self.client_type == 'local_predictor':
if self.local_predictor is None: if self.local_predictor is None:
raise ValueError("local predictor not yet created") raise ValueError("local predictor not yet created")
...@@ -474,10 +475,13 @@ class Op(object): ...@@ -474,10 +475,13 @@ class Op(object):
fetch=self._fetch_names, fetch=self._fetch_names,
batch=True, batch=True,
log_id=typical_logid) log_id=typical_logid)
# To be replaced with HttpClient later
'''
if isinstance(self.client, MultiLangClient): if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0: if call_result is None or call_result["serving_status_code"] != 0:
return None return None
call_result.pop("serving_status_code") call_result.pop("serving_status_code")
'''
return call_result return call_result
def postprocess(self, input_data, fetch_data, log_id=0): def postprocess(self, input_data, fetch_data, log_id=0):
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -21,17 +21,33 @@ option cc_generic_services = true; ...@@ -21,17 +21,33 @@ option cc_generic_services = true;
message Tensor { message Tensor {
repeated bytes data = 1; repeated bytes data = 1;
optional int32 elem_type = 2; repeated int32 int_data = 2;
repeated int32 shape = 3; repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
}; };
message FeedInst { repeated Tensor tensor_array = 1; }; message Request {
repeated Tensor tensor = 1;
message FetchInst { repeated Tensor tensor_array = 1; }; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
};
message Request { repeated FeedInst insts = 1; }; message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
};
message Response { repeated FetchInst insts = 1; }; message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
service GeneralModelService { service GeneralModelService {
rpc inference(Request) returns (Response); rpc inference(Request) returns (Response);
......
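For orientation, the JSON body that HttpClient.predict assembles maps onto this Request message roughly as follows (a hand-written sketch; the tensor name, shape, and values are invented for illustration):

    Request = {
        "fetch_var_names": ["price"],
        "log_id": 0,
        "tensor": [{
            "name": "x", "alias_name": "x",
            "elem_type": 1,  # float32
            "shape": [1, 13],
            "float_data": [0.1] * 13,
        }],
    }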
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644