提交 2b5d3035 编写于 作者: G gongweibao

Merge branch 'develop' of https://github.com/paddlePaddle/serving into fixtypo

test=develop
......@@ -285,22 +285,16 @@ int PredictorClient::batch_predict(
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
output.insts(0).tensor_array(idx).int64_data_size());
int size = output.insts(0).tensor_array(idx).int64_data_size();
for (int i = 0; i < size; ++i) {
model._int64_value_map[name][i] =
output.insts(0).tensor_array(idx).int64_data(i);
}
model._int64_value_map[name] = std::vector<int64_t>(
output.insts(0).tensor_array(idx).int64_data().begin(),
output.insts(0).tensor_array(idx).int64_data().begin() + size);
} else {
VLOG(2) << "fetch var " << name << "type float";
model._float_value_map[name].resize(
output.insts(0).tensor_array(idx).float_data_size());
int size = output.insts(0).tensor_array(idx).float_data_size();
for (int i = 0; i < size; ++i) {
model._float_value_map[name][i] =
output.insts(0).tensor_array(idx).float_data(i);
}
model._float_value_map[name] = std::vector<float>(
output.insts(0).tensor_array(idx).float_data().begin(),
output.insts(0).tensor_array(idx).float_data().begin() + size);
}
idx += 1;
}
......@@ -564,22 +558,16 @@ int PredictorClient::numpy_predict(
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
output.insts(0).tensor_array(idx).int64_data_size());
int size = output.insts(0).tensor_array(idx).int64_data_size();
for (int i = 0; i < size; ++i) {
model._int64_value_map[name][i] =
output.insts(0).tensor_array(idx).int64_data(i);
}
model._int64_value_map[name] = std::vector<int64_t>(
output.insts(0).tensor_array(idx).int64_data().begin(),
output.insts(0).tensor_array(idx).int64_data().begin() + size);
} else {
VLOG(2) << "fetch var " << name << "type float";
model._float_value_map[name].resize(
output.insts(0).tensor_array(idx).float_data_size());
int size = output.insts(0).tensor_array(idx).float_data_size();
for (int i = 0; i < size; ++i) {
model._float_value_map[name][i] =
output.insts(0).tensor_array(idx).float_data(i);
}
model._float_value_map[name] = std::vector<float>(
output.insts(0).tensor_array(idx).float_data().begin(),
output.insts(0).tensor_array(idx).float_data().begin() + size);
}
idx += 1;
}
......
......@@ -12,13 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/time.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <thread>
#include "core/predictor/framework.pb.h"
#include "quant.h"
#include "seq_file.h"
inline uint64_t time_diff(const struct timeval &start_time,
const struct timeval &end_time) {
return (end_time.tv_sec - start_time.tv_sec) * 1000000 +
(end_time.tv_usec - start_time.tv_usec);
}
using paddle::framework::proto::VarType;
std::map<int, size_t> var_type_size;
void reg_var_types() {
......@@ -100,8 +110,8 @@ int dump_parameter(const char *input_file, const char *output_file) {
char *value_buf = new char[value_buf_len];
size_t offset = 0;
for (int64_t i = 0; i < dims[0]; ++i) {
// std::cout << "key_len " << key_len << " value_len " << value_buf_len <<
// std::endl;
// std::cout << "key_len " << key_len << " value_len " << value_buf_len
// << std::endl;
memcpy(value_buf, tensor_buf + offset, value_buf_len);
seq_file_writer.write((char *)&i, sizeof(i), value_buf, value_buf_len);
offset += value_buf_len;
......@@ -109,14 +119,14 @@ int dump_parameter(const char *input_file, const char *output_file) {
return 0;
}
int compress_parameter(const char *file1, const char *file2, int bits) {
float *read_embedding_table(const char *file1, std::vector<int64_t> &dims) {
std::ifstream is(file1);
// Step 1: is read version, os write version
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
if (version != 0) {
std::cout << "Version number " << version << " not supported" << std::endl;
return -1;
return NULL;
}
std::cout << "Version size: " << sizeof(version) << std::endl;
// Step 2: is read LoD level, os write LoD level
......@@ -138,7 +148,7 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
is.read(reinterpret_cast<char *>(&version), sizeof(version));
if (version != 0) {
std::cout << "Version number " << version << " not supported" << std::endl;
return -1;
return NULL;
}
// Step 4: is read Tensor Data, os write min/max/quant data
......@@ -149,10 +159,10 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
is.read(reinterpret_cast<char *>(buf.get()), size);
if (!desc.ParseFromArray(buf.get(), size)) {
std::cout << "Cannot parse tensor desc" << std::endl;
return -1;
return NULL;
}
// read tensor
std::vector<int64_t> dims;
// std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
......@@ -164,7 +174,7 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
if (dims.size() != 2) {
std::cout << "Parameter dims not 2D" << std::endl;
return -1;
return NULL;
}
size_t numel = 1;
......@@ -176,47 +186,96 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
char *tensor_buf = new char[buf_size];
is.read(static_cast<char *>(tensor_buf), buf_size);
float *tensor_float_buf = reinterpret_cast<float *>(tensor_buf);
size_t per_line_size = dims[1] * 1 + 2 * sizeof(float);
char *tensor_out = new char[per_line_size * dims[0]];
return tensor_float_buf;
}
float loss = 0;
float all_loss = 0;
int compress_parameter_parallel(const char *file1,
const char *file2,
int bits,
int n_threads) {
#define MIN_THREADS (1)
#define MAX_THREADS (80)
std::vector<int64_t> dims;
float *emb_table = read_embedding_table(file1, dims);
if (emb_table == NULL || dims.size() != 2) {
return -1;
}
// int64_t dict_size = dims[0]/100000000;
int64_t dict_size = dims[0];
int64_t emb_size = dims[1];
size_t per_line_size = emb_size * 1 + 2 * sizeof(float);
n_threads = std::min(std::max(MIN_THREADS, n_threads), MAX_THREADS);
int64_t step = dict_size / n_threads;
std::vector<char *> result;
result.reserve(dict_size + 1);
double pow2bits = pow(2, bits);
std::cout << "Start Quant" << std::endl;
SeqFileWriter seq_file_writer(file2);
size_t offset = 0;
for (int64_t i = 0; i < dims[0]; ++i) {
std::vector<std::thread> threads;
for (int i = 0; i < n_threads + 1; ++i) {
threads.push_back(std::thread([=, &result]() {
int64_t start = i * step;
int64_t end = (i + 1) * step;
if (i == n_threads) {
if (start == dict_size) {
return;
}
end = dict_size;
}
printf("THREAD[%d], index [%ld, %ld), start Quant table...\n",
i,
start,
end);
struct timeval quant_start;
gettimeofday(&(quant_start), NULL);
for (int64_t k = start; k < end; ++k) {
float xmin = 0, xmax = 0, loss = 0;
size_t scale = dims[1];
char *tensor_temp = new char[per_line_size];
greedy_search(
tensor_float_buf + i * dims[1], xmin, xmax, loss, scale, bits);
for (size_t e = 0; e < dims[1]; ++e) {
float x = *(tensor_float_buf + i * dims[1] + e);
int val = round((x - xmin) / (xmax - xmin) * (pow(2, bits) - 1));
val = std::max(0, val);
val = std::min((int)pow(2, bits) - 1, val);
emb_table + k * emb_size, xmin, xmax, loss, emb_size, bits);
// 得出 loss 最小的时候的 scale
float scale = (xmax - xmin) * (pow2bits - 1);
char *min_ptr = tensor_temp;
char *max_ptr = tensor_temp + sizeof(float);
memcpy(min_ptr, &xmin, sizeof(float));
memcpy(max_ptr, &xmax, sizeof(float));
for (size_t e = 0; e < emb_size; ++e) {
float x = *(emb_table + k * emb_size + e);
int val = round((x - xmin) / scale);
val = std::max(0, val);
val = std::min((int)pow2bits - 1, val);
*(tensor_temp + 2 * sizeof(float) + e) = val;
float unit = (xmax - xmin) / pow(2, bits);
float trans_val = unit * val + xmin;
}
seq_file_writer.write((char *)&i, sizeof(i), tensor_temp, per_line_size);
result[k] = tensor_temp;
if ((k - start) % 10000 == 0) {
printf("THREAD[%d], handle line: %ld\n", i, k - start);
}
}
struct timeval quant_end;
gettimeofday(&(quant_end), NULL);
printf("THREAD[%d], Quantization finished, cost: %lu us!!!\n",
i,
time_diff(quant_start, quant_end));
}));
}
for (auto &thread : threads) {
thread.join();
}
SeqFileWriter seq_file_writer(file2);
for (int64_t i = 0; i < dict_size; i++) {
seq_file_writer.write((char *)&i, sizeof(i), result[i], per_line_size);
}
return 0;
}
int main(int argc, char **argv) {
if (argc < 3 || argc > 4) {
std::cout << "Usage: if no compress, please follow:" << std::endl;
std::cout << "seq_generator PARAMETER_FILE OUTPUT_FILE\n" << std::endl;
if (argc < 3 || argc > 5) {
std::cout << "Usage:" << std::endl;
std::cout << "if no compress, please follow:" << std::endl;
std::cout << " seq_generator PARAMETER_FILE OUTPUT_FILE\n" << std::endl;
std::cout << "if compress, please follow: " << std::endl;
std::cout << "seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS"
std::cout << " seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS "
"[N_THREADS]"
<< std::endl;
std::cout << "Now it only support 8 bit." << std::endl;
std::cout << " Now it only support 8 bit." << std::endl;
return -1;
}
reg_var_types();
......@@ -227,7 +286,13 @@ int main(int argc, char **argv) {
}
if (argc == 4) {
std::cout << "generate compressed sparse param sequence file" << std::endl;
compress_parameter(argv[1], argv[2], atoi(argv[3]));
compress_parameter_parallel(argv[1], argv[2], atoi(argv[3]), 1);
return 0;
}
if (argc == 5) {
std::cout << "parallel generate compressed sparse param sequence file"
<< std::endl;
compress_parameter_parallel(argv[1], argv[2], atoi(argv[3]), atoi(argv[4]));
return 0;
}
}
......
......@@ -43,7 +43,7 @@ In the default centos7 image we provide, the Python path is `/usr/bin/python`. I
### Integrated CPU version paddle inference library
``` shell
mkdir build && cd build
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
make -j10
```
......@@ -53,7 +53,7 @@ you can execute `make install` to put targets under directory `./output`, you ne
### Integrated GPU version paddle inference library
``` shell
mkdir build && cd build
mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
make -j10
```
......@@ -65,7 +65,7 @@ execute `make install` to put targets under directory `./output`
## Compile Client
``` shell
mkdir build && cd build
mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
make -j10
```
......@@ -75,7 +75,7 @@ execute `make install` to put targets under directory `./output`
## Compile the App
```bash
mkdir build && cd build
mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
make
```
......
......@@ -43,7 +43,7 @@ export PYTHONROOT=/usr/
### 集成CPU版本Paddle Inference Library
``` shell
mkdir build && cd build
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
make -j10
```
......@@ -53,7 +53,7 @@ make -j10
### 集成GPU版本Paddle Inference Library
``` shell
mkdir build && cd build
mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
make -j10
```
......@@ -65,7 +65,7 @@ make -j10
## 编译Client部分
``` shell
mkdir build && cd build
mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
make -j10
```
......@@ -75,7 +75,7 @@ make -j10
## 编译App部分
```bash
mkdir build && cd build
mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
make
```
......
......@@ -3,45 +3,45 @@
## CPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.1-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.1-py2-none-any.whl
```
## GPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.1-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.1-py2-none-any.whl
```
## Client
### Python 3.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp37-none-manylinux1_x86_64.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp37-none-any.whl
```
### Python 3.6
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp36-none-manylinux1_x86_64.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp36-none-any.whl
```
### Python 2.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp27-none-manylinux1_x86_64.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp27-none-any.whl
```
## App
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.1-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.1-py2-none-any.whl
```
......@@ -14,7 +14,10 @@
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient
import functools
import sys
import time
import threading
client = MultiLangClient()
client.load_client_config(sys.argv[1])
......@@ -26,7 +29,22 @@ test_reader = paddle.batch(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1)
complete_task_count = [0]
lock = threading.Lock()
def call_back(call_future, data):
fetch_map = call_future.result()
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
with lock:
complete_task_count[0] += 1
task_count = 0
for data in test_reader():
future = client.predict(feed={"x": data[0][0]}, fetch=["price"], asyn=True)
fetch_map = future.result()
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
task_count += 1
future.add_done_callback(functools.partial(call_back, data=data))
while complete_task_count[0] != task_count:
time.sleep(0.1)
......@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving App version string """
serving_app_version = "0.1.0"
serving_app_version = "0.1.1"
......@@ -391,7 +391,13 @@ class MultiLangClient(object):
self._parse_model_config(path)
def connect(self, endpoint):
self.channel_ = grpc.insecure_channel(endpoint[0]) #TODO
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.max_receive_message_length', 512 * 1024 * 1024)]
self.channel_ = grpc.insecure_channel(
endpoint[0], options=options) #TODO
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
......@@ -431,7 +437,6 @@ class MultiLangClient(object):
def _pack_feed_data(self, feed, fetch, is_python):
req = multi_lang_general_model_service_pb2.Request()
req.fetch_var_names.extend(fetch)
req.feed_var_names.extend(feed.keys())
req.is_python = is_python
feed_batch = None
if isinstance(feed, dict):
......@@ -440,6 +445,7 @@ class MultiLangClient(object):
feed_batch = feed
else:
raise Exception("{} not support".format(type(feed)))
req.feed_var_names.extend(feed_batch[0].keys())
init_feed_names = False
for feed_data in feed_batch:
inst = multi_lang_general_model_service_pb2.FeedInst()
......@@ -516,6 +522,9 @@ class MultiLangClient(object):
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
......@@ -548,3 +557,10 @@ class MultiLangPredictFuture(object):
def result(self):
resp = self.call_future_.result()
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
......@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.3.0"
serving_server_version = "0.3.0"
module_proto_version = "0.3.0"
serving_client_version = "0.3.1"
serving_server_version = "0.3.1"
module_proto_version = "0.3.1"
......@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.3.0"
serving_server_version = "0.3.0"
module_proto_version = "0.3.0"
serving_client_version = "0.3.1"
serving_server_version = "0.3.1"
module_proto_version = "0.3.1"
......@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.3.0"
serving_server_version = "0.3.0"
module_proto_version = "0.3.0"
serving_client_version = "0.3.1"
serving_server_version = "0.3.1"
module_proto_version = "0.3.1"
......@@ -15,6 +15,6 @@
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
import re
with open("setup.cfg", "w") as f:
line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=manylinux1_x86_64".format(
get_abbr_impl(), get_impl_ver())
line = "[bdist_wheel]\npython-tag={0}{1}".format(get_abbr_impl(),
get_impl_ver())
f.write(line)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册