Commit 81cd31a8 authored by wangjiawei04

merge with master 0624

Change-Id: I09b028bf244e63654da0cab154766856f94742d1
@@ -41,7 +41,7 @@ cpp_source_dirs.append('proto/*.proto')
# DELETE_AUTO_LIBS('$OUT_ROOT/third-64/glog/output/lib/libglog.a')
# DELETE_AUTO_LIBS('$OUT_ROOT/third-64/gflags/output/lib/libgflags.a')
# DELETE_AUTO_LIBS('$OUT_ROOT/third-64/gflags/output/lib/libgflags_nothreads.a')
-# DELETE_AUTO_LIBS('$OUT_ROOT/baidu/paddlepaddle/paddle/output/lib/libpaddle_fluid_noavx_openblas.a')
+DELETE_AUTO_LIBS('$OUT_ROOT/baidu/paddlepaddle/paddle/output/lib/libpaddle_fluid_noavx_openblas.a')
PROTOFLAGS(
'--plugin=protoc-gen-pdcodegen=../pdcodegen/plugin/pdcodegen',
......
@@ -10,7 +10,7 @@
The figure below is a partial breakdown of where time goes in a serving request. For brpc overhead, only the bthread creation and startup costs are listed.
-![](http://paddle-serving.bj.bcebos.com/doc/serving-timings.png)
+![](serving-timings.png)
(Right-click to open the full-size image in a new window)
@@ -280,7 +280,7 @@ The Serving scalability test means that, on different models:
The figure below shows how Paddle Serving QPS on the BOW model changes as the number of serving-side threads grows. With few threads (4/8/12), the QPS pattern is quite erratic; with more threads, the QPS curves largely converge and show essentially no linear growth.
-![](https://paddle-serving.bj.bcebos.com/doc/qps-threads-bow.png)
+![](qps-threads-bow.png)
(Right-click to open the full-size image in a new window)
@@ -438,7 +438,7 @@ The Serving scalability test means that, on different models:
The figure below shows how Paddle Serving QPS on the CNN model changes as the number of serving-side threads grows. As the thread count increases, Serving QPS scales roughly linearly. Read the chart like this: with 16 threads, maximum QPS is reached at about 20 concurrent requests, and QPS stays roughly flat as concurrency rises further; with 24 threads, maximum QPS is reached at about 28 concurrent requests, after which QPS likewise stays roughly flat.
-![](https://paddle-serving.bj.bcebos.com/doc/qps-threads-cnn.png)
+![](qps-threads-cnn.png)
(Right-click to open the full-size image in a new window)
@@ -596,7 +596,7 @@ The Serving scalability test means that, on different models:
The figure below shows how Paddle Serving QPS on the LSTM model changes as the number of serving-side threads grows. As the thread count increases, Serving QPS scales roughly linearly. Read the chart like this: with 16 threads, maximum QPS is reached at about 20 concurrent requests, and QPS stays roughly flat as concurrency rises further; with 24 threads, maximum QPS is reached at about 28 concurrent requests, after which QPS likewise stays roughly flat.
-![](https://paddle-serving.bj.bcebos.com/doc/qps-threads-lstm.png)
+![](qps-threads-lstm.png)
(Right-click to open the full-size image in a new window)
......
This diff is collapsed.
@@ -16,4 +16,6 @@
[Benchmarking](BENCHMARKING.md)
[GPU Benchmarking](GPU_BENCHMARKING.md)
[FAQ](FAQ.md)
@@ -165,7 +165,7 @@ reloadable_type: reload trigger check: timestamp_ne/timestamp_gt/md5sum/revision/
model_data_path: path to the model files
-runtime_thread_num: if greater than 0, enables the bsf multi-threaded scheduling framework and launches multi-threaded inference inside each inference bthread worker.
+runtime_thread_num: if greater than 0, enables the bsf multi-threaded scheduling framework and launches multi-threaded inference inside each inference bthread worker. Note that when in-worker multi-threaded inference is enabled, OPs in the workflow must use the Serving framework's BatchTensor class as the input and output of inference (predictor/framework/infer_data.h, `class BatchTensor`).
batch_infer_size: batch size of each inference thread when bsf multi-threaded inference is enabled
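
For orientation, the four options documented above would sit together in one model toolkit configuration entry. The sketch below is illustrative only: the field names come from the documentation above, but the values and surrounding file layout are assumptions, not taken from this commit.

```
# Illustrative sketch only -- values are made up, not recommendations.
reloadable_type: timestamp_ne   # reload when the model file timestamp changes
model_data_path: ./data/model   # path to the model files
runtime_thread_num: 4           # >0 enables bsf in-worker multi-threaded inference
batch_infer_size: 32            # per-inference-thread batch size when bsf is on
```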
......
@@ -19,6 +19,27 @@ namespace baidu {
namespace paddle_serving {
namespace predictor {
+struct MempoolRegion {
+  MempoolRegion(im::fugue::memory::Region *region, im::Mempool *mempool)
+      : _region(region), _mempool(mempool) {}
+  im::fugue::memory::Region *region() { return _region; }
+  im::Mempool *mempool() { return _mempool; }
+  im::fugue::memory::Region *_region;
+  im::Mempool *_mempool;
+  ~MempoolRegion() {
+    if (_region) {
+      delete _region;
+      _region = NULL;
+    }
+    if (_mempool) {
+      delete _mempool;
+      _mempool = NULL;
+    }
+  }
+};
int MempoolWrapper::initialize() {
  if (THREAD_KEY_CREATE(&_bspec_key, NULL) != 0) {
    LOG(ERROR) << "unable to create thread_key of thrd_data";
@@ -33,16 +54,20 @@ int MempoolWrapper::initialize() {
}

int MempoolWrapper::thread_initialize() {
-  _region.init();
-  im::Mempool* p_mempool = new (std::nothrow) im::Mempool(&_region);
-  if (p_mempool == NULL) {
+  im::fugue::memory::Region* region = new im::fugue::memory::Region();
+  region->init();
+  im::Mempool* mempool = new (std::nothrow) im::Mempool(region);
+  MempoolRegion* mempool_region = new MempoolRegion(region, mempool);
+  if (mempool == NULL) {
    LOG(ERROR) << "Failed create thread mempool";
    return -1;
  }

-  if (THREAD_SETSPECIFIC(_bspec_key, p_mempool) != 0) {
+  if (THREAD_SETSPECIFIC(_bspec_key, mempool_region) != 0) {
    LOG(ERROR) << "unable to set the thrd_data";
-    delete p_mempool;
+    delete mempool_region;  // ~MempoolRegion also frees region and mempool exactly once
    return -1;
  }
@@ -51,23 +76,34 @@ int MempoolWrapper::thread_initialize() {
}

int MempoolWrapper::thread_clear() {
-  im::Mempool* p_mempool = (im::Mempool*)THREAD_GETSPECIFIC(_bspec_key);
-  if (p_mempool) {
-    p_mempool->release_block();
-    _region.reset();
+  MempoolRegion* mempool_region = (MempoolRegion*)THREAD_GETSPECIFIC(_bspec_key);
+  if (mempool_region == NULL) {
+    LOG(WARNING) << "THREAD_GETSPECIFIC() returned NULL";
+    return -1;
+  }
+  im::Mempool* mempool = mempool_region->mempool();
+  im::fugue::memory::Region* region = mempool_region->region();
+  if (mempool) {
+    mempool->release_block();
+    region->reset();
  }

  return 0;
}

void* MempoolWrapper::malloc(size_t size) {
-  im::Mempool* p_mempool = (im::Mempool*)THREAD_GETSPECIFIC(_bspec_key);
-  if (!p_mempool) {
+  MempoolRegion* mempool_region = (MempoolRegion*)THREAD_GETSPECIFIC(_bspec_key);
+  if (mempool_region == NULL) {
+    LOG(WARNING) << "THREAD_GETSPECIFIC() returned NULL";
+    return NULL;
+  }
+  im::Mempool* mempool = mempool_region->mempool();
+  if (!mempool) {
    LOG(WARNING) << "Cannot malloc memory:" << size
                 << ", since mempool is not thread initialized";
    return NULL;
  }

-  return p_mempool->malloc(size);
+  return mempool->malloc(size);
}
} // namespace predictor
......
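
Taken together, the mempool changes swap the single shared `_region` member for a per-thread pair: each thread allocates its own `Region` and `Mempool`, bundles them into one heap-allocated `MempoolRegion`, and stores the bundle under the thread-specific key, so `thread_clear()` and `malloc()` can recover both objects from a single TLS slot. Below is a minimal, self-contained sketch of the same pattern using plain pthreads; all type and function names are illustrative stand-ins, not the Serving types (compile with `-pthread`):

```cpp
#include <pthread.h>
#include <cstdio>

// Illustrative stand-ins for im::fugue::memory::Region / im::Mempool.
struct Region { void init() {} void reset() {} };
struct Pool {
  explicit Pool(Region* r) : _r(r) {}
  void release_block() { _r->reset(); }
  Region* _r;
};

// One TLS slot owns both objects, mirroring MempoolRegion above.
struct PoolRegion {
  PoolRegion(Region* r, Pool* p) : region(r), pool(p) {}
  ~PoolRegion() { delete pool; delete region; }
  Region* region;
  Pool* pool;
};

static pthread_key_t g_key;

// Runs automatically when a thread that set the slot exits.
static void destroy_slot(void* p) { delete static_cast<PoolRegion*>(p); }

static int thread_initialize() {
  Region* region = new Region();
  region->init();
  Pool* pool = new Pool(region);
  PoolRegion* slot = new PoolRegion(region, pool);
  if (pthread_setspecific(g_key, slot) != 0) {
    delete slot;  // ~PoolRegion frees pool and region exactly once
    return -1;
  }
  return 0;
}

static void thread_clear() {
  PoolRegion* slot = static_cast<PoolRegion*>(pthread_getspecific(g_key));
  if (slot == NULL) return;
  slot->pool->release_block();  // recycle memory, keep the pair for reuse
}

int main() {
  pthread_key_create(&g_key, destroy_slot);
  if (thread_initialize() == 0) {
    thread_clear();
    std::puts("per-thread pool installed and cleared");
  }
  // The key destructor does not run for the main thread at exit,
  // so free its slot explicitly.
  destroy_slot(pthread_getspecific(g_key));
  pthread_setspecific(g_key, NULL);
  pthread_key_delete(g_key);
  return 0;
}
```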
@@ -39,7 +39,7 @@ class MempoolWrapper {
  void* malloc(size_t size);

 private:
-  im::fugue::memory::Region _region;
+  // im::fugue::memory::Region _region;
  THREAD_KEY_T _bspec_key;
};
......
@@ -143,7 +143,7 @@ int main(int argc, char** argv) {
  std::string filename(argv[0]);
  filename = filename.substr(filename.find_last_of('/') + 1);
-  settings.log_file = (std::string("./log/") + filename + ".log").c_str();
+  settings.log_file = strdup((std::string("./log/") + filename + ".log").c_str());
  settings.delete_old = logging::DELETE_OLD_LOG_FILE;
  logging::InitLogging(settings);
......
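
The `strdup` fix above addresses a dangling pointer: `c_str()` returns a pointer into a temporary `std::string` that is destroyed at the end of the full expression, so `settings.log_file` was left pointing at freed storage. `strdup` (POSIX) copies the characters into `malloc`'d memory that outlives the temporary. A minimal sketch of the hazard and the fix, with illustrative names:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

int main() {
  // BAD: the temporary std::string dies at the ';', leaving a dangling pointer.
  // const char* log_file = (std::string("./log/") + "app" + ".log").c_str();

  // OK: strdup copies the bytes into malloc'd storage that outlives the temporary.
  const char* log_file = strdup((std::string("./log/") + "app" + ".log").c_str());
  std::printf("%s\n", log_file);
  std::free(const_cast<char*>(log_file));  // the caller owns the strdup'd buffer
  return 0;
}
```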