diff --git a/README.md b/README.md index 8715b2f703ab241d236542784bb82270c165ed9c..17478804bc1b6a0a528f588ae335d4bd55c4977f 100644 --- a/README.md +++ b/README.md @@ -1,264 +1,7 @@ [TOC] # 概述 -PaddlePaddle是公司开源的机器学习框架,广泛支持各种深度学习模型的定制化开发; -Paddle cloud是基于PaddlePaddle框架实现的一整套云平台,对外提供全流程的AI开发平台,对内托管集团内各产品线的机器学习云服务。 - -Paddle serving是Paddle cloud的在线预测部分,与Paddle cloud模型训练环节无缝衔接,对外提供机器学习预测共有云服务,对内为公司各业务线提供统一的模型预测开发框架和云服务。 - -# Getting Started -## 运行示例 -说明:Imagenet图像分类模型,默认采用CPU模式(GPU模式请修改BCLOUD配置项,并用Dockerfile构建运行环境,[Docker部署请参考Wiki](http://agroup.baidu.com/share/md/044f552e866f4078900be503784e2468))。 - -Step1:启动Server端: -```shell -git clone ssh://icode.baidu.com:8235/baidu/paddle-serving/serving ~/my_paddle_serving/baidu/paddle-serving/serving && cd ~/my_paddle_serving/baidu/paddle-serving/serving && bcloud build && ./output/bin/image_class & -``` - -Step2:启动Client端: -```shell -git clone ssh://icode.baidu.com:8235/baidu/paddle-serving/sdk-cpp ~/my_paddle_serving/baidu/paddle-serving/sdk-cpp && cd ~/my_paddle_serving/baidu/paddle-serving/sdk-cpp && bcloud build && ./output/bin/ximage && pkill image_class -``` - -## 示例说明 -### 预测接口定义 -```c++ -syntax="proto2"; -package baidu.paddle_serving.predictor.image_class; -option cc_generic_services = true; - -// x-image request相关(批量接口) -message XImageReqInstance { - required bytes image_binary = 1; - required uint32 image_length = 2; -}; - -message Request { - repeated XImageReqInstance instances = 1; -}; - -// x-image response相关(批量接口) -message DensePrediction { - repeated float categories = 1; -}; - -message ClassResponse { - repeated DensePrediction predictions = 1; -}; - -message XImageResInstance { - required string response_json = 1; -}; - -message Response { - // Each json string is serialized from ClassResponse - repeated XImageResInstance predictions = 1; -}; - -// Service/method相关 -service ImageClassifyService { - rpc inference(Request) returns (Response); - rpc debug(Request) returns (Response); -}; -``` -### Server端实现 
-用户只需定制或配置以下三类信息的实现,即可快速搭建完整的Paddle-Serving预测模块。 - -#### 接口改造([proto目录](http://icode.baidu.com/repos/baidu/paddle-serving/serving/tree/master:proto/)) -Server端需对预测接口作如下修改即可: -```c++ -// 改动1:依赖paddle-serving option接口文件 -import "pds_option.proto"; -... -service ClassService { - rpc inference(Request) returns (Response); - rpc debug(Request) returns (Response); - // 改动2:打开generate_impl开关(以支持配置驱动) - option (pds.options).generate_impl = true; -}; -``` - -#### 示例配置([conf目录](http://icode.baidu.com/repos/baidu/paddle-serving/serving/tree/master:conf/)) -- gflags配置项 - -| name | 默认值 | 含义 | -|------|--------|------| -| workflow_path | ./conf | workflow配置目录名 | -|workflow_file|workflow.conf|workflow配置文件名| -|inferservice_path|./conf|service配置目录名| -|inferservice_file|service.conf|service配置文件名| -|logger_path|./conf|日志配置目录名| -|logger_file|log.conf|日志配置文件名| -|resource_path|./conf|资源管理器目录名| -|resource_file|resource.conf|资源管理器文件名| -|reload_interval_s|10|重载线程间隔时间(s)| - -- 配置文件实例(Image图像分类demo) -```shell -# >>> service.conf -[@Service] -name: ImageClassifyService -@workflow: workflow_image_classification - -# >>> workflow.conf -[@Workflow] -name: workflow_image_classification -path: ./conf -file: imagec_dag.conf - -# >>> imagec_dag.conf -workflow_type: Sequence -[@Node] -name: image_reader_op -type: ImageReaderOp - -[@Node] -name: image_classify_op -type: ImageClassifyOp -[.@Depend] -name: image_reader_op -mode: RO - -[@Node] -name: write_json_op -type: WriteJsonOp -[.@Depend] -name: image_classify_op -mode: RO - -# >>> resource.conf -model_manager_path: ./conf -model_manager_file: model_toolkit.conf -``` - -#### 定制Op算子([op目录](http://icode.baidu.com/repos/baidu/paddle-serving/serving/tree/master:op/)) -- 预处理算子(ImageReaderOp):从Request中读取图像字节流,通过opencv解码,填充tensor对象并输出到channel; -- 预测调用算子(ImageClassifyOp):从ImageReaderOp的channel获得输入tensor,临时申请输出tensor,调用ModelToolkit进行预测,并将输出tensor写入channel -- 后处理算子(WriteJsonOp):从ImageClassifyop的channel获得输出tensor,将其序列化为json字符串,写入作为rpc的output; - -### Client端实现 
-用户只需定制或配置以下三类信息,即可方便的接入预估请求,并在本地配置多套服务连接: - -#### 接口改造([proto目录](http://icode.baidu.com/repos/baidu/paddle-serving/sdk-cpp/tree/master:proto)) -Client端接口只需对预测接口作如下修改即可: -```c++ -// 改动1:依赖paddle-serving option接口文件 -import "pds_option.proto"; -... -service ImageClassifyService { - rpc inference(Request) returns (Response); - rpc debug(Request) returns (Response); - // 改动2:打开generate_stub开关(以支持配置驱动) - option (pds.options).generate_stub = true; -}; -``` - -#### 连接配置([conf目录](http://icode.baidu.com/repos/baidu/paddle-serving/sdk-cpp/tree/master:conf)) -```shell -# predictions.conf -## 默认配置共享 -[DefaultVariantInfo] -Tag : default -[.Connection] -ConnectTimeoutMicroSec : 200 -ReadTimeoutMicroSec : 2000 -WriteTimeoutMicroSec : 500 -ConnectRetryCount : 2 -MaxConnectionPerHost : 100 -HedgeRequestTimeoutMicroSec : -1 -HedgeFetchRetryCount : 2 -BnsReloadIntervalSeconds : 10 -ConnectionType : pooled -[.NamingInfo] -ClusterFilterStrategy : Default -LoadBalanceStrategy : la -[.RpcParameter] -# 0-NONE, 1-SNAPPY, 2-GZIP, 3-ZLIB, 4-LZ4 -CompressType : 0 -Protocol : baidu_std -MaxChannelPerRequest : 3 - -[@Predictor] -name : ximage -service_name : baidu.paddle_serving.predictor.image_class.ImageClassifyService -endpoint_router : WeightedRandomRender -[.WeightedRandomRender] -VariantWeightList : 30|70 # 30% vs 70% pvs -[.@VariantInfo] -Tag : var1 # 变体版本标识,提供上游辨识 -[..NamingInfo] -Cluster : list://127.0.0.1:8010 -[.@VariantInfo] -Tag : var2 -[..NamingInfo] -Cluster : list://127.0.0.1:8011 -``` - -#### 请求逻辑([demo/ximage.cpp](http://icode.baidu.com/repos/baidu/paddle-serving/sdk-cpp/blob/master:demo/ximage.cpp)) -```c++ -// 进程级初始化 -assert(PredictorAPI::instance().create("./conf/predictions.conf") == 0); -// 线程级预测调用: -Request req; -// fill request -// ... -Response res; -Predictor* ximage = PredictorAPI::instance().fetch_predictor("ximage"); -assert(ximage != NULL); -ximage->inference(req, res); -// parse response -// ... 
-assert(PredictorAPI::instance().free_predictor(ximage) == 0); - -// 进程级销毁 -assert(PredictorAPI::instance().destroy() == 0); -``` - -## 凤巢协议兼容 -Paddle Serving由凤巢观星框架发展而来,而之前框架的通信协议是nshead+compack+idl,为方便新老接口的兼容,Paddle Serving的server和client均支持向后兼容: -- 老API访问新Server,为适配老观星客户端数据包格式,新Server需通过mcpack2pb生成能解析idl格式的pb对象,详见:[wtitleq server实现](http://icode.baidu.com/repos/baidu/paddle-serving/lr-model/tree/master) -- 新SDK访问老Server,为能够访问老观星server服务,SDK需通过mcpack2pb插件生成基于idl格式的序列化逻辑;详见:[wtitleq api实现](http://icode.baidu.com/repos/baidu/infinite-inference/as-wtitleq-demo/tree/master)。 - -凤巢广告拆包支持:Paddle Serving的SDK-Cpp为用户提供了简单易用的拆包功能,通过修改proto/conf文件开启: -```c++ -// interface.proto文件 -message PredictorRequest { - message AdvRequest { - // 广告级别字段 - repeated uint32 ideaid = 1; - repeated string title = 2; - } - - // query级别字段 - required uint64 sid = 1; - required string query = 2; - // ... - - // 广告级别字段 - repeated AdvRequest advs = 3 [(pds.pack_on)=true]; // 改动1:对advs字段进行拆包 -} - -// ... - -service WtitleqService { - rpc ... - rpc ... - option (pds.options).package_size = 10; // 改动2:限制单包大小 -} -``` -[wtitleq sdk的proto实例](http://icode.baidu.com/repos/baidu/infinite-inference/as-wtitleq-demo/blob/master:proto/predictor_api.proto)。 - -```bash -# predictions.conf文件 -[@Predictor] -# ... -[.@VariantInfo] -#... 
-[..RpcParameter] -Protocol : itp # 改动3:修改rpc请求参数为itp协议 - -``` -[wtitleq sdk的conf实例](http://icode.baidu.com/repos/baidu/infinite-inference/as-wtitleq-demo/blob/master:conf/predictors.conf)。 +PaddlePaddle是公司开源的机器学习框架,广泛支持各种深度学习模型的定制化开发; Paddle serving是Paddle的在线预测部分,与Paddle模型训练环节无缝衔接,提供机器学习预测云服务。 # 框架简介 @@ -270,7 +13,7 @@ Protocol : itp # 改动3:修改rpc请求参数为itp协议 - 用户接口:搭建服务=定义proto文件+实现/复用Op+撰写配置,支持sdk/http请求; ## 名词解释 -- 预测引擎:对PaddlePaddle/Abacus/Tensorflow等各种推理计算Lib的封装,屏蔽预测模型动态Reload细节,对上层暴露统一的预测接口; +- 预测引擎:对PaddlePaddle预测Lib的封装,屏蔽预测模型动态Reload细节,对上层暴露统一的预测接口; - 预测模型:由离线训练框架生成、在线预测引擎加载的数据文件或目录,以PaddleFluid模型为例,通常包括拓扑文件和参数文件; - Op 算子:Paddle-serving对在线(预处理/后处理等)业务逻辑的最小粒度封装,框架提供OpWithChannel和OpWithChannelAndConf这两种常用的Op基类;框架默认实现通用Op算子; - Node:由某个Op算子类结合参数配置组成的Op算子实例,也是Workflow中的一个执行单元; @@ -284,8 +27,8 @@ Protocol : itp # 改动3:修改rpc请求参数为itp协议 Paddle serving框架为策略工程师提供以下三层面的功能性扩展: ### 模型 -- 预测引擎:集成PaddlePaddle、Abacus、Tensorrt、Anakin、Tensorflow等常用机器学习框架的预测Lib; -- 模型种类:支持PaddlePaddle(V1、V2、Fluid)、TensorrtUFF、Anakin、Tensorflow、Caffe等常见模型格式; +- 预测引擎:集成PaddlePaddle深度学习框架的预测Lib; +- 模型种类:支持Paddle Fluid模型格式; - 用户接口:支持模型加载、重载的配置化驱动,不同种类模型的预测接口完全一致; - 模型调度:支持基于异步线程模型的多模型预估调度,实现异构资源的优先级调度; @@ -298,28 +41,11 @@ Paddle serving框架为策略工程师提供以下三层面的功能性扩展: - RPC:底层通过Baidu-rpc封装网络交互,Server端可配置化启动多个独立Service,框架会搜集Service粒度的详细业务指标,并按照BVar接口对接到Noah等监控平台; - SDK:基于Baidu-rpc的client进行封装,提供多下游连接管理、可扩展路由策略、可定制参数实验、自动分包等机制,支持同步、半同步、纯异步等交互模式,以及多种兼容协议,所有连接策略均通过配置驱动 -# 平台简介 -![图片](http://agroup-bos.cdn.bcebos.com/42a0e34a7c6b36976e3932639209fd823d8f25e0) - -- [运维API](http://agroup.baidu.com/share/md/e582f543fb574e9b92445286955a976d) -- [预测API](http://agroup.baidu.com/share/md/eb91a51739514319844ceccdb331564c) - ## 名词解释 -- 用户(User):云平台注册用户,可基于平台Dashboard对账户下的端点信息进行增、删、查、改; - 端点(Endpoit):对一个预测需求的逻辑抽象,通常包含一到多个服务变体,以方便多版本模型管理; - 变体(Variant):一套同质化的Paddle-serving集群服务,每个实例起一个Paddle-serving进程; -- 实验(A/B Test):支持变体实验和参数化实验两种模式,变体实验根据Endpoint所属变体流量百分比实现流量随机抽样;参数化实验通过对pv绑定实验参数、由Paddle-serving进程解析参数、选择不同的代码分支进行实验; - -## 
主要功能 -在公有云落地场景为Infinite(天衍)云平台,主要为策略工程师提供以下三方面的全流程托管: -- 统一接入代理:提供代理服务,通过zk和云平台实时同步元信息,支持多模型版本管理和A/B测试路由策略,提供统一入口和标准预测API; -- 自动化部署:对接K8S/Opera等常见PaaS部署平台,支持服务的一键部署、回滚、下线等运维操作,支持endpoint/variant/model等维度的资源管理; -- 可视化运维:对接console、notebook、dashboard等前端工具和页面,满足可视化运维需求; # 设计文档 -- [总体设计文档](http://agroup.baidu.com/paddleserving/view/office/895070) -- [框架详设文档](http://agroup.baidu.com:8964/static/a3/e40876e464ba08ae5de14aa7710cf326456751.pdf?filename=PaddleServing%E6%9C%8D%E5%8A%A1%E6%A1%86%E6%9E%B6%E8%AF%A6%E7%BB%86%E8%AE%BE%E8%AE%A1%E6%96%87%E6%A1%A3v0_1.pdf) -- [平台详设文档](http://agroup.baidu.com/share/office/042a0941579e49adb8c255c8b5e92d51) # FAQ 1. 如何修改端口配置? @@ -327,12 +53,3 @@ Paddle serving框架为策略工程师提供以下三层面的功能性扩展: - 如果在inferservice_file里指定了port:xxx,那么就去申请该端口号; - 否则,如果在gflags.conf里指定了--port:xxx,那就去申请该端口号; - 否则,使用程序里指定的默认端口号:8010。 -2. 如何在部署的时候配置动态端口? -- 如果使用FCCI部署协议(凤巢检索端内部的部署协议),需要(1)通过inferservice_file指定端口号;(2)修改[Rakefile.opera](http://wiki.baidu.com/pages/viewpage.action?pageId=399979183#id-%E4%BB%8E%E9%9B%B6%E5%BC%80%E5%A7%8B%E5%86%99production-%E7%BC%96%E5%86%99Rakefile)的dynamic_port_config配置 -- `@dynamic_port_config为动态端口配置,向Opera申请名为:name的动态端口,其端口号会被写到:conf文件中的:target配置项。`例子如下: -``` -@dynamic_port_config = [ - {:name => 'main', :conf => 'framework/service.conf', :target => 'port'}, // 部署时自动向Opera申请端口,服务将会监听这个端口 - {:name => 'main', :conf => 'predictor_valid.conf', :target => 'port'}, // valid工具向这个端口发送测试请求,确保服务已正常启动 -] -``` diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 02fd33ac95b16956088251c65b506b5e469c4483..5dd37fb68403fddff375a4ffa0b6b8bee1559c6a 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -839,7 +839,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP SRCS HDRS) ARGS --cpp_out=${CMAKE_CURRENT_BINARY_DIR} --pdcodegen_out=${CMAKE_CURRENT_BINARY_DIR} --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/predictor/pdcodegen - # --proto_path=${CMAKE_SOURCE_DIR}/predictor/proto + --proto_path=${CMAKE_SOURCE_DIR}/predictor/proto 
${_protobuf_include_path} ${ABS_FIL} DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}" diff --git a/configure/proto/sdk_configure.proto b/configure/proto/sdk_configure.proto index 99f26c6f4cddcae9bf01785764a6c4167a58f76b..7658a9b609a50cdc90b70b85198086293bf19dc4 100644 --- a/configure/proto/sdk_configure.proto +++ b/configure/proto/sdk_configure.proto @@ -16,12 +16,12 @@ syntax = "proto2"; package baidu.paddle_serving.configure; message ConnectionConf { - required uint32 connect_timeout_ms = 1; - required uint32 rpc_timeout_ms = 2; - required uint32 connect_retry_count = 3; - required uint32 max_connection_per_host = 4; - required uint32 hedge_request_timeout_ms = 5; - required uint32 hedge_fetch_retry_count = 6; + required int32 connect_timeout_ms = 1; + required int32 rpc_timeout_ms = 2; + required int32 connect_retry_count = 3; + required int32 max_connection_per_host = 4; + required int32 hedge_request_timeout_ms = 5; + required int32 hedge_fetch_retry_count = 6; required string connection_type = 7; }; @@ -33,10 +33,10 @@ message NamingConf { message RpcParameter { // 0-NONE, 1-SNAPPY, 2-GZIP, 3-ZLIB, 4-LZ4 - required uint32 compress_type = 1; - required uint32 package_size = 2; + required int32 compress_type = 1; + required int32 package_size = 2; required string protocol = 3; - required uint32 max_channel_per_request = 4; + required int32 max_channel_per_request = 4; }; message SplitConf { diff --git a/configure/proto/server_configure.proto b/configure/proto/server_configure.proto index ccd97b83ba1050b5bf00b54ea0d3b0478ccfa2ca..3f236fcf9c0ad3e2a8b3a0a787407fc7d6c2f5ce 100644 --- a/configure/proto/server_configure.proto +++ b/configure/proto/server_configure.proto @@ -21,9 +21,9 @@ message EngineDesc { required string reloadable_meta = 3; required string reloadable_type = 4; required string model_data_path = 5; - required uint32 runtime_thread_num = 6; - required uint32 batch_infer_size = 
7;
-  required uint32 enable_batch_align = 8;
+  required int32 runtime_thread_num = 6;
+  required int32 batch_infer_size = 7;
+  required int32 enable_batch_align = 8;
   optional string version_file = 9;
   optional string version_type = 10;
 };
diff --git a/doc/CLIENT_CONFIGURE.md b/doc/CLIENT_CONFIGURE.md
new file mode 100644
index 0000000000000000000000000000000000000000..25beb8d19975a014df8b9304fef0767663e1788d
--- /dev/null
+++ b/doc/CLIENT_CONFIGURE.md
@@ -0,0 +1,162 @@
+# Client side configuration
+
+Paddle-serving C++ client SDK主配置文件为conf/predictors.prototxt。其中一个示例如下:
+
+## Sample conf
+
+```shell
+default_variant_conf {
+  tag: "default"
+  connection_conf {
+    connect_timeout_ms: 2000
+    rpc_timeout_ms: 20000
+    connect_retry_count: 2
+    max_connection_per_host: 100
+    hedge_request_timeout_ms: -1
+    hedge_fetch_retry_count: 2
+    connection_type: "pooled"
+  }
+  naming_conf {
+    cluster_filter_strategy: "Default"
+    load_balance_strategy: "la"
+  }
+  rpc_parameter {
+    compress_type: 0
+    package_size: 20
+    protocol: "baidu_std"
+    max_channel_per_request: 3
+  }
+}
+predictors {
+  name: "ximage"
+  service_name: "baidu.paddle_serving.predictor.image_classification.ImageClassifyService"
+  endpoint_router: "WeightedRandomRender"
+  weighted_random_render_conf {
+    variant_weight_list: "50|50"
+  }
+  variants {
+    tag: "var1"
+    naming_conf {
+      cluster: "list://127.0.0.1:8010"
+    }
+  }
+  variants {
+    tag: "var2"
+    naming_conf {
+      cluster: "list://127.0.0.1:8011"
+    }
+  }
+}
+
+predictors {
+  name: "echo_service"
+  service_name: "baidu.paddle_serving.predictor.echo_service.BuiltinTestEchoService"
+  endpoint_router: "WeightedRandomRender"
+  weighted_random_render_conf {
+    variant_weight_list: "50"
+  }
+  variants {
+    tag: "var1"
+    naming_conf {
+      cluster: "list://127.0.0.1:8010,127.0.0.1:8011"
+    }
+  }
+}
+
+```
+
+## 名词解释
+- 预测服务 (Predictor):对一个Paddle预测服务的封装
+- 端点(Endpoint):对一个预测需求的逻辑抽象,通常包含一到多个服务变体,以方便多版本模型管理;
+- 变体(Variant):一套同质化的Paddle-serving集群服务,每个实例起一个Paddle-serving进程;
+
+## 配置项解释
+
+### 
default_variant_conf + +```shell +default_variant_conf { + tag: "default" + connection_conf { + connect_timeout_ms: 2000 + rpc_timeout_ms: 20000 + connect_retry_count: 2 + max_connection_per_host: 100 + hedge_request_timeout_ms: -1 + hedge_fetch_retry_count: 2 + connection_type: "pooled" + } + naming_conf { + cluster_filter_strategy: "Default" # Not used for now + load_balance_strategy: "la" + } + rpc_parameter { + compress_type: 0 + package_size: 20 + protocol: "baidu_std" + max_channel_per_request: 3 + } +} +``` +其中: + +connection_type: Maybe single/short/pooled, see [BRPC DOC: connection_type](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E8%BF%9E%E6%8E%A5%E6%96%B9%E5%BC%8F) + +cluster_filter_strategy: 暂时未用 + +load_balance_strategy: Maybe rr/wrr/random/la/c_murmurhash/c_md5, see [BRPC DOC: load_balance](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E8%B4%9F%E8%BD%BD%E5%9D%87%E8%A1%A1) + +compress_type: 0-None, 1-Snappy, 2-gzip, 3-zlib, 4-lz4, see [BRPC DOC: compress_type](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E5%8E%8B%E7%BC%A9) + +protocol: Maybe baidu_std/http/h2/h2:grpc/thrift/memcache/redis... 
see [BRPC DOC: protocol](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E5%8D%8F%E8%AE%AE) + +### Predictors + +可以为客户端配置多个predictor,每个predictor代表一个要访问的预测服务 + +```shell +predictors { + name: "ximage" + service_name: "baidu.paddle_serving.predictor.image_classification.ImageClassifyService" + endpoint_router: "WeightedRandomRender" + weighted_random_render_conf { + variant_weight_list: "50|50" + } + variants { + tag: "var1" + naming_conf { + cluster: "list://127.0.0.1:8010, 127.0.0.1:8011" + } + } + variants { + tag: "var2" + naming_conf { + cluster: "list://127.0.0.1:8011" + } + } +} + +predictors { + name: "echo_service" + service_name: "baidu.paddle_serving.predictor.echo_service.BuiltinTestEchoService" + endpoint_router: "WeightedRandomRender" + weighted_random_render_conf { + variant_weight_list: "50" + } + variants { + tag: "var1" + naming_conf { + cluster: "list://127.0.0.1:8010" + } + } +} +``` +其中: + +service_name: 写sdk-cpp/proto/xx.proto的package name + +endpoint_router: 目前只支持WeightedRandomRender + +variant_weight_list: 与接下来的variants列表共用,用于表示variants之间相对权重;通过修改此数值可以调整variants调度的比重 + +cluster: Cluster支持的格式见 [BRPC DOC: naming service](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E5%91%BD%E5%90%8D%E6%9C%8D%E5%8A%A1) diff --git a/doc/CREATING.md b/doc/CREATING.md new file mode 100644 index 0000000000000000000000000000000000000000..21bbedf9f9bc514bd91cfc335614e117c109e1fc --- /dev/null +++ b/doc/CREATING.md @@ -0,0 +1,391 @@ +# 从零开始写一个预测服务 + +## 1. 示例说明 + +图像分类是根据图像的语义信息将不同类别图像区分开来,是计算机视觉中重要的基本问题,也是图像检测、图像分割、物体跟踪、行为分析等其他高层视觉任务的基础。图像分类在很多领域有广泛应用,包括安防领域的人脸识别和智能视频分析等,交通领域的交通场景识别,互联网领域基于内容的图像检索和相册自动归类,医学领域的图像识别等。 + +paddle-serving已经提供了一个基于ResNet的模型预测服务,按照INSTALL.md中所述步骤,编译paddle-serving,然后按GETTING_STARTED.md所述步骤启动client端和server端即可看到预测服务运行效果。 + +本文接下来以图像分类任务为例,介绍从零搭建一个模型预测服务的步骤。 + + +## 2. 
Serving端
+
+### 2.1 定义预测接口
+
+** 添加文件:serving/proto/image_class.proto **
+Paddle-serving服务端与客户端通过brpc进行通信,通信协议和格式可以自定,我们选择baidu_std协议。这是一种以protobuf为基本数据交换格式的协议,其说明可参考[BRPC文档: baidu_std](https://github.com/apache/incubator-brpc/blob/master/docs/cn/baidu_std.md)。
+
+
+我们编写图像分类任务预测接口的protobuf如下:
+
+```c++
+syntax="proto2";
+import "pds_option.proto";
+import "builtin_format.proto";
+package baidu.paddle_serving.predictor.image_classification;
+option cc_generic_services = true;
+
+message ClassifyResponse {
+  repeated baidu.paddle_serving.predictor.format.DensePrediction predictions = 1;
+};
+
+message Request {
+  repeated baidu.paddle_serving.predictor.format.XImageReqInstance instances = 1;
+};
+
+message Response {
+  // Each json string is serialized from ClassifyResponse predictions
+  repeated baidu.paddle_serving.predictor.format.XImageResInstance predictions = 1;
+};
+
+service ImageClassifyService {
+  rpc inference(Request) returns (Response);
+  rpc debug(Request) returns (Response);
+  option (pds.options).generate_impl = true;
+};
+```
+
+其中:
+`service ImageClassifyService`定义一个RPC Service,并声明2个RPC接口:`inference`和`debug`,分别接受`Request`类型请求参数,并返回`Response`类型结果。
+
+`DensePrediction`, `XImageReqInstance`和`XImageResInstance`类型的消息分别在其他.proto文件中定义,因此要通过`import 'builtin_format.proto'`语句将需要的类型引入。
+
+`generate_impl = true`: 告诉protobuf编译器,生成RPC service的实现 (在client端,此处为`generate_stub = true`,告诉protobuf编译器生成RPC的stub)
+
+### 2.2 Server端实现
+
+图像分类任务的处理,设计分为3个阶段,对应3个OP
+- 读请求:从Request消息解出请求样例数据
+- 调用Paddle预测lib的接口,对样例进行预测,并保存
+- 预测结果写到Response
+
+此后,框架将负责将Response回传给client端
+
+#### 2.2.1 定制Op算子
+
+** 在serving/op/目录下添加reader_op.cpp, classify_op.cpp, write_json_op.cpp **
+
+- 预处理算子(ReaderOp, serving/op/reader_op.cpp):从Request中读取图像字节流,通过opencv解码,填充tensor对象并输出到channel;
+- 预测调用算子(ClassifyOp, serving/op/classify_op.cpp):从ImageReaderOp的channel获得输入tensor,临时申请输出tensor,调用ModelToolkit进行预测,并将输出tensor写入channel
+- 后处理算子(WriteJsonOp, 
serving/op/write_json.cpp):从ImageClassifyop的channel获得输出tensor,将其序列化为json字符串,写入作为rpc的output + +具体实现可参考demo中的源代码 + + +#### 2.2.2 示例配置([conf目录](http://icode.baidu.com/repos/baidu/personal-code/paddle-serving/tree/master:serving/conf)) + +以下配置文件将ReaderOP, ClassifyOP和WriteJsonOP串联成一个workflow (关于OP/workflow等概念,可参考[设计文档](DESIGN.md)) + +- 配置文件示例: + +** 添加文件 serving/conf/service.prototxt ** + +```shell +services { + name: "ImageClassifyService" + workflows: "workflow1" +} +``` + +** 添加文件 serving/conf/workflow.prototxt ** + +```shell +workflows { + name: "workflow1" + workflow_type: "Sequence" + nodes { + name: "image_reader_op" + type: "ReaderOp" + } + nodes { + name: "image_classify_op" + type: "ClassifyOp" + dependencies { + name: "image_reader_op" + mode: "RO" + } + } + nodes { + name: "write_json_op" + type: "WriteJsonOp" + dependencies { + name: "image_classify_op" + mode: "RO" + } + } +} +``` + +以下配置文件为模型加载配置 + +** 添加文件 serving/conf/resource.prototxt ** + +```shell +model_manager_path: ./conf +model_manager_file: model_toolkit.prototxt +``` + +** 添加文件 serving/conf/model_toolkit.prototxt ** + +```shell +engines { + name: "image_classification_resnet" + type: "FLUID_CPU_NATIVE_DIR" + reloadable_meta: "./data/model/paddle/fluid_time_file" + reloadable_type: "timestamp_ne" + model_data_path: "./data/model/paddle/fluid/SE_ResNeXt50_32x4d" + runtime_thread_num: 0 + batch_infer_size: 0 + enable_batch_align: 0 +} +``` + + +#### 2.2.3 代码编译 + +Serving端代码包含如下部分: +- protobuf接口文件,需要编译成.pb.cc及.pb.h文件并链接到最终可执行文件 +- OP算子实现,需要链接到最终可执行文件 +- Paddle-serving框架代码,封装在libpdserving.a中,需要链接到最终可执行文件 +- Paddle-serving封装paddle-fluid预测库的代码,在inferencer-fluid-cpu/目录产出的libfluid_cpu_engine.a中 +- 其他第三方依赖库:paddle预测库,brpc, opencv等 + +1) protobuf接口文件编译: 不能用protoc默认插件编译,需要编译成paddle-serving定制的.pb.cc及.pb.h文件。具体命令是 +```shell +$ protoc --cpp_out=/path/to/paddle-serving/build/serving/ --pdcodegen_out=/path/to/paddle-serving/ --plugin=protoc-gen-pdcodegen=/path/to/paddle-serving/build/predictor/pdcodegen 
--proto_path=/path/to/paddle-serving/predictor/proto +``` +其中 +`pdcodegen`是由predictor/src/pdcodegen.cpp编译成的protobuf编译插件, --proto_path用来指定去哪里寻找`import`语句需要的protobuf文件 + +predictor/proto目录下有serving端和client端都要包含的builtin_format.proto和pds_option.proto + +**NOTE** +上述protoc命令在paddle-serving编译系统中被封装成一个CMake函数了,在cmake/generic.cmake::PROTOBUF_GENERATE_SERVING_CPP +CMakeLists.txt中调用函数的方法为: +```shell +PROTOBUF_GENERATE_SERVING_CPP(PROTO_SRCS PROTO_HDRS xxx.proto) +``` + +2) OP +serving/op/目录下OP对应的.cpp文件 + +3) Paddle-serving框架代码,封装在predictor目录产出的libpdserving.a中 + +4) Paddle-serving封装paddle-fluid预测库的代码,在inference-fluid-cpu/目录产出的libfluid_cpu_engine.a中 + +5) serving端main函数 + +为简化用户编写初始化代码的工作量,serving端必须的初始化过程已经由paddle-serving框架提供,请参考predictor/src/pdserving.cpp。该文件中包含了完整的初始化过程,用户只需提供合适的配置文件列表即可(请参考2.2.2节),不必编写main函数 + +6) 第三方依赖库 + +brpc, paddle-fluid, opencv等第三方库, + +7) 链接 + +整个链接过程在CMakeLists.txt中写法如下: +```shell +target_link_libraries(serving opencv_imgcodecs + ${opencv_depend_libs} -Wl,--whole-archive fluid_cpu_engine + -Wl,--no-whole-archive pdserving paddle_fluid ${paddle_depend_libs} + ${MKLML_LIB} ${MKLML_IOMP_LIB} -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) + +``` + +### 2.3 gflags配置项 + +以下是serving端支持的gflag配置选项列表,并提供了默认值。 + +| name | 默认值 | 含义 | +|------|--------|------| +|workflow_path|./conf|workflow配置目录名| +|workflow_file|workflow.prototxt|workflow配置文件名| +|inferservice_path|./conf|service配置目录名| +|inferservice_file|service.prototxt|service配置文件名| +|resource_path|./conf|资源管理器目录名| +|resource_file|resource.prototxt|资源管理器文件名| +|reload_interval_s|10|重载线程间隔时间(s)| +|enable_model_toolkit|true|模型管理| +|enable_protocol_list|baidu_std|brpc 通信协议列表| +|log_dir|./log|log dir| + +可以通过在serving/conf/gflags.conf覆盖默认值,例如 +``` +--log_dir=./serving_log/ +``` +将指定日志目录到./serving_log目录下 + + +## 3. Client端 + +### 3.1 定义预测接口 + +** 在sdk-cpp/proto添加image_class.proto ** + +与serving端预测接口protobuf文件基本一致,只要将`generate_impl=true`改为`generate_stub=true` + +```c++ +import "pds_option.proto"; +... 
+service ImageClassifyService {
+  rpc inference(Request) returns (Response);
+  rpc debug(Request) returns (Response);
+  // 改动:打开generate_stub开关(以支持配置驱动)
+  option (pds.options).generate_stub = true;
+};
+```
+
+### 3.2 Client端逻辑
+
+Paddle-serving提供的C++ SDK在sdk-cpp/目录中,入口为sdk-cpp/include/predictor_sdk.h中的`class PredictorApi`类。
+
+该类的主要接口:
+```C++
+class PredictorApi {
+  // 创建PredictorApi句柄,输入为client端配置文件predictor.prototxt的目录和文件名
+  int create(const char *path, const char *file);
+
+  // 线程级初始化
+  int thrd_initialize();
+
+  // 根据名称获取Predictor句柄; ep_name对应predictor.prototxt中predictors的name字段
+  Predictor *fetch_predictor(std::string ep_name);
+};
+
+class Predictor {
+  // 预测
+  int inference(google::protobuf::Message *req, google::protobuf::Message *res);
+
+  // Debug模式
+  int debug(google::protobuf::Message *req,
+            google::protobuf::Message *res,
+            butil::IOBufBuilder *debug_os);
+};
+```
+
+#### 3.2.1 请求逻辑
+
+** 增加sdk-cpp/demo/ximage.cpp **
+
+```c++
+// 进程级初始化
+PredictorApi api;
+
+if (api.create("./conf/", "predictors.prototxt") != 0) {
+  return -1;
+}
+
+// 线程级预测调用:
+Request req;
+Response res;
+
+api.thrd_initialize();
+
+// Call this before every request
+api.thrd_clear();
+
+create_req(&req);
+
+Predictor* predictor = api.fetch_predictor("ximage");
+if (predictor == NULL) {
+  return -1;
+}
+
+if (predictor->inference(req, res) != 0) {
+  return -1;
+}
+
+// parse response
+print_res(res);
+
+// 线程级销毁
+api.thrd_finalize();
+
+// 进程级销毁
+api.destroy();
+```
+
+具体实现可参考paddle-serving提供的例子sdk-cpp/demo/ximage.cpp
+
+### 3.3 链接
+
+Client端可执行文件包含的代码有:
+- protobuf接口文件,需要编译成.pb.cc及.pb.h文件并链接到最终可执行文件
+- main函数,以及调用SDK接口访问预测服务的逻辑,见3.2.1节
+- Client端读取并维护predictor信息列表的代码,在sdk-cpp/目录产出的libsdk-cpp.a
+- 因为protobuf接口文件用到了predictor/proto/目录下的builtin_format.proto和pds_option.proto,因此还需要联编libpdserving.a
+
+1) protobuf接口文件,同serving端,需要用predictor/src/pdcodegen.cpp产出的pdcodegen插件,配合protoc使用,具体命令为
+```shell
+$ protoc --cpp_out=/path/to/paddle-serving/build/serving/ 
--pdcodegen_out=/path/to/paddle-serving/ --plugin=protoc-gen-pdcodegen=/path/to/paddle-serving/build/predictor/pdcodegen --proto_path=/path/to/paddle-serving/predictor/proto +``` +其中 +`pdcodegen`是由predictor/src/pdcodegen.cpp编译成的protobuf编译插件, --proto_path用来指定去哪里寻找`import`语句需要的protobuf文件 + +** NOTE ** +上述protoc命令在paddle-serving编译系统中被封装成一个CMake函数了,在cmake/generic.cmake::PROTOBUF_GENERATE_SERVING_CPP +CMakeLists.txt中调用函数的方法为: +```shell +PROTOBUF_GENERATE_SERVING_CPP(PROTO_SRCS PROTO_HDRS xxx.proto) +``` +2) main函数,以及调用SDK接口访问预测服务的逻辑 + +3) Client端读取并维护predictor信息列表的代码,在sdk-cpp/目录产出的libsdk-cpp.a + +4) predictor/目录产出的libpdserving.a + +最终链接命令如下: + +```shell +add_executable(ximage ${CMAKE_CURRENT_LIST_DIR}/demo/ximage.cpp) +target_link_libraries(ximage -Wl,--whole-archive sdk-cpp + -Wl,--no-whole-archive pdserving -lpthread -lcrypto -lm -lrt -lssl -ldl + -lz) + +``` + +### 3.4 连接配置 + +** 增加配置文件sdk/conf/predictors.prototxt ** + +```shell +## 默认配置共享 +default_variant_conf { + tag: "default" + connection_conf { + connect_timeout_ms: 2000 + rpc_timeout_ms: 20000 + connect_retry_count: 2 + max_connection_per_host: 100 + hedge_request_timeout_ms: -1 + hedge_fetch_retry_count: 2 + connection_type: "pooled" + } + naming_conf { + cluster_filter_strategy: "Default" + load_balance_strategy: "la" + } + rpc_parameter { + compress_type: 0 + package_size: 20 + protocol: "baidu_std" + max_channel_per_request: 3 + } +} +predictors { + name: "ximage" + service_name: "baidu.paddle_serving.predictor.image_classification.ImageClassifyService" + endpoint_router: "WeightedRandomRender" + weighted_random_render_conf { + variant_weight_list: "50" + } + variants { + tag: "var1" + naming_conf { + cluster: "list://127.0.0.1:8010" + } + } +} +``` +关于客户端的详细配置选项,可参考[CLIENT CONFIGURATION](CLIENT_CONFIGURE.md) diff --git a/doc/DESIGN.md b/doc/DESIGN.md new file mode 100644 index 0000000000000000000000000000000000000000..d4b93dc33d072f14cedaa745a291d30dbae23817 --- /dev/null +++ b/doc/DESIGN.md @@ -0,0 +1,3 
@@ +# 设计文档 + +# 项目背景 diff --git a/doc/GETTING_STARTED.md b/doc/GETTING_STARTED.md new file mode 100644 index 0000000000000000000000000000000000000000..9d65906ac1070214328240560ced840cc0f62fe1 --- /dev/null +++ b/doc/GETTING_STARTED.md @@ -0,0 +1,25 @@ + +# Getting Started + +## 运行示例 +说明:Imagenet图像分类模型,默认采用CPU模式(GPU模式当前版本暂未提供支持) + +Step1:启动Server端: +```shell +cd paddle-serving/output/demo/serving/ && ./bin/serving & +``` + +默认启动后日志写在./log/下,可tail日志查看serving端接收请求的日志: +```shell +tail -f log/serving.INFO +``` + +Step2:启动Client端: +```shell +cd paddle-serving/output/demo/client/image_class && ./bin/ximage & +``` + +默认启动后日志写在./log/下,可tail日志查看分类结果: +```shell +tail -f log/ximage.INFO +``` diff --git a/doc/INDEX.md b/doc/INDEX.md new file mode 100644 index 0000000000000000000000000000000000000000..a9d4e2b7145aa1368727d37ec918a46f33830fa5 --- /dev/null +++ b/doc/INDEX.md @@ -0,0 +1,5 @@ + +[Client Configure](CLIENT_CONFIGURE.md) +[Creating a Prediction Service](CREATING.md) +[Design](DESIGN.md) +[Getting Started](GETTING_STARTED.md) diff --git a/doc/INSTALL.md b/doc/INSTALL.md new file mode 100644 index 0000000000000000000000000000000000000000..c03f98cb8a0392da0f0fd2e971a9d251b6bb4eca --- /dev/null +++ b/doc/INSTALL.md @@ -0,0 +1,18 @@ +# Install + +## 系统需求 + +OS: Linux +CMake: 3.2 +python + +## 编译 +```shell +$ git clone ssh://wangguibao@icode.baidu.com:8235/baidu/personal-code/paddle-serving +$ cd paddle-serving +$ mkdir build +$ cd build +$ cmake .. 
+$ make -j4 +$ make install +``` diff --git a/predictor/src/pdserving.cpp b/predictor/src/pdserving.cpp index b4d65b40353890222b6e5690f1513398aba53eff..5d88ba353d9f4040a92a5e7526c05a370db0f9b1 100644 --- a/predictor/src/pdserving.cpp +++ b/predictor/src/pdserving.cpp @@ -107,15 +107,18 @@ int main(int argc, char** argv) { g_change_server_port(); // initialize logger instance - FLAGS_log_dir = "./log"; + if (FLAGS_log_dir == "") { + FLAGS_log_dir = "./log"; + } struct stat st_buf; int ret = 0; - if ((ret = stat("./log", &st_buf)) != 0) { - mkdir("./log", 0777); - ret = stat("./log", &st_buf); + if ((ret = stat(FLAGS_log_dir.c_str(), &st_buf)) != 0) { + mkdir(FLAGS_log_dir.c_str(), 0777); + ret = stat(FLAGS_log_dir.c_str(), &st_buf); if (ret != 0) { - LOG(WARNING) << "Log path ./log not exist, and create fail"; + LOG(WARNING) << "Log path " << FLAGS_log_dir + << " not exist, and create fail"; return -1; } } diff --git a/sdk-cpp/CMakeLists.txt b/sdk-cpp/CMakeLists.txt index b846a537d7f43090b429a269ce59cbad1847bb07..3aee225dce096e9ff420a57121ba3e38499f1f03 100644 --- a/sdk-cpp/CMakeLists.txt +++ b/sdk-cpp/CMakeLists.txt @@ -3,26 +3,29 @@ include(proto/CMakeLists.txt) add_library(sdk-cpp ${sdk_cpp_srcs}) add_dependencies(sdk-cpp pdcodegen configure) target_link_libraries(sdk-cpp brpc configure protobuf leveldb) +target_include_directories(sdk-cpp PUBLIC + ${CMAKE_BINARY_DIR}/predictor/) add_executable(ximage ${CMAKE_CURRENT_LIST_DIR}/demo/ximage.cpp) target_link_libraries(ximage -Wl,--whole-archive sdk-cpp - -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl + -Wl,--no-whole-archive pdserving -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) add_executable(echo ${CMAKE_CURRENT_LIST_DIR}/demo/echo.cpp) -target_link_libraries(echo -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl +target_link_libraries(echo -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive + pdserving -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) 
add_executable(dense_format ${CMAKE_CURRENT_LIST_DIR}/demo/dense_format.cpp) -target_link_libraries(dense_format -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl +target_link_libraries(dense_format pdserving -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) add_executable(sparse_format ${CMAKE_CURRENT_LIST_DIR}/demo/sparse_format.cpp) -target_link_libraries(sparse_format -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl +target_link_libraries(sparse_format pdserving -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) add_executable(int64tensor_format ${CMAKE_CURRENT_LIST_DIR}/demo/int64tensor_format.cpp) -target_link_libraries(int64tensor_format -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl +target_link_libraries(int64tensor_format pdserving -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) # install diff --git a/sdk-cpp/conf/predictors.prototxt b/sdk-cpp/conf/predictors.prototxt index 82b91fb2540f3a04c81d06265a0983cea2f10e18..ef5520513c789ee359d5f450f49fe57ebc3a371f 100644 --- a/sdk-cpp/conf/predictors.prototxt +++ b/sdk-cpp/conf/predictors.prototxt @@ -5,7 +5,7 @@ default_variant_conf { rpc_timeout_ms: 20000 connect_retry_count: 2 max_connection_per_host: 100 - hedge_request_timeout_ms: 4294967295 + hedge_request_timeout_ms: -1 hedge_fetch_retry_count: 2 connection_type: "pooled" } diff --git a/sdk-cpp/demo/dense_format.cpp b/sdk-cpp/demo/dense_format.cpp index beb13877fffa1cc48c73a8b831ffda302987314a..43d73630c6ffad4aa400e3f9b6c4e80fc8778d2d 100644 --- a/sdk-cpp/demo/dense_format.cpp +++ b/sdk-cpp/demo/dense_format.cpp @@ -17,7 +17,7 @@ #include #include -#include "sdk-cpp/builtin_format.pb.h" +#include "predictor/builtin_format.pb.h" #include "sdk-cpp/dense_service.pb.h" #include "sdk-cpp/include/common.h" 
#include "sdk-cpp/include/predictor_sdk.h" diff --git a/sdk-cpp/demo/echo.cpp b/sdk-cpp/demo/echo.cpp index ca68d2cbafd177864bb3e97164bb57959adf7430..b296cd281cb891a7c6f147e89e7d510e1c3cc0c6 100644 --- a/sdk-cpp/demo/echo.cpp +++ b/sdk-cpp/demo/echo.cpp @@ -17,7 +17,7 @@ #include #include -#include "sdk-cpp/builtin_format.pb.h" +#include "predictor/builtin_format.pb.h" #include "sdk-cpp/echo_service.pb.h" #include "sdk-cpp/include/common.h" #include "sdk-cpp/include/predictor_sdk.h" diff --git a/sdk-cpp/demo/int64tensor_format.cpp b/sdk-cpp/demo/int64tensor_format.cpp index 97160d82969d5932cc89416c8aecdbad117705de..15db68a2779bb93d8ceb7e9ea3573cd7b780a950 100644 --- a/sdk-cpp/demo/int64tensor_format.cpp +++ b/sdk-cpp/demo/int64tensor_format.cpp @@ -17,7 +17,7 @@ #include #include -#include "sdk-cpp/builtin_format.pb.h" +#include "predictor/builtin_format.pb.h" #include "sdk-cpp/include/common.h" #include "sdk-cpp/include/predictor_sdk.h" #include "sdk-cpp/int64tensor_service.pb.h" diff --git a/sdk-cpp/demo/sparse_format.cpp b/sdk-cpp/demo/sparse_format.cpp index c428881dc909237754a66f5c116fa2b4e266afeb..78c237d76827d8d2fa591a026e2ac299b3dca1ce 100644 --- a/sdk-cpp/demo/sparse_format.cpp +++ b/sdk-cpp/demo/sparse_format.cpp @@ -17,7 +17,7 @@ #include #include -#include "sdk-cpp/builtin_format.pb.h" +#include "predictor/builtin_format.pb.h" #include "sdk-cpp/include/common.h" #include "sdk-cpp/include/predictor_sdk.h" #include "sdk-cpp/sparse_service.pb.h" diff --git a/sdk-cpp/demo/ximage.cpp b/sdk-cpp/demo/ximage.cpp index 72d3407c6e039000a9e97c8bbcd60df80af07477..226f6420da3903bf5ac6e8afafeef8bddecb95d7 100644 --- a/sdk-cpp/demo/ximage.cpp +++ b/sdk-cpp/demo/ximage.cpp @@ -17,7 +17,7 @@ #include #include -#include "sdk-cpp/builtin_format.pb.h" +#include "predictor/builtin_format.pb.h" #include "sdk-cpp/image_class.pb.h" #include "sdk-cpp/include/common.h" #include "sdk-cpp/include/predictor_sdk.h" @@ -150,7 +150,7 @@ int main(int argc, char** argv) { Predictor* 
predictor = api.fetch_predictor("ximage"); if (!predictor) { - LOG(ERROR) << "Failed fetch predictor: wasq"; + LOG(ERROR) << "Failed fetch predictor: ximage"; return -1; } diff --git a/sdk-cpp/include/endpoint.h b/sdk-cpp/include/endpoint.h index d28b1d56b73b1f0f139ed3574c6cbcc7af6cd98c..3b9bf9a38952d67ae0e7548a2f1c04a39faf6576 100644 --- a/sdk-cpp/include/endpoint.h +++ b/sdk-cpp/include/endpoint.h @@ -16,6 +16,7 @@ #include #include +#include "sdk-cpp/include/abtest.h" #include "sdk-cpp/include/common.h" #include "sdk-cpp/include/endpoint_config.h" #include "sdk-cpp/include/predictor.h" @@ -59,6 +60,7 @@ class Endpoint { private: std::string _endpoint_name; std::vector _variant_list; + EndpointRouterBase* _abtest_router; }; } // namespace sdk_cpp diff --git a/sdk-cpp/proto/builtin_format.proto b/sdk-cpp/proto/builtin_format.proto deleted file mode 100644 index 8f0d1d8f01ffa6f026271a1f3d20b08ae072cc77..0000000000000000000000000000000000000000 --- a/sdk-cpp/proto/builtin_format.proto +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -syntax = "proto2"; -package baidu.paddle_serving.predictor.format; - -// dense format -message DenseInstance { repeated float features = 1; }; - -message DensePrediction { repeated float categories = 1; }; - -// sparse format -message SparseInstance { - repeated uint32 keys = 1; - repeated uint32 shape = 2; - repeated float values = 3; -}; - -message SparsePrediction { repeated float categories = 1; }; - -// int64-tensor format -message Int64TensorInstance { - repeated int64 data = 1; - repeated uint32 shape = 2; -}; - -message Float32TensorPredictor { - repeated float data = 1; - repeated uint32 shape = 2; -}; - -// x-image format -message XImageReqInstance { - required bytes image_binary = 1; - required uint32 image_length = 2; -}; - -message XImageResInstance { required string response_json = 1; }; - -// x-record format -message XRecordInstance { - // TODO - required bytes data = 1; -}; diff --git a/sdk-cpp/proto/pds_option.proto b/sdk-cpp/proto/pds_option.proto deleted file mode 100644 index c45c41ea8c5cd0f8378015c1abe575664ab61386..0000000000000000000000000000000000000000 --- a/sdk-cpp/proto/pds_option.proto +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -syntax = "proto2"; -import "google/protobuf/descriptor.proto"; -package pds; - -extend google.protobuf.FieldOptions { - optional bool pack_on = 70000 [ default = false ]; -}; - -extend google.protobuf.ServiceOptions { - optional PaddleServiceOption options = 80000; -}; - -message PaddleServiceOption { - optional bool generate_impl = 1 [ default = false ]; - optional bool generate_stub = 2 [ default = false ]; -}; diff --git a/sdk-cpp/src/endpoint.cpp b/sdk-cpp/src/endpoint.cpp index 8b2a15aa349080a242966dd9573b1fa8376c79b2..d1c66124c6e7657db23905eb681bfa0b957be9d2 100644 --- a/sdk-cpp/src/endpoint.cpp +++ b/sdk-cpp/src/endpoint.cpp @@ -22,6 +22,7 @@ namespace sdk_cpp { int Endpoint::initialize(const EndpointInfo& ep_info) { _variant_list.clear(); _endpoint_name = ep_info.endpoint_name; + _abtest_router = static_cast(ep_info.ab_test); uint32_t var_size = ep_info.vars.size(); for (uint32_t vi = 0; vi < var_size; ++vi) { const VariantInfo& var_info = ep_info.vars[vi]; @@ -80,25 +81,7 @@ int Endpoint::thrd_finalize() { return 0; } -// 带全流量分层实验路由信息 -Predictor* Endpoint::get_predictor(const void* params) { - Variant* var = NULL; - if (_variant_list.size() == 1) { - var = _variant_list[0]; - } - - if (!var) { - LOG(ERROR) << "get null var from endpoint."; - return NULL; - } - - return var->get_predictor(params); -} - Predictor* Endpoint::get_predictor() { -#if 1 - LOG(INFO) << "Endpoint::get_predictor"; -#endif if (_variant_list.size() == 1) { if (_variant_list[0] == NULL) { LOG(ERROR) << "Not valid variant info"; @@ -107,7 +90,18 @@ Predictor* Endpoint::get_predictor() { return _variant_list[0]->get_predictor(); } - return NULL; + if (_abtest_router == NULL) { + LOG(FATAL) << "Not valid abtest_router!"; + return NULL; + } + + Variant* var = _abtest_router->route(_variant_list); + if (!var) { + LOG(FATAL) << "get null var from endpoint"; + return NULL; + } + + return var->get_predictor(); } int Endpoint::ret_predictor(Predictor* predictor) { diff --git 
a/serving/conf/workflow.prototxt b/serving/conf/workflow.prototxt index 275873613444990e00fb390e0c51c8e312074181..0300b3702e093efa5cf3f17dbeb4e6fcb2d05f08 100644 --- a/serving/conf/workflow.prototxt +++ b/serving/conf/workflow.prototxt @@ -44,7 +44,7 @@ workflows { } nodes { name: "write_json_op" - type: "WriteOp" + type: "WriteJsonOp" dependencies { name: "image_classify_op" mode: "RO" diff --git a/serving/proto/builtin_format.proto b/serving/proto/builtin_format.proto deleted file mode 100644 index 6666f6479393a4912ed23d02c24585a3caf6e1b0..0000000000000000000000000000000000000000 --- a/serving/proto/builtin_format.proto +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -syntax = "proto2"; -import "pds_option.proto"; -package baidu.paddle_serving.predictor.format; - -// dense format -message DenseInstance { repeated float features = 1; }; - -message DensePrediction { repeated float categories = 1; }; - -// sparse format -message SparseInstance { - repeated uint32 keys = 1; - repeated uint32 shape = 2; - repeated float values = 3; -}; - -message SparsePrediction { repeated float categories = 1; }; - -// int64-tensor format -message Int64TensorInstance { - repeated int64 data = 1; - repeated uint32 shape = 2; -}; - -message Float32TensorPredictor { - repeated float data = 1; - repeated uint32 shape = 2; -}; - -// x-image format -message XImageReqInstance { - required bytes image_binary = 1; - required uint32 image_length = 2; -}; - -message XImageResInstance { required string response_json = 1; }; - -// x-record format -message XRecordInstance { - // TODO - required bytes data = 1; -}; diff --git a/serving/proto/pds_option.proto b/serving/proto/pds_option.proto deleted file mode 100644 index c45c41ea8c5cd0f8378015c1abe575664ab61386..0000000000000000000000000000000000000000 --- a/serving/proto/pds_option.proto +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -syntax = "proto2"; -import "google/protobuf/descriptor.proto"; -package pds; - -extend google.protobuf.FieldOptions { - optional bool pack_on = 70000 [ default = false ]; -}; - -extend google.protobuf.ServiceOptions { - optional PaddleServiceOption options = 80000; -}; - -message PaddleServiceOption { - optional bool generate_impl = 1 [ default = false ]; - optional bool generate_stub = 2 [ default = false ]; -};