diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 03066aca2b85e9c5473e1ec0dae648fdd86e41ad..861889266b0132b8812d2d958dd6675dc631fd33 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -832,6 +832,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
   list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
   list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
 
+  set(PDCODEGEN "${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen")
   if (${FOR_SERVING_SIDE})
     add_custom_command(
       OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
@@ -842,7 +843,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen
         --proto_path=${CMAKE_SOURCE_DIR}/core/predictor/proto
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   else()
@@ -854,7 +855,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --pdcodegen_out=${CMAKE_CURRENT_BINARY_DIR}
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/pdcodegen/pdcodegen
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   endif()
diff --git a/core/sdk-cpp/proto/CMakeLists.txt b/core/sdk-cpp/proto/CMakeLists.txt
index 5bc378398f341025d48b1d39d22785305aba2dea..e0fb6aad3dba9491cde76017550f248f1e573e90 100644
--- a/core/sdk-cpp/proto/CMakeLists.txt
+++ b/core/sdk-cpp/proto/CMakeLists.txt
@@ -37,6 +37,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
   list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
   list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
 
+  set(PDCODEGEN "${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen")
   if (${FOR_SERVING_SIDE})
     add_custom_command(
       OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
@@ -47,7 +48,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen
         --proto_path=${CMAKE_SOURCE_DIR}/core/predictor/proto
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   else()
@@ -59,7 +60,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --pdcodegen_out=${CMAKE_CURRENT_BINARY_DIR}
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   endif()
diff --git a/core/sdk-cpp/src/config_manager.cpp b/core/sdk-cpp/src/config_manager.cpp
index c422f0b52eba7d3a34e663f4198b9914a7722704..e3126855e082feaf9c6d237692c214fa8f66577b 100644
--- a/core/sdk-cpp/src/config_manager.cpp
+++ b/core/sdk-cpp/src/config_manager.cpp
@@ -31,6 +31,8 @@ int EndpointConfigManager::create(const std::string& sdk_desc_str) {
     LOG(ERROR) << "Failed reload endpoint config";
     return -1;
   }
+
+  return 0;
 }
 
 int EndpointConfigManager::create(const char* path, const char* file) {
diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md
index 708739851b8e3ec5ca8b5e204a68169ec88041b5..48d1d265665fe40a97c6423a34c1e4d9361c850a 100644
--- a/doc/RUN_IN_DOCKER.md
+++ b/doc/RUN_IN_DOCKER.md
@@ -1,6 +1,6 @@
 # How to run PaddleServing in Docker
 
-([简体中文](./RUN_IN_DOCKER_CN.md)|English)
+([简体中文](RUN_IN_DOCKER_CN.md)|English)
 
 ## Requirements
 
@@ -137,6 +137,13 @@ pip install paddle-serving-server-gpu
 
 ### Test example
 
+When running the GPU Server, you need to set the GPUs used by the prediction service through the `--gpu_ids` option, and the CPU is used by default. An error will be reported when the value of `--gpu_ids` exceeds the environment variable `CUDA_VISIBLE_DEVICES`. The following example specifies to use a GPU with index 0:
+```shell
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --gpu_ids 0
+```
+
+
 Get the trained Boston house price prediction model by the following command:
 
 ```bash
@@ -180,4 +187,9 @@ tar -xzf uci_housing.tar.gz
     print(fetch_map)
 ```
 
-
+
+
+
+## Attention
+
+The images provided by this document are all runtime images, which do not support compilation. If you want to compile from source, refer to [COMPILE](COMPILE.md).
diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md
index 9f2abba176ca89f6d03d9602c2fd1e7d4a78980b..8800b3a30690e03fce739714af1f24a3c8333b7f 100644
--- a/doc/RUN_IN_DOCKER_CN.md
+++ b/doc/RUN_IN_DOCKER_CN.md
@@ -129,12 +129,13 @@ pip install paddle-serving-server-gpu
 
 ### 测试example
 
-GPU版本在运行Server端代码前需要设置`CUDA_VISIBLE_DEVICES`环境变量来指定预测服务使用的GPU,下面的示例为指定索引为0和1两块GPU:
-
-```bash
- export CUDA_VISIBLE_DEVICES=0,1
+在运行GPU版Server时需要通过`--gpu_ids`选项设置预测服务使用的GPU,缺省状态默认使用CPU。当设置的`--gpu_ids`超出环境变量`CUDA_VISIBLE_DEVICES`时会报错。下面的示例为指定使用索引为0的GPU:
+```shell
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --gpu_ids 0
 ```
 
+
 通过下面命令获取训练好的Boston房价预估模型:
 
 ```bash
@@ -177,3 +178,7 @@ tar -xzf uci_housing.tar.gz
     fetch_map = client.predict(feed={"x": data}, fetch=["price"])
     print(fetch_map)
 ```
+
+## 注意事项
+
+该文档提供的镜像均为运行镜像,不支持开发编译。如果想要从源码编译,请查看[如何编译PaddleServing](COMPILE.md)。
diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py
index 9b8e301a62eb0eee161cd701555543d329c6ae83..7cedb6aa451e0e4a128f0fedbfde1a896977f601 100644
--- a/python/examples/bert/benchmark_batch.py
+++ b/python/examples/bert/benchmark_batch.py
@@ -53,7 +53,7 @@ def single_func(idx, resource):
                 feed_batch.append(reader.process(dataset[bi]))
             b_end = time.time()
             if profile_flags:
-                print("PROFILE\tpid:{}\tbert+pre_0:{} bert_pre_1:{}".format(
+                print("PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}".format(
                     os.getpid(),
                     int(round(b_start * 1000000)),
                     int(round(b_end * 1000000))))
@@ -69,9 +69,7 @@
 
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = [
-        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
-    ]
+    endpoint_list = ["127.0.0.1:9292"]
     result = multi_thread_runner.run(single_func, args.thread,
                                      {"endpoint": endpoint_list})
     avg_cost = 0
diff --git a/python/examples/criteo_ctr_with_cube/README.md b/python/examples/criteo_ctr_with_cube/README.md
index eed612f4043ff7fdb41538b2a425d98f0d045718..4a69fd528a3d25f4010d6fdc724146027a90e896 100755
--- a/python/examples/criteo_ctr_with_cube/README.md
+++ b/python/examples/criteo_ctr_with_cube/README.md
@@ -2,21 +2,35 @@
 
 ([简体中文](./README_CN.md)|English)
 
+### Compile Source Code
+in the root directory of this git project
+```
+mkdir build_server
+cd build_server
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT_ONLY=OFF ..
+make -j10
+make install -j10
+```
+
 ### Get Sample Dataset
+go to directory `python/examples/criteo_ctr_with_cube`
 ```
 sh get_data.sh
 ```
 
-### Train and Save Model
+### Download Model and Sparse Parameter Sequence Files
 ```
-python local_train.py
+wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
+tar xf ctr_cube_unittest.tar.gz
+mv models/ctr_client_conf ./
+mv models/ctr_serving_model_kv ./
+mv models/data ./cube/
 ```
-the trained model will be in ./ctr_server_model and ./ctr_client_config, and ctr_server_model_kv, ctr_client_conf_kv。
+the model will be in ./ctr_server_model_kv and ./ctr_client_config.
 
 ### Start Sparse Parameter Indexing Service
 ```
-cp ../../../build_server/core/predictor/seq_generator seq_generator
 cp ../../../build_server/output/bin/cube* ./cube/
 sh cube_prepare.sh &
 ```
 
diff --git a/python/examples/criteo_ctr_with_cube/README_CN.md b/python/examples/criteo_ctr_with_cube/README_CN.md
index 868e8bce5a624904d532bf956fd5868abc0a1c52..5bc36869abe0c1f3fc010c2893731a0e86a1c270 100644
--- a/python/examples/criteo_ctr_with_cube/README_CN.md
+++ b/python/examples/criteo_ctr_with_cube/README_CN.md
@@ -1,20 +1,34 @@
 ## 带稀疏参数索引服务的CTR预测服务
 
 (简体中文|[English](./README.md))
+### 编译源代码
+在本项目的根目录下,执行
+```
+mkdir build_server
+cd build_server
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT_ONLY=OFF ..
+make -j10
+make install -j10
+```
+
 ### 获取样例数据
+进入目录 `python/examples/criteo_ctr_with_cube`
 ```
 sh get_data.sh
 ```
 
-### 保存模型和配置文件
+### 下载模型和稀疏参数序列文件
 ```
-python local_train.py
+wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
+tar xf ctr_cube_unittest.tar.gz
+mv models/ctr_client_conf ./
+mv models/ctr_serving_model_kv ./
+mv models/data ./cube/
 ```
-执行脚本后会在当前目录生成ctr_server_model和ctr_client_config文件夹,以及ctr_server_model_kv, ctr_client_conf_kv。
+执行脚本后会在当前目录有ctr_server_model_kv和ctr_client_config文件夹。
 
 ### 启动稀疏参数索引服务
 ```
-cp ../../../build_server/core/predictor/seq_generator seq_generator
 cp ../../../build_server/output/bin/cube* ./cube/
 sh cube_prepare.sh &
 ```
diff --git a/python/examples/imagenet/image_http_client.py b/python/examples/imagenet/image_http_client.py
index 2a2e9ea20d7e428cfe42393e2fee60035c33283d..cda0f33ac82d0bd228a22a8f438cbe1aa013eadf 100644
--- a/python/examples/imagenet/image_http_client.py
+++ b/python/examples/imagenet/image_http_client.py
@@ -30,7 +30,10 @@ def predict(image_path, server):
     req = json.dumps({"image": image, "fetch": ["score"]})
     r = requests.post(
         server, data=req, headers={"Content-Type": "application/json"})
-    print(r.json()["score"][0])
+    try:
+        print(r.json()["score"][0])
+    except ValueError:
+        print(r.text)
     return r
 
 
diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py
index f1a3dcf612e34d83387163d9fea491a7dca2c579..d723795f214e22957bff49f0ddf8fd42086b8a7e 100644
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -32,7 +32,7 @@ def save_model(server_model_folder,
     executor = Executor(place=CPUPlace())
 
     feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
-    target_vars = fetch_var_dict.values()
+    target_vars = list(fetch_var_dict.values())
 
     save_inference_model(
         server_model_folder,
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 279e3a895e975473fc5569c4716368c3dda1d9f1..088e3928f4409eaac4d42d771a72ecc9d13fdbce 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -38,6 +38,8 @@ def parse_args():  # pylint: disable=doc-string-missing
         help="Working dir of current service")
     parser.add_argument(
         "--device", type=str, default="cpu", help="Type of device")
+    parser.add_argument(
+        "--mem_optim", type=bool, default=False, help="Memory optimize")
     return parser.parse_args()
 
 
@@ -48,6 +50,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     port = args.port
     workdir = args.workdir
     device = args.device
+    mem_optim = args.mem_optim
 
     if model == "":
         print("You must specify your serving model")
@@ -67,6 +70,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
+    server.set_memory_optimize(mem_optim)
 
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 2fd35c6d66e4bf282224a8775f1a6bf0d1c6a8c5..3dd330b18921c81cf17601ff7e52d860f0322f95 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -43,6 +43,8 @@ def serve_args():
     parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
     parser.add_argument(
         "--name", type=str, default="None", help="Default service name")
+    parser.add_argument(
+        "--mem_optim", type=bool, default=False, help="Memory optimize")
     return parser.parse_args()
 
 
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index d09efbfc8e1512ecb75b063ad760ce66e1a3159e..cb82e02cbec83324a6cb6029208325d8ce38e263 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -33,6 +33,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     port = args.port + index
     thread_num = args.thread
     model = args.model
+    mem_optim = args.mem_optim
     workdir = "{}_{}".format(args.workdir, gpuid)
 
     if model == "":
@@ -53,6 +54,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
+    server.set_memory_optimize(mem_optim)
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
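A minimal usage sketch of the new `--mem_optim` flag added above, assuming the `uci_housing_model` directory from the Docker guide is present. Because the option is declared with `type=bool`, argparse treats any non-empty value as true:

```shell
# Illustrative invocation (not from the diff): start the CPU server with memory
# optimization enabled; passing any non-empty value turns the flag on because it
# is parsed with type=bool.
python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --mem_optim True
```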