diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto index c974f010737a8836d5de83d737ee0f9b9519462f..e6678f879397559d32acfa4b7dc3054d64c680ef 100755 --- a/core/configure/proto/server_configure.proto +++ b/core/configure/proto/server_configure.proto @@ -65,6 +65,10 @@ message EngineDesc { optional int32 batch_infer_size = 31 [ default = 32 ]; optional bool enable_overrun = 32 [ default = false ]; optional bool allow_split_request = 33 [ default = true ]; + optional int32 min_subgraph_size = 34 [ default = 3 ]; + map min_input_shape = 35; + map max_input_shape = 36; + map opt_input_shape = 37; }; // model_toolkit conf diff --git a/core/general-server/op/general_detection_op.cpp b/core/general-server/op/general_detection_op.cpp index b62a2d2544e12d493033cf1bb8e6606d72f614d3..6a4fe15f2de0bbe930bde850022c1dc3c34f59d3 100644 --- a/core/general-server/op/general_detection_op.cpp +++ b/core/general-server/op/general_detection_op.cpp @@ -244,7 +244,7 @@ int GeneralDetectionOp::inference() { databuf_char_out = reinterpret_cast(databuf_data_out); paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); paddle::PaddleTensor tensor_out; - tensor_out.name = "image"; + tensor_out.name = "x"; tensor_out.dtype = paddle::PaddleDType::FLOAT32; tensor_out.shape = output_shape; tensor_out.data = paddleBuf; diff --git a/examples/C++/PaddleOCR/ocr/README.md b/examples/C++/PaddleOCR/ocr/README.md index 28849bf3278ace80340344a1661c620d70c7b062..12f0fd6cd5f06a97b0f5e846037217a40e37fe89 100755 --- a/examples/C++/PaddleOCR/ocr/README.md +++ b/examples/C++/PaddleOCR/ocr/README.md @@ -4,9 +4,9 @@ ## Get Model ``` -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` @@ -108,7 +108,7 @@ python3 rec_web_client.py When a service starts the concatenation of two models, it only needs to pass in the relative path of the model folder in order after `--model`, and the custom C++ OP class name after `--op`. The order of the model after `--model` and the class name after `--OP` needs to correspond. Here, it is assumed that we have defined the two OPs as GeneralDetectionOp and GeneralRecOp respectively, The script code is as follows: ```python #One service starts the concatenation of two models -python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293 #ocr_det_model correspond to GeneralDetectionOp, ocr_rec_model correspond to GeneralRecOp ``` diff --git a/examples/C++/PaddleOCR/ocr/README_CN.md b/examples/C++/PaddleOCR/ocr/README_CN.md index d9671115e0c4970af48fcd224551a96526be1356..a737531d778a25ef225c46240736e1bbc7381f35 100755 --- a/examples/C++/PaddleOCR/ocr/README_CN.md +++ b/examples/C++/PaddleOCR/ocr/README_CN.md @@ -4,9 +4,9 @@ ## 获取模型 ``` -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` ## 获取数据集(可选) @@ -106,7 +106,7 @@ python3 rec_web_client.py 一个服务启动两个模型串联,只需要在`--model后依次按顺序传入模型文件夹的相对路径`,且需要在`--op后依次传入自定义C++OP类名称`,其中--model后面的模型与--op后面的类名称的顺序需要对应,`这里假设我们已经定义好了两个OP分别为GeneralDetectionOp和GeneralRecOp`,则脚本代码如下: ```python #一个服务启动多模型串联 -python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293 #多模型串联 ocr_det_model对应GeneralDetectionOp ocr_rec_model对应GeneralRecOp ``` diff --git a/examples/C++/PaddleOCR/ocr/det_debugger_server.py b/examples/C++/PaddleOCR/ocr/det_debugger_server.py index 5b40fe9372a56b2b663c1bfeff02619a8ec9730b..6679ee0f4e0f18000ee33e5331a3d2e44197c846 100644 --- a/examples/C++/PaddleOCR/ocr/det_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/det_debugger_server.py @@ -47,18 +47,18 @@ class OCRService(WebService): }) def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.ori_h, self.ori_w, _ = im.shape det_img = self.det_preprocess(im) _, self.new_h, self.new_w = det_img.shape return { - "image": det_img[np.newaxis, :].copy() - }, ["concat_1.tmp_0"], True + "x": det_img[np.newaxis, :].copy() + }, ["save_infer_model/scale_0.tmp_1"], True def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] + det_out = fetch_map["save_infer_model/scale_0.tmp_1"] ratio_list = [ float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w ] diff --git a/examples/C++/PaddleOCR/ocr/det_web_server.py b/examples/C++/PaddleOCR/ocr/det_web_server.py index d38686e5a86c4f2df45db7f495a8c08a72270919..d52f4b447006e220ed2f6362afee253c0b9eb69d 100644 --- a/examples/C++/PaddleOCR/ocr/det_web_server.py +++ b/examples/C++/PaddleOCR/ocr/det_web_server.py @@ -47,17 +47,17 @@ class OCRService(WebService): }) def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.ori_h, self.ori_w, _ = im.shape det_img = self.det_preprocess(im) _, self.new_h, self.new_w = det_img.shape print(det_img) - return {"image": det_img}, ["concat_1.tmp_0"], False + return {"x": det_img}, ["save_infer_model/scale_0.tmp_1"], False def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] + det_out = fetch_map["save_infer_model/scale_0.tmp_1"] ratio_list = [ float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w ] diff --git a/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py b/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py index b3187f50e6f0d677d5377dac1735bd6e679b4755..507971c36c0e900cce471ceb2c636f4dd6232ccd 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py +++ b/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py @@ -42,13 +42,11 @@ for img_file in os.listdir(test_img_dir): image_data = file.read() image = cv2_to_base64(image_data) fetch_map = client.predict( - feed={"image": image}, - fetch=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"], + feed={"x": image}, + fetch=["save_infer_model/scale_0.tmp_1"], batch=True) result = {} - result["score"] = fetch_map["softmax_0.tmp_0"] - del fetch_map["softmax_0.tmp_0"] - rec_res = OCRReader().postprocess(fetch_map, with_score=False) + rec_res = OCRReader().postprocess_ocrv2(fetch_map, with_score=False) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py b/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py index 88dd94a8224fc5c9c6f972b96d81af60ce518763..bb10dba44d4baf3a9ed1e6b1f2f9af02178c1261 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py @@ -48,7 +48,7 @@ class OCRService(WebService): self.ocr_reader = OCRReader() def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) ori_h, ori_w, _ = im.shape @@ -57,7 +57,7 @@ class OCRService(WebService): det_img = det_img[np.newaxis, :] det_img = det_img.copy() det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=True) + feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True) filter_func = FilterBoxes(10, 10) post_func = DBPostProcess({ "thresh": 0.3, @@ -68,7 +68,7 @@ class OCRService(WebService): }) sorted_boxes = SortedBoxes() ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) + dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list]) dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) dt_boxes = sorted_boxes(dt_boxes) get_rotate_crop_image = GetRotateCropImage() @@ -88,12 +88,12 @@ class OCRService(WebService): for id, img in enumerate(img_list): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[id] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/ocr_web_client.py b/examples/C++/PaddleOCR/ocr/ocr_web_client.py index ce96a8bbcd585f37368d70070d649e25a0129029..91620e8ed9b0973a6bb31b09afbdce7b99aac8b6 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_web_client.py +++ b/examples/C++/PaddleOCR/ocr/ocr_web_client.py @@ -34,7 +34,7 @@ for img_file in os.listdir(test_img_dir): with open(os.path.join(test_img_dir, img_file), 'rb') as file: image_data1 = file.read() image = cv2_to_base64(image_data1) - data = {"feed": [{"image": image}], "fetch": ["res"]} + data = {"feed": [{"x": image}], "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) print(r) print(r.json()) diff --git a/examples/C++/PaddleOCR/ocr/ocr_web_server.py b/examples/C++/PaddleOCR/ocr/ocr_web_server.py index 58fc850c94a5e8d2f37ae5d03f14b60d343a2203..2273c7e1d2d90a1f4d3dd0a14a8469974a09ba98 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_web_server.py +++ b/examples/C++/PaddleOCR/ocr/ocr_web_server.py @@ -44,13 +44,13 @@ class OCRService(WebService): self.ocr_reader = OCRReader() def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) ori_h, ori_w, _ = im.shape det_img = self.det_preprocess(im) det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=False) + feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=False) _, new_h, new_w = det_img.shape filter_func = FilterBoxes(10, 10) post_func = DBPostProcess({ @@ -62,7 +62,7 @@ class OCRService(WebService): }) sorted_boxes = SortedBoxes() ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) + dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list]) dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) dt_boxes = sorted_boxes(dt_boxes) get_rotate_crop_image = GetRotateCropImage() @@ -78,12 +78,12 @@ class OCRService(WebService): for img in img_list: norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) feed_list.append(norm_img[np.newaxis, :]) - feed_batch = {"image": np.concatenate(feed_list, axis=0)} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed_batch = {"x": np.concatenate(feed_list, axis=0)} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed_batch, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/rec_debugger_server.py b/examples/C++/PaddleOCR/ocr/rec_debugger_server.py index f84463238af859a00983f515e405686c00fdf9fa..cb096ac1a7ab9475da0d61fb396bdec471fc2f2d 100644 --- a/examples/C++/PaddleOCR/ocr/rec_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/rec_debugger_server.py @@ -38,7 +38,7 @@ class OCRService(WebService): def preprocess(self, feed=[], fetch=[]): img_list = [] for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) + data = base64.b64decode(feed_data["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) @@ -53,12 +53,12 @@ class OCRService(WebService): for i, img in enumerate(img_list): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/rec_web_client.py b/examples/C++/PaddleOCR/ocr/rec_web_client.py index 312a2148886d6f084a1c077d84e907cb28c0652a..e78145b60d0c2d6c6032acbdba679bbdf89df51b 100644 --- a/examples/C++/PaddleOCR/ocr/rec_web_client.py +++ b/examples/C++/PaddleOCR/ocr/rec_web_client.py @@ -36,6 +36,6 @@ for img_file in os.listdir(test_img_dir): image_data1 = file.read() image = cv2_to_base64(image_data1) #data = {"feed": [{"image": image}], "fetch": ["res"]} - data = {"feed": [{"image": image}] * 3, "fetch": ["res"]} + data = {"feed": [{"x": image}] * 3, "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) print(r.json()) diff --git a/examples/C++/PaddleOCR/ocr/rec_web_server.py b/examples/C++/PaddleOCR/ocr/rec_web_server.py index 2db6e398d3a025e739761fabd50c5bb8a6609f07..1a6e45812c42280100fb14b029a3a16508c3b9a5 100644 --- a/examples/C++/PaddleOCR/ocr/rec_web_server.py +++ b/examples/C++/PaddleOCR/ocr/rec_web_server.py @@ -39,7 +39,7 @@ class OCRService(WebService): # TODO: to handle batch rec images img_list = [] for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) + data = base64.b64decode(feed_data["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) @@ -55,12 +55,12 @@ class OCRService(WebService): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h index bf8c98ede60bc4266965d1aa12e2627dd0d0647a..fff4f1c5ee94e9886893b09ca547ac60905bf8ad 100644 --- a/paddle_inference/paddle/include/paddle_engine.h +++ b/paddle_inference/paddle/include/paddle_engine.h @@ -225,6 +225,11 @@ class PaddleInferenceEngine : public EngineCore { config.SwitchIrOptim(true); } + int local_min_subgraph_size = min_subgraph_size; + if (engine_conf.has_min_subgraph_size()) { + local_min_subgraph_size = engine_conf.min_subgraph_size(); + } + if (engine_conf.has_use_trt() && engine_conf.use_trt()) { config.SwitchIrOptim(true); if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) { @@ -236,10 +241,55 @@ class PaddleInferenceEngine : public EngineCore { } config.EnableTensorRtEngine(1 << 20, max_batch, - min_subgraph_size, + local_min_subgraph_size, precision_type, false, FLAGS_use_calib); + std::map> min_input_shape; + std::map> max_input_shape; + std::map> optim_input_shape; + if (engine_conf.min_input_shape_size() > 0) { + for (auto& iter : engine_conf.min_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while(ss >> word) { + arr.push_back(std::stoi(word)); + } + min_input_shape[key] = arr; + } + } + if (engine_conf.max_input_shape_size() > 0) { + for (auto& iter : engine_conf.max_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while(ss >> word) { + arr.push_back(std::stoi(word)); + } + max_input_shape[key] = arr; + } + } + if (engine_conf.opt_input_shape_size() > 0) { + for (auto& iter : engine_conf.opt_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while(ss >> word) { + arr.push_back(std::stoi(word)); + } + optim_input_shape[key] = arr; + } + } + config.SetTRTDynamicShapeInfo(min_input_shape, + max_input_shape, + optim_input_shape); LOG(INFO) << "create TensorRT predictor"; } diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 09931dad80e19b364cb4e17a4b878662ec190aff..c628ff4ee9a2644782625a0ff31af8fe0c9dac57 100755 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -222,6 +222,8 @@ def serve_args(): "--prometheus_port", type=int, default=19393, help="Port of the Prometheus") parser.add_argument( "--request_cache_size", type=int, default=0, help="Port of the Prometheus") + parser.add_argument( + "--min_subgraph_size", type=str, default="", nargs="+", help="min_subgraph_size") return parser.parse_args() @@ -272,11 +274,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi read_op = op_maker.create('GeneralReaderOp') op_seq_maker.add_op(read_op) + is_ocr = False #如果dag_list_op不是空,那么证明通过--op 传入了自定义OP或自定义的DAG串联关系。 #此时,根据--op 传入的顺序去组DAG串联关系 if len(dag_list_op) > 0: for single_op in dag_list_op: op_seq_maker.add_op(op_maker.create(single_op)) + if single_op == "GeneralDetectionOp": + is_ocr = True #否则,仍然按照原有方式根虎--model去串联。 else: for idx, single_model in enumerate(model): @@ -287,6 +292,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi # 以后可能考虑不用python脚本来生成配置 if len(model) == 2 and idx == 0 and single_model == "ocr_det_model": infer_op_name = "GeneralDetectionOp" + is_ocr = True else: infer_op_name = "GeneralInferOp" general_infer_op = op_maker.create(infer_op_name) @@ -306,10 +312,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi server.set_enable_prometheus(args.enable_prometheus) server.set_prometheus_port(args.prometheus_port) server.set_request_cache_size(args.request_cache_size) + server.set_min_subgraph_size(args.min_subgraph_size) if args.use_trt and device == "gpu": server.set_trt() server.set_ir_optimize(True) + if is_ocr: + info = set_ocr_dynamic_shape_info() + server.set_trt_dynamic_shape_info(info) if args.gpu_multi_stream and device == "gpu": server.set_gpu_multi_stream() @@ -344,6 +354,51 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi use_encryption_model=args.use_encryption_model) server.run_server() +def set_ocr_dynamic_shape_info(): + info = [] + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_182.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20] + } + max_input_shape = { + "x": [1, 3, 1536, 1536], + "conv2d_182.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960], + } + opt_input_shape = { + "x": [1, 3, 960, 960], + "conv2d_182.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240], + } + det_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(det_info) + min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]} + max_input_shape = { + "x": [50, 3, 32, 1000], + "lstm_1.tmp_0": [500, 50, 128] + } + opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]} + rec_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(rec_info) + return info def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index a1ed1c1c5647f32538417a06f4880550eaee211a..da20c9fe64d44658f5c4403ad9e45f54113e20c0 100755 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -101,6 +101,8 @@ class Server(object): self.enable_prometheus = False self.prometheus_port = 19393 self.request_cache_size = 0 + self.min_subgraph_size = [] + self.trt_dynamic_shape_info = [] def get_fetch_list(self, infer_node_idx=-1): fetch_names = [ @@ -211,6 +213,17 @@ class Server(object): def set_request_cache_size(self, request_cache_size): self.request_cache_size = request_cache_size + def set_min_subgraph_size(self, min_subgraph_size): + for s in min_subgraph_size: + try: + size = int(s) + except: + size = 3 + self.min_subgraph_size.append(size) + + def set_trt_dynamic_shape_info(self, info): + self.trt_dynamic_shape_info = info + def _prepare_engine(self, model_config_paths, device, use_encryption_model): self.device = device if self.model_toolkit_conf == None: @@ -292,6 +305,25 @@ class Server(object): if use_encryption_model: engine.encrypted_model = True engine.type = "PADDLE_INFER" + if len(self.min_subgraph_size) > index: + engine.min_subgraph_size = self.min_subgraph_size[index] + if len(self.trt_dynamic_shape_info) > index: + dynamic_shape_info = self.trt_dynamic_shape_info[index] + try: + for key,value in dynamic_shape_info.items(): + shape_type = key + if shape_type == "min_input_shape": + local_map = engine.min_input_shape + if shape_type == "max_input_shape": + local_map = engine.max_input_shape + if shape_type == "opt_input_shape": + local_map = engine.opt_input_shape + for name,shape in value.items(): + local_value = ' '.join(str(i) for i in shape) + local_map[name] = local_value + except: + raise ValueError("Set TRT dynamic shape info error!") + self.model_toolkit_conf.append(server_sdk.ModelToolkitConf()) self.model_toolkit_conf[-1].engines.extend([engine]) index = index + 1