Unverified commit a418dd44, authored by chenjian, committed by GitHub

Add fastdeploy server and client component (#1169)

* add backend support for fastdeploy server

* fix

* add code

* fix

* fix

* add fastdeploy server component

* add fastdeploy server and client

* add exception description

* fix

* add model repository judgement

* add component tab for fastdeploy client

* update more tasks in fastdeploy client

* sort filenames

* backup config

* noqa for autogenerated file

* add data validation

* add __init__ for package

* add calculating layout for frontend

* add alive server detection and optimize client

* add alive server detection and optimize client

* add alive server detection and optimize client

* add metrics in gradio client

* update presentation

* Change return value to None for frontend performance data when server not ready

* add get_server_config and download_pretrain_model api

* add get_server_config and download_pretrain_model api

* add unit for metric table

* add unit for metric table

* fix a bug

* add judgement pretrained model download

* add judgement pretrained model download

* add version info for frontend

* rename download model

* fix a bug

* add fastdeploy model list

* optimize for choose configuration files

* modify according to frontend need

* fix name in config to model name

* optimize for server list and alive judgement

* keep server name as string type

* optimize process judgement logic

* optimize for deleting resource files

* add rename resource file

* fix

* fix a bug

* optimize code structure

* optimize code structure

* remove chinese tips and remove fastdeploy-python in requirements
Parent b90619b9
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import gradio as gr
import numpy as np
from .http_client_manager import get_metric_data
from .http_client_manager import HttpClientManager
from .http_client_manager import metrics_table_head
from .visualizer import visualize_detection
from .visualizer import visualize_face_alignment
from .visualizer import visualize_face_detection
from .visualizer import visualize_headpose
from .visualizer import visualize_keypoint_detection
from .visualizer import visualize_matting
from .visualizer import visualize_ocr
from .visualizer import visualize_segmentation
_http_manager = HttpClientManager()
supported_tasks = {
'detection': visualize_detection,
'facedet': visualize_face_detection,
'keypointdetection': visualize_keypoint_detection,
'segmentation': visualize_segmentation,
'matting': visualize_matting,
'ocr': visualize_ocr,
'facealignment': visualize_face_alignment,
'headpose': visualize_headpose,
'unspecified': lambda x: str(x)
}
def create_gradio_client_app(): # noqa:C901
css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
color: white;
border-color: black;
background: black;
}
input[type='range'] {
accent-color: black;
}
.dark input[type='range'] {
accent-color: #dfdfdf;
}
#gallery {
min-height: 22rem;
margin-bottom: 15px;
margin-left: auto;
margin-right: auto;
border-bottom-right-radius: .5rem !important;
border-bottom-left-radius: .5rem !important;
}
#gallery>div>.h-full {
min-height: 20rem;
}
.details:hover {
text-decoration: underline;
}
.gr-button {
white-space: nowrap;
}
.gr-button:focus {
border-color: rgb(147 197 253 / var(--tw-border-opacity));
outline: none;
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
--tw-border-opacity: 1;
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) \
var(--tw-ring-offset-color);
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
--tw-ring-opacity: .5;
}
.footer {
margin-bottom: 45px;
margin-top: 35px;
text-align: center;
border-bottom: 1px solid #e5e5e5;
}
.footer>p {
font-size: .8rem;
display: inline-block;
padding: 0 10px;
transform: translateY(10px);
background: white;
}
.dark .footer {
border-color: #303030;
}
.dark .footer>p {
background: #0b0f19;
}
.prompt h4{
margin: 1.25em 0 .25em 0;
font-weight: bold;
font-size: 115%;
}
"""
block = gr.Blocks(css=css)
with block:
gr.HTML("""
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
<div
style="
display: inline-flex;
gap: 0.8rem;
font-size: 1.75rem;
justify-content: center;
"
>
<h1>
FastDeploy Client
</h1>
</div>
<p font-size: 94%">
The client is used for creating requests to fastdeploy server.
</p>
</div>
""")
with gr.Group():
with gr.Box():
with gr.Column():
with gr.Row():
server_addr_text = gr.Textbox(
label="服务ip",
show_label=True,
max_lines=1,
placeholder="localhost",
)
server_http_port_text = gr.Textbox(
label="推理服务端口",
show_label=True,
max_lines=1,
placeholder="8000",
)
server_metric_port_text = gr.Textbox(
label="性能服务端口",
show_label=True,
max_lines=1,
placeholder="8002",
)
with gr.Row():
model_name_text = gr.Textbox(
label="模型名称",
show_label=True,
max_lines=1,
placeholder="yolov5",
)
model_version_text = gr.Textbox(
label="模型版本",
show_label=True,
max_lines=1,
placeholder="1",
)
with gr.Box():
with gr.Tab("组件形式"):
check_button = gr.Button("获取模型输入输出")
component_format_column = gr.Column(visible=False)
with component_format_column:
task_radio = gr.Radio(
choices=list(supported_tasks.keys()),
value='unspecified',
label='任务类型',
visible=True)
gr.Markdown("根据模型需要,挑选文本框或者图像框进行输入")
with gr.Row():
with gr.Column():
gr.Markdown("模型输入")
input_accordions = []
input_name_texts = []
input_images = []
input_texts = []
for i in range(6):
accordion = gr.Accordion(
"输入变量 {}".format(i),
open=True,
visible=False)
with accordion:
input_name_text = gr.Textbox(
label="变量名", interactive=False)
input_image = gr.Image(type='numpy')
input_text = gr.Textbox(
label="文本框", max_lines=1000)
input_accordions.append(accordion)
input_name_texts.append(input_name_text)
input_images.append(input_image)
input_texts.append(input_text)
with gr.Column():
gr.Markdown("模型输出")
output_accordions = []
output_name_texts = []
output_images = []
output_texts = []
for i in range(6):
accordion = gr.Accordion(
"输出变量 {}".format(i),
open=True,
visible=False)
with accordion:
output_name_text = gr.Textbox(
label="变量名", interactive=False)
output_text = gr.Textbox(
label="服务返回的原数据",
interactive=False,
show_label=True)
output_image = gr.Image(
interactive=False)
output_accordions.append(accordion)
output_name_texts.append(output_name_text)
output_images.append(output_image)
output_texts.append(output_text)
component_submit_button = gr.Button("提交请求")
with gr.Tab("原始形式"):
gr.Markdown("模型输入")
raw_payload_text = gr.Textbox(
label="负载数据", max_lines=10000)
with gr.Column():
gr.Markdown("输出")
output_raw_text = gr.Textbox(
label="服务返回的原始数据", interactive=False)
raw_submit_button = gr.Button("提交请求")
with gr.Box():
with gr.Column():
gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)")
output_html_table = gr.HTML(
label="metrics",
interactive=False,
show_label=False,
value=metrics_table_head.format('', ''))
update_metric_button = gr.Button("更新统计数据")
status_text = gr.Textbox(
label="status",
show_label=True,
max_lines=1,
interactive=False)
all_input_output_components = input_accordions + input_name_texts + input_images + \
input_texts + output_accordions + output_name_texts + output_images + output_texts
def get_input_output_name(server_ip, server_port, model_name,
model_version):
try:
server_addr = server_ip + ':' + server_port
input_metas, output_metas = _http_manager.get_model_meta(
server_addr, model_name, model_version)
except Exception as e:
return {status_text: str(e)}
results = {
component: None
for component in all_input_output_components
}
results[component_format_column] = gr.update(visible=True)
# results[check_button] = gr.update(visible=False)
for input_accordio in input_accordions:
results[input_accordio] = gr.update(visible=False)
for output_accordio in output_accordions:
results[output_accordio] = gr.update(visible=False)
results[status_text] = 'GetInputOutputName Successful'
for i, input_meta in enumerate(input_metas):
results[input_accordions[i]] = gr.update(visible=True)
results[input_name_texts[i]] = input_meta['name']
for i, output_meta in enumerate(output_metas):
results[output_accordions[i]] = gr.update(visible=True)
results[output_name_texts[i]] = output_meta['name']
return results
def component_inference(*args):
server_ip = args[0]
http_port = args[1]
metric_port = args[2]
model_name = args[3]
model_version = args[4]
names = args[5:5 + len(input_name_texts)]
images = args[5 + len(input_name_texts):5 + len(input_name_texts) +
len(input_images)]
texts = args[5 + len(input_name_texts) + len(input_images):5 +
len(input_name_texts) + len(input_images) +
len(input_texts)]
task_type = args[-1]
server_addr = server_ip + ':' + http_port
if server_ip and http_port and model_name and model_version:
inputs = {}
for i, input_name in enumerate(names):
if input_name:
if images[i] is not None:
inputs[input_name] = np.array([images[i]])
if texts[i]:
inputs[input_name] = np.array(
[[texts[i].encode('utf-8')]], dtype=np.object_)
try:
infer_results = _http_manager.infer(
server_addr, model_name, model_version, inputs)
results = {status_text: 'Inference Successful'}
for i, (output_name,
data) in enumerate(infer_results.items()):
results[output_name_texts[i]] = output_name
results[output_texts[i]] = str(data)
if task_type != 'unspecified':
try:
results[output_images[i]] = supported_tasks[
task_type](images[0], data)
except Exception:
results[output_images[i]] = None
if metric_port:
html_table = get_metric_data(server_ip, metric_port)
results[output_html_table] = html_table
return results
except Exception as e:
return {status_text: 'Error: {}'.format(e)}
else:
return {
status_text:
'Please input server addr, model name and model version.'
}
def raw_inference(*args):
server_ip = args[0]
http_port = args[1]
metric_port = args[2]
model_name = args[3]
model_version = args[4]
payload_text = args[5]
server_addr = server_ip + ':' + http_port
try:
result = _http_manager.raw_infer(server_addr, model_name,
model_version, payload_text)
results = {
status_text: 'Get response from server',
output_raw_text: result
}
if server_ip and metric_port:
html_table = get_metric_data(server_ip, metric_port)
results[output_html_table] = html_table
return results
except Exception as e:
return {status_text: 'Error: {}'.format(e)}
def update_metric(server_ip, metrics_port):
if server_ip and metrics_port:
try:
html_table = get_metric_data(server_ip, metrics_port)
return {
output_html_table: html_table,
status_text: "Successfully update metrics."
}
except Exception as e:
return {status_text: 'Error: {}'.format(e)}
else:
return {
status_text: 'Please input server ip and metrics_port.'
}
check_button.click(
fn=get_input_output_name,
inputs=[
server_addr_text, server_http_port_text, model_name_text,
model_version_text
],
outputs=[
*all_input_output_components, check_button,
component_format_column, status_text
])
component_submit_button.click(
fn=component_inference,
inputs=[
server_addr_text, server_http_port_text,
server_metric_port_text, model_name_text, model_version_text,
*input_name_texts, *input_images, *input_texts, task_radio
],
outputs=[
*output_name_texts, *output_images, *output_texts, status_text,
output_html_table
])
raw_submit_button.click(
fn=raw_inference,
inputs=[
server_addr_text, server_http_port_text,
server_metric_port_text, model_name_text, model_version_text,
raw_payload_text
],
outputs=[output_raw_text, status_text, output_html_table])
update_metric_button.click(
fn=update_metric,
inputs=[server_addr_text, server_metric_port_text],
outputs=[output_html_table, status_text])
return block
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import json
import re
import numpy as np
import requests
import tritonclient.http as httpclient
from attrdict import AttrDict
from tritonclient.utils import InferenceServerException
def convert_http_metadata_config(metadata):
metadata = AttrDict(metadata)
return metadata
def prepare_request(inputs_meta, inputs_data, outputs_meta):
'''
inputs_meta: inputs meta information from model. name: info
inputs_data: users input data. name: data
'''
# Set the input data
inputs = []
for input_dict in inputs_meta:
input_name = input_dict['name']
if input_name not in inputs_data:
raise RuntimeError(
'Error: input name {} required by the model does not exist.'.format(
input_name))
if input_dict['datatype'] == 'FP32':
inputs_data[input_name] = inputs_data[input_name].astype(
np.float32
) / 255 # image data returned by gradio is uint8, convert to fp32
if len(input_dict['shape']
) == 3 and input_dict['shape'][0] == 3: # NCHW
inputs_data[input_name] = inputs_data[input_name][0].transpose(
2, 0, 1)
elif len(input_dict['shape']
) == 4 and input_dict['shape'][1] == 3: # NCHW
inputs_data[input_name] = inputs_data[input_name].transpose(
0, 3, 1, 2)
infer_input = httpclient.InferInput(
input_name, inputs_data[input_name].shape, input_dict['datatype'])
infer_input.set_data_from_numpy(inputs_data[input_name])
inputs.append(infer_input)
outputs = []
for output_dict in outputs_meta:
infer_output = httpclient.InferRequestedOutput(output_dict.name)
outputs.append(infer_output)
return inputs, outputs
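# Illustrative sketch of the structures prepare_request works with (names and
# shapes below are hypothetical, not taken from a real model):
#   inputs_meta  = [{'name': 'INPUT_0', 'datatype': 'FP32', 'shape': [1, 3, 640, 640]}]
#   inputs_data  = {'INPUT_0': np.zeros((1, 640, 640, 3), dtype=np.uint8)}  # HWC uint8 from gradio
#   outputs_meta = [AttrDict({'name': 'OUTPUT_0'})]
#   inputs, outputs = prepare_request(inputs_meta, inputs_data, outputs_meta)
# The uint8 image is rescaled to FP32 in [0, 1] and transposed to NCHW before being
# wrapped into tritonclient InferInput objects.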
metrics_table_head = """
<style>
table, th {{
border:0.1px solid black;
}}
</style>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">模型名称</th>
<th colspan="4">执行统计</th>
<th colspan="5">延迟统计</th>
</tr>
<tr>
<th>请求处理成功数</th>
<th>请求处理失败数</th>
<th>推理batch数</th>
<th>推理样本数</th>
<th>请求处理时间(ms)</th>
<th>任务队列等待时间(ms)</th>
<th>输入处理时间(ms)</th>
<th>模型推理时间(ms)</th>
<th>输出处理时间(ms)</th>
</tr>
{}
</table>
</div>
<br>
<br>
<br>
<br>
<br>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">GPU</th>
<th colspan="4">性能指标</th>
<th colspan="2">显存</th>
</tr>
<tr>
<th>利用率(%)</th>
<th>功率(W)</th>
<th>功率限制(W)</th>
<th>耗电量(J)</th>
<th>总量(GB)</th>
<th>已使用(GB)</th>
</tr>
{}
</table>
</div>
"""
def get_metric_data(server_addr, metric_port): # noqa:C901
'''
Get metrics data from fastdeploy server, and transform it into html table.
Args:
server_addr(str): fastdeployserver ip address
metric_port(int): fastdeployserver metrics port
Returns:
htmltable(str): html table to show metrics data
'''
model_table = {}
gpu_table = {}
metric_column_name = {
"Model": {
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_count", "nv_inference_exec_count",
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
},
"GPU": {
"nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_utilization",
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
},
"CPU": {
"nv_cpu_utilization", "nv_cpu_memory_total_bytes",
"nv_cpu_memory_used_bytes"
}
}
try:
res = requests.get("http://{}:{}/metrics".format(
server_addr, metric_port))
except Exception:
return metrics_table_head.format('', '')
metric_content = res.text
for content in metric_content.split('\n'):
if content.startswith('#'):
continue
else:
res = re.match(r'(\w+){(.*)} (\S+)',
content) # match metric lines output by the server; the value may contain a decimal point
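# A typical metric line looks like the following (values are made up):
#   nv_inference_request_success{model="yolov5",version="1"} 42
#   nv_gpu_utilization{gpu_uuid="GPU-xxxxxxxx"} 0.56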
if not res:
continue
metric_name = res.group(1)
model = res.group(2)
value = res.group(3)
infos = {}
for info in model.split(','):
k, v = info.split('=')
v = v.strip('"')
infos[k] = v
if metric_name in [
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]:
value = str(float(value) / 1000)
elif metric_name in [
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
]:
value = str(float(value) / 1024 / 1024 / 1024)
for key, metric_names in metric_column_name.items():
if metric_name in metric_names:
if key == 'Model':
model_name = infos['model']
if model_name not in model_table:
model_table[model_name] = {}
model_table[model_name][metric_name] = value
elif key == 'GPU':
gpu_name = infos['gpu_uuid']
if gpu_name not in gpu_table:
gpu_table[gpu_name] = {}
gpu_table[gpu_name][metric_name] = value
elif key == 'CPU':
pass
model_data_list = []
gpu_data_list = []
model_data_metric_names = [
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_exec_count", "nv_inference_count",
"nv_inference_request_duration_us", "nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]
gpu_data_metric_names = [
"nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_memory_total_bytes",
"nv_gpu_memory_used_bytes"
]
for k, v in model_table.items():
data = []
data.append(k)
for data_metric in model_data_metric_names:
data.append(v[data_metric])
model_data_list.append(data)
for k, v in gpu_table.items():
data = []
data.append(k)
for data_metric in gpu_data_metric_names:
data.append(v[data_metric])
gpu_data_list.append(data)
model_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in model_data_list
])
gpu_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in gpu_data_list
])
return metrics_table_head.format(model_data, gpu_data)
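# Minimal usage sketch (address and port are placeholders):
#   html_table = get_metric_data('localhost', 8002)
# The returned string is the metrics_table_head template with one <tr> row filled in
# per model and per GPU, ready to be rendered by the gradio HTML component.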
class HttpClientManager:
def __init__(self):
self.clients = {} # server url: httpclient
def _create_client(self, server_url):
if server_url in self.clients:
return self.clients[server_url]
try:
fastdeploy_client = httpclient.InferenceServerClient(server_url)
self.clients[server_url] = fastdeploy_client
return fastdeploy_client
except Exception:
raise RuntimeError(
'Cannot connect to server {}, please check your '
'server address.'.format(server_url))
def infer(self, server_url, model_name, model_version, inputs):
fastdeploy_client = self._create_client(server_url)
input_metadata, output_metadata = self.get_model_meta(
server_url, model_name, model_version)
inputs, outputs = prepare_request(input_metadata, inputs,
output_metadata)
response = fastdeploy_client.infer(
model_name, inputs, model_version=model_version, outputs=outputs)
results = {}
for output in output_metadata:
result = response.as_numpy(output.name) # datatype: numpy
if output.datatype == 'BYTES': # datatype: bytes
try:
value = result
if len(result.shape) == 1:
value = result[0]
elif len(result.shape) == 2:
value = result[0][0]
elif len(result.shape) == 3:
value = result[0][0][0]
result = json.loads(value) # datatype: json
except Exception:
pass
else:
result = result[0]
results[output.name] = result
return results
def raw_infer(self, server_url, model_name, model_version, raw_input):
url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
server_url, model_name, model_version)
res = requests.post(url, data=json.dumps(json.loads(raw_input)))
return json.dumps(res.json())
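# raw_infer forwards the payload verbatim to the KServe v2 REST endpoint.
# A minimal request body looks roughly like this (names, shapes and data are hypothetical):
#   {
#     "inputs": [
#       {"name": "INPUT_0", "shape": [1, 2], "datatype": "FP32", "data": [1.0, 2.0]}
#     ]
#   }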
def get_model_meta(self, server_url, model_name, model_version):
fastdeploy_client = self._create_client(server_url)
try:
model_metadata = fastdeploy_client.get_model_metadata(
model_name=model_name, model_version=model_version)
except InferenceServerException as e:
raise RuntimeError("Failed to retrieve the metadata: " + str(e))
model_metadata = convert_http_metadata_config(model_metadata)
input_metadata = model_metadata.inputs
output_metadata = model_metadata.outputs
return input_metadata, output_metadata
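# Usage sketch for HttpClientManager (server address and model name are placeholders):
#   manager = HttpClientManager()
#   inputs = {'INPUT_0': np.zeros((1, 640, 640, 3), dtype=np.uint8)}
#   outputs = manager.infer('localhost:8000', 'yolov5', '1', inputs)
# infer() first resolves the model metadata, builds the request via prepare_request,
# and returns a dict keyed by output tensor name.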
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import numpy as np
__all__ = [
'visualize_detection', 'visualize_keypoint_detection',
'visualize_face_detection', 'visualize_face_alignment',
'visualize_segmentation', 'visualize_matting', 'visualize_ocr',
'visualize_headpose'
]
def visualize_detection(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
boxes = np.array(data['boxes'])
scores = np.array(data['scores'])
label_ids = np.array(data['label_ids'])
masks = np.array(data['masks'])
contain_masks = data['contain_masks']
detection_result = fd.C.vision.DetectionResult()
detection_result.boxes = boxes
detection_result.scores = scores
detection_result.label_ids = label_ids
detection_result.masks = masks
detection_result.contain_masks = contain_masks
result = fd.vision.vis_detection(image, detection_result)
return result
def visualize_keypoint_detection(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
keypoints = np.array(data['keypoints'])
scores = np.array(data['scores'])
num_joints = np.array(data['num_joints'])
detection_result = fd.C.vision.KeyPointDetectionResult()
detection_result.keypoints = keypoints
detection_result.scores = scores
detection_result.num_joints = num_joints
result = fd.vision.vis_keypoint_detection(image, detection_result)
return result
def visualize_face_detection(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
data = np.array(data['data'])
scores = np.array(data['scores'])
landmarks = np.array(data['landmarks'])
landmarks_per_face = data['landmarks_per_face']
detection_result = fd.C.vision.FaceDetectionResult()
detection_result.data = data
detection_result.scores = scores
detection_result.landmarks = landmarks
detection_result.landmarks_per_face = landmarks_per_face
result = fd.vision.vis_face_detection(image, detection_result)
return result
def visualize_face_alignment(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
landmarks = np.array(data['landmarks'])
facealignment_result = fd.C.vision.FaceAlignmentResult()
facealignment_result.landmarks = landmarks
result = fd.vision.vis_face_alignment(image, facealignment_result)
return result
def visualize_segmentation(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
label_ids = np.array(data['label_ids'])
score_map = np.array(data['score_map'])
shape = np.array(data['shape'])
segmentation_result = fd.C.vision.SegmentationResult()
segmentation_result.shape = shape
segmentation_result.score_map = score_map
segmentation_result.label_ids = label_ids
result = fd.vision.vis_segmentation(image, segmentation_result)
return result
def visualize_matting(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
alpha = np.array(data['alpha'])
foreground = np.array(data['foreground'])
contain_foreground = data['contain_foreground']
shape = np.array(data['shape'])
matting_result = fd.C.vision.MattingResult()
matting_result.alpha = alpha
matting_result.foreground = foreground
matting_result.contain_foreground = contain_foreground
matting_result.shape = shape
result = fd.vision.vis_matting(image, matting_result)
return result
def visualize_ocr(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
boxes = np.array(data['boxes'])
text = np.array(data['text'])
rec_scores = np.array(data['rec_scores'])
cls_scores = np.array(data['cls_scores'])
cls_labels = data['cls_labels']
ocr_result = fd.C.vision.OCRResult()
ocr_result.boxes = boxes
ocr_result.text = text
ocr_result.rec_scores = rec_scores
ocr_result.cls_scores = cls_scores
ocr_result.cls_labels = cls_labels
result = fd.vision.vis_ppocr(image, ocr_result)
return result
def visualize_headpose(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
euler_angles = np.array(data['euler_angles'])
headpose_result = fd.C.vision.HeadPoseResult()
headpose_result.euler_angles = euler_angles
result = fd.vision.vis_headpose(image, headpose_result)
return result
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import copy
import json
import os
import random
import re
import signal
import string
from collections import defaultdict
from subprocess import Popen
from subprocess import STDOUT
import google.protobuf.json_format as json_format
import google.protobuf.text_format as text_format
import psutil
import requests
from .proto.model_config_pb2 import ModelConfig
from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
def pbtxt2json(content: str):
'''
Convert protocol messages in text format to json format string.
'''
message = text_format.Parse(content, ModelConfig())
json_string = json_format.MessageToJson(message)
return json_string
def json2pbtxt(content: str):
'''
Convert json format string to protocol messages in text format.
'''
message = json_format.Parse(content, ModelConfig())
text_proto = text_format.MessageToString(message)
return text_proto
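# Round-trip sketch for the two converters above, using a deliberately tiny config:
#   pbtxt   = 'name: "yolov5"\nbackend: "onnxruntime"\nmax_batch_size: 1'
#   as_json = pbtxt2json(pbtxt)    # '{"name": "yolov5", "backend": "onnxruntime", "maxBatchSize": 1}'
#   back    = json2pbtxt(as_json)  # text format again, field names back in snake_case
# Note that protobuf JSON serialization uses camelCase field names (e.g. maxBatchSize),
# which is why the exchange format handled below also uses camelCase keys.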
def validate_data(model_config):
'''
Validate data in the model config; we need to check for empty values received from the front end.
The easiest way to handle them is to drop empty values.
Args:
model_config: model config to be saved in config file
Return:
model config after filtering.
'''
model_config_filtered = {}
for key, value in model_config.items():
if value:
model_config_filtered[key] = value
return model_config_filtered
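# Example: empty strings coming from unfilled front-end fields are dropped, e.g.
#   validate_data({'name': 'yolov5', 'backend': 'onnxruntime', 'maxBatchSize': ''})
#   -> {'name': 'yolov5', 'backend': 'onnxruntime'}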
def analyse_config(cur_dir: str):
'''
Analyse the model config in specified directory.
Return a json object to describe configuration.
'''
all_model_configs = {}
all_model_versions = {}
parent_dir, sub_dirs, filenames = os.walk(cur_dir).send(
None) # a model repository only contains model directories at its top level,
# so we only need to search one directory deep.
for model_dir_name in sub_dirs:
model_dir, model_sub_dirs, filenames = os.walk(
os.path.join(parent_dir, model_dir_name)).send(None)
model_name = os.path.basename(model_dir)
config_filenames = []
for filename in filenames:
if '.pbtxt' in filename:
config_filenames.append(
filename
) # filenames with extension .pbtxt are all config files
if config_filenames:
default_config_filename = config_filenames[0]
if 'config.pbtxt' in config_filenames:
default_config_filename = 'config.pbtxt'
config_filenames.remove(default_config_filename)
config_filenames.insert(0, default_config_filename)
else:
# if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt
copy_config_file_to_default_config(model_dir,
default_config_filename)
default_config_filename = 'config.pbtxt'
config_filenames.insert(0, default_config_filename)
json_config = json.loads(
pbtxt2json(
open(os.path.join(model_dir,
default_config_filename)).read()))
json_config["config_filenames"] = config_filenames[
0] # add config_filenames to config data (frontend developer said he only wanted one filename,
# and to request config_filenames by get_config_filenames_for_one_model later)
all_model_configs[
model_name] = json_config # store original config file content in json format
json_config[
'name'] = model_name # the name in the config data may differ from model_name
# (the model directory name), so we keep them consistent here.
else:
continue
for model_sub_dir in model_sub_dirs:
if re.match(
r'\d+',
model_sub_dir): # version directory consists of numbers
if model_name not in all_model_versions:
all_model_versions[model_name] = {}
if model_sub_dir not in all_model_versions[model_name]:
all_model_versions[model_name][model_sub_dir] = []
for version_resource_file in os.listdir(
os.path.join(model_dir, model_sub_dir)):
all_model_versions[model_name][model_sub_dir].append(
version_resource_file)
if model_name not in all_model_versions: # if a model has a config but no version directory,
# we create one for the user's convenience
all_model_versions[model_name] = {}
os.mkdir(os.path.join(model_dir, '1'))
all_model_versions[model_name]['1'] = []
if not all_model_configs:
raise Exception(
'The path you choose is not a valid model repository, please choose a valid path.'
)
return all_model_configs, all_model_versions
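# For a repository containing a single model directory "yolov5" with a config.pbtxt
# and a version directory "1" holding model.onnx, the return values would roughly be
# (filenames are illustrative):
#   all_model_configs  = {'yolov5': {..., 'name': 'yolov5', 'config_filenames': 'config.pbtxt'}}
#   all_model_versions = {'yolov5': {'1': ['model.onnx']}}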
def exchange_format_to_original_format(exchange_format):
'''
Change config exchange format to original format.
'''
ensembles = []
models = []
all_models = {}
if 'ensembles' in exchange_format:
ensembles = exchange_format['ensembles']
if 'models' in exchange_format:
models = exchange_format['models']
alls = ensembles + models
for model_config in alls:
# 1. add 'executionAccelerators' keyword
if 'optimization' in model_config:
optimization_config = model_config['optimization']
del model_config['optimization']
model_config['optimization'] = {}
model_config['optimization'][
'executionAccelerators'] = optimization_config
# 2. delete versions information
if 'versions' in model_config:
del model_config['versions']
if 'config_filenames' in model_config:
del model_config['config_filenames']
if 'platform' in model_config and model_config[
'platform'] == 'ensemble': # ensemble model
# 3. add 'ensembleScheduling' keyword
if 'step' in model_config:
step_configs = model_config['step']
if 'ensembleScheduling' not in model_config:
model_config['ensembleScheduling'] = {}
model_config['ensembleScheduling']['step'] = step_configs
del model_config['step']
# 4. remove two virtual models(feed, fetch), and
# "modelType", "inputModels", "outputModels", "inputVars", "outputVars"
remove_list = []
for model_config_in_step in step_configs:
if model_config_in_step[
'modelName'] == 'feed' or model_config_in_step[
'modelName'] == 'fetch':
remove_list.append(model_config_in_step)
continue
del model_config_in_step['modelType']
del model_config_in_step['inputModels']
del model_config_in_step['outputModels']
del model_config_in_step['inputVars']
del model_config_in_step['outputVars']
for remove_item in remove_list:
step_configs.remove(remove_item)
all_models[model_config['name']] = model_config
return all_models
def copy_config_file_to_default_config(model_dir, config_name):
json_config = json.loads(
pbtxt2json(open(os.path.join(model_dir, config_name)).read()))
model_name = os.path.basename(model_dir)
json_config['name'] = model_name
text_proto = json2pbtxt(json.dumps(json_config))
with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
f.write(text_proto)
def original_format_to_exchange_format(original_format, version_info):
'''
Change config original format to exchange format.
'''
exchange_format = {}
exchange_format['ensembles'] = []
exchange_format['models'] = []
# 0. transform version info into component format in frontend
for model_name, version_filenames_dict in version_info.items():
version_info_for_frontend = []
for version_name, filenames in version_filenames_dict.items():
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in filenames:
version_filenames_dict_for_frontend['children'].append({
'title':
filename,
'key':
filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
version_info[model_name] = version_info_for_frontend
for model_name, model_config in original_format.items():
# 1. remove 'executionAccelerators' keyword
transformed_config = copy.deepcopy(model_config)
if 'optimization' in model_config:
if 'executionAccelerators' in model_config['optimization']:
transformed_optimization_config = model_config['optimization'][
'executionAccelerators']
del transformed_config['optimization']
transformed_config[
'optimization'] = transformed_optimization_config
# 2. add versions information
if model_name in version_info:
transformed_config['versions'] = version_info[model_name]
if 'platform' in model_config and model_config[
'platform'] == 'ensemble': # ensemble model
# 3. remove ensembleScheduling
if 'ensembleScheduling' in model_config:
if 'step' in model_config['ensembleScheduling']:
del transformed_config['ensembleScheduling']
transformed_config['step'] = model_config[
'ensembleScheduling']['step']
# 4. add two virtual models(feed, fetch), and
# "modelType", "inputModels", "outputModels", "inputVars", "outputVars"
for model_config_in_step in transformed_config['step']:
model_config_in_step['modelType'] = 'normal'
model_config_in_step['inputModels'] = []
model_config_in_step['outputModels'] = []
model_config_in_step['inputVars'] = []
model_config_in_step['outputVars'] = []
transformed_config['step'].append({
"modelName": "feed",
"modelType": "virtual",
"inputModels": [],
"outputModels": [],
"inputVars": [],
"outputVars": []
})
transformed_config['step'].append({
"modelName": "fetch",
"modelType": "virtual",
"inputModels": [],
"outputModels": [],
"inputVars": [],
"outputVars": []
})
analyse_step_relationships(transformed_config['step'],
transformed_config['input'],
transformed_config['output'])
exchange_format['ensembles'].append(transformed_config)
elif 'backend' in model_config: # single model
exchange_format['models'].append(transformed_config)
return exchange_format
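# Sketch of the exchange format produced above (model names and fields are illustrative):
#   {
#     'models': [{'name': 'det_runtime', 'backend': 'onnxruntime',
#                 'versions': [{'title': '1', 'key': '1', 'children': [...]}], ...}],
#     'ensembles': [{'name': 'det_pipeline', 'platform': 'ensemble',
#                    'step': [...real models plus virtual feed/fetch nodes...], ...}]
#   }
# exchange_format_to_original_format above is its inverse: it re-nests 'optimization'
# under 'executionAccelerators' and 'step' under 'ensembleScheduling'.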
def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901
'''
Analyse model relationships in the ensemble step, and fill
"inputModels", "outputModels", "inputVars", "outputVars" in step_config.
step_config: step data in ensemble model config.
inputs: inputs in ensemble model config.
outputs: outputs in ensemble model config.
'''
models_dict = {}
vars_dict = {}
for model_config_in_step in step_config:
models_dict[model_config_in_step['modelName']] = model_config_in_step
if model_config_in_step['modelType'] == 'virtual':
for var in inputs:
if var['name'] not in vars_dict:
vars_dict[var['name']] = {}
vars_dict[var['name']]['from_models'] = set()
vars_dict[var['name']]['to_models'] = set()
vars_dict[var['name']]['from_models'].add('feed')
for var in outputs:
if var['name'] not in vars_dict:
vars_dict[var['name']] = {}
vars_dict[var['name']]['from_models'] = set()
vars_dict[var['name']]['to_models'] = set()
vars_dict[var['name']]['to_models'].add('fetch')
else:
for var_placehold_name, var_name in model_config_in_step[
'inputMap'].items():
if var_name not in vars_dict:
vars_dict[var_name] = {}
vars_dict[var_name]['from_models'] = set()
vars_dict[var_name]['to_models'] = set()
vars_dict[var_name]['to_models'].add(
model_config_in_step['modelName'])
for var_placehold_name, var_name in model_config_in_step[
'outputMap'].items():
if var_name not in vars_dict:
vars_dict[var_name] = {}
vars_dict[var_name]['from_models'] = set()
vars_dict[var_name]['to_models'] = set()
vars_dict[var_name]['from_models'].add(
model_config_in_step['modelName'])
for var_name, relationships in vars_dict.items():
for from_model in relationships['from_models']:
models_dict[from_model]['outputVars'].append(var_name)
for var_to_model in relationships['to_models']:
if var_to_model not in models_dict[from_model]['outputModels']:
models_dict[from_model]['outputModels'].append(
var_to_model)
for to_model in relationships['to_models']:
models_dict[to_model]['inputVars'].append(var_name)
for var_from_model in relationships['from_models']:
if var_from_model not in models_dict[to_model]['inputModels']:
models_dict[to_model]['inputModels'].append(var_from_model)
calculate_layout_for_frontend(models_dict)
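# For a simple ensemble feed -> preprocess -> runtime -> fetch, this function would fill
# in, for example (model and variable names are hypothetical):
#   preprocess: inputModels=['feed'],       outputModels=['runtime'],
#               inputVars=['INPUT'],        outputVars=['preprocess_out']
#   runtime:    inputModels=['preprocess'], outputModels=['fetch'], ...
# before handing the graph to calculate_layout_for_frontend for positioning.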
def get_config_filenames_for_one_model(cur_dir, name):
_, _, filenames = os.walk(os.path.join(cur_dir, name)).send(None)
config_filenames = []
backup_config_filenames = []
for filename in filenames:
if '.pbtxt' in filename and 'vdlbackup' not in filename:
config_filenames.append(
filename
) # filenames with extension .pbtxt that do not contain 'vdlbackup' are regular config files
elif '.pbtxt' in filename and 'vdlbackup' in filename:
backup_config_filenames.append(
filename
) # filenames with extension .pbtxt that contain 'vdlbackup' are backup config files
config_filenames = sorted(config_filenames) + sorted(
backup_config_filenames)
return config_filenames
def get_config_for_one_model(cur_dir, name, config_filename):
all_model_configs = {}
all_model_versions = {}
filename = os.path.join(cur_dir, name, config_filename)
json_config = json.loads(pbtxt2json(open(filename).read()))
json_config[
'name'] = name # the name in the config data may differ from the model directory name,
# so we keep them consistent here.
json_config["config_filenames"] = config_filename
all_model_configs[
name] = json_config # store original config file content in json format
all_model_versions[name] = {}
for model_sub_dir in os.listdir(os.path.join(cur_dir, name)):
if re.match(r'\d+',
model_sub_dir): # version directory consists of numbers
if model_sub_dir not in all_model_versions[name]:
all_model_versions[name][model_sub_dir] = []
for version_resource_file in os.listdir(
os.path.join(cur_dir, name, model_sub_dir)):
all_model_versions[name][model_sub_dir].append(
version_resource_file)
model_config = original_format_to_exchange_format(all_model_configs,
all_model_versions)
if model_config['ensembles']:
return model_config['ensembles'][0]
elif model_config['models']:
return model_config['models'][0]
def calculate_layout_for_frontend(model_config_in_step):
'''
Analyse model topology connections and prepare the positions for each model in layout.
Dynamic program algorithm:
depth(cur_node) = max([depth(prev_node) for prev_node in cur_node['inputModels']])
Args:
model_config_in_step(dict): model config in ensemble models' step, indexed by model name.
Returns:
None. Results calculated will be saved in place.
'''
path_depth = defaultdict(int)
def depth_recursive(model):
if model['modelName'] == 'feed':
path_depth[model['modelName']] = 0
return 0
if path_depth[model['modelName']] != 0:
return path_depth[model['modelName']]
path_depth[model['modelName']] = max([
depth_recursive(model_config_in_step[model_name]) for model_name in
model_config_in_step[model['modelName']]['inputModels']
]) + 1
return path_depth[model['modelName']]
depth_recursive(model_config_in_step['fetch'])
path_depth_tuple = [
(k, v)
for k, v in sorted(path_depth.items(), key=lambda item: item[1])
]
cur_x = 0
last_depth = -1
for model_name, depth in path_depth_tuple:
if depth == last_depth:
model_config_in_step[model_name]['pos_y'] = depth
model_config_in_step[model_name]['pos_x'] = cur_x
cur_x += 1
else:
cur_x = 0
model_config_in_step[model_name]['pos_y'] = depth
model_config_in_step[model_name]['pos_x'] = cur_x
cur_x += 1
last_depth = depth
return
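# Continuing the hypothetical ensemble above, the computed depths would be
#   feed=0, preprocess=1, runtime=2, fetch=3
# pos_y stores the topological depth and pos_x enumerates models sharing the same depth
# (left to right), which the frontend uses to place the nodes in the graph view.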
def launch_process(kwargs: dict):
'''
Launch a fastdeploy server according to specified arguments.
'''
cmd = ['fastdeployserver']
launch_env = os.environ.copy()
start_args = {}
for key, value in kwargs.items():
if key == 'default_model_name': # Used to fill client model_name automatically
start_args[key] = value
continue
if key == 'server-name' or key == 'ensemble-img': # extra information
start_args[key] = value
continue
if key == 'gpus':
if value:
launch_env['CUDA_VISIBLE_DEVICES'] = value
start_args[key] = value
continue
cmd.append('--{}'.format(key))
cmd.append('{}'.format(value))
start_args[key] = value
if start_args['server-name'] and start_args['server-name'] in os.listdir(
FASTDEPLOYSERVER_PATH):
raise RuntimeError(
"Failed to launch server,server name {} has been used,please write a different server name."
.format(start_args['server-name']))
all_model_configs, all_model_versions = analyse_config(
start_args['model-repository'])
model_repo_config = original_format_to_exchange_format(
all_model_configs, all_model_versions)
model_repo_config['ensemble-img'] = start_args['ensemble-img']
logfilename = 'logfile-{}'.format(get_random_string(8))
while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)):
logfilename = 'logfile-{}'.format(get_random_string(8))
p = Popen(
cmd,
stdout=open(
os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w',
buffering=1),
stderr=STDOUT,
universal_newlines=True,
env=launch_env)
server_name = start_args['server-name'] if start_args[
'server-name'] else p.pid
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_name)),
'w') as f:
# filename ${server_name} contains 4 lines:
# line1 : the real log filename ${logfilename}
# line2 : pid
# line3 : launch arguments
# line4 : model-repository configuration
f.write(logfilename + '\n' + str(p.pid) + '\n' +
json.dumps(start_args) + '\n' + json.dumps(model_repo_config))
return p
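# A launch request such as the following (keys follow the branches handled above,
# concrete values are placeholders):
#   launch_process({'server-name': 'my_server', 'model-repository': './models',
#                   'gpus': '0', 'default_model_name': 'yolov5', 'ensemble-img': ''})
# runs roughly `fastdeployserver --model-repository ./models` with CUDA_VISIBLE_DEVICES=0,
# and writes a record file named after the server holding the four lines documented
# above (log filename, pid, start arguments, model-repository configuration).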
def get_random_string(length):
# choose from all lowercase letter
letters = string.ascii_lowercase
result_str = ''.join([random.choice(letters) for i in range(length)])
return result_str
def get_start_arguments(server_id):
'''
Get the start arguments for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
args(dict): launch arguments when start fastdeployserver process.
'''
args = {}
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
arguments_json = f.read().split('\n')[2]
args = json.loads(arguments_json)
return args
def get_process_pid(server_id):
'''
Get the process id for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
pid(int): process id.
'''
pid = None
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
pid = int(f.read().split('\n')[1])
return pid
def get_process_logfile_name(server_id):
'''
Get the process logfile name for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
logfile(str): logfile name.
'''
filename = None
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
filename = f.read().split('\n')[0]
return filename
def get_process_model_configuration(server_id):
'''
Get the model repository configuration for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
configuration(dict): model repository configuration
'''
conf = {}
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
conf_json = f.read().split('\n')[3]
conf = json.loads(conf_json)
return conf
def get_process_output(server_id, length):
'''
Get the standard output of an opened subprocess.
'''
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
logfilename = get_process_logfile_name(server_id)
# delete file ${logfilename} if exists
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH,
'{}'.format(logfilename)), 'r') as f:
f.seek(length)
data = f.read()
return data
def mark_pid_for_dead_process(server_id):
'''
Resource files for a dead server are only deleted when the user closes the server in the frontend.
When the user closes the server, the pid recorded in the server's record file is killed.
In case a dead process id gets reassigned to a new process, we mark the recorded pid as outdated.
Here, we replace the pid with -1 in the record file to denote a zombie process \
which has already been polled and is dead.
Args:
server_id(str): fastdeployserver process name
'''
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
contents = f.read().split('\n')
contents[1] = '-1' # we replace pid to -1
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'w') as f:
f.write('\n'.join(contents))
def delete_files_for_process(server_id):
'''
Delete logfile for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
'''
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
logfilename = get_process_logfile_name(server_id)
# delete file ${logfilename} if exists
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
os.remove(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename)))
os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)))
def kill_process(process):
'''
Stop an opened subprocess.
'''
if isinstance(process, str): # server_id, use os.kill to terminate
pid = get_process_pid(process)
if pid == -1: # we use -1 to mark dead process
return
try:
os.kill(pid, signal.SIGKILL)
except Exception:
pass
else:
pid = process.pid
process.kill()
try:
process.wait(10)
except Exception:
pass
def get_alive_fastdeploy_servers():
'''
Search for server names in `FASTDEPLOYSERVER_PATH`; if a process is dead but its log still exists for \
some unexpected reason, delete the log file.
'''
server_names = [
name for name in os.listdir(FASTDEPLOYSERVER_PATH)
if 'logfile' not in name
]
should_delete_servers = []
for server_name in server_names:
if check_process_alive(server_name) is False:
delete_files_for_process(server_name)
should_delete_servers.append(server_name)
for server_name in should_delete_servers:
server_names.remove(server_name)
return server_names
def check_process_zombie(server_id):
'''
Given a server id, check whether the process has become a zombie (its pid has been marked as -1).
Args:
server_id(str): fastdeployserver process name
Return:
status(bool): True if the process has become a zombie.
'''
pid = get_process_pid(server_id)
if pid == -1:
return True
else:
return False
def check_process_alive(server_id):
'''
Given a server id, check whether the process is alive or not.
Args:
server_id(str): fastdeployserver process name
Return:
status(bool): True if process is still alive.
'''
pid = get_process_pid(server_id)
if pid is None:
return False
if pid == -1: # We use -1 to mark a zombie process that has already died.
# Since the user may want to know why the process died (e.g. due to an exception),
# we return True so that the frontend can still fetch the log of the dead process.
return True
try:
os.kill(pid, 0)
except OSError:
return False
else:
if 'fastdeployserve' not in psutil.Process(pid).name(
): # Make sure the pid really belongs to a fastdeployserver process, in case the pid has been reassigned.
# Note: Linux truncates process names to 15 characters (TASK_COMM_LEN), which is likely
# why psutil.Process(pid).name() reports 'fastdeployserve' instead of 'fastdeployserver'.
return False
else:
return True
_metric_column_name = {
"Model": {
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_count", "nv_inference_exec_count",
"nv_inference_request_duration_us", "nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
},
"GPU": {
"nv_gpu_power_usage", "nv_gpu_power_limit", "nv_energy_consumption",
"nv_gpu_utilization", "nv_gpu_memory_total_bytes",
"nv_gpu_memory_used_bytes"
},
"CPU": {
"nv_cpu_utilization", "nv_cpu_memory_total_bytes",
"nv_cpu_memory_used_bytes"
}
}
def generate_metric_table(server_addr, server_port): # noqa:C901
model_table = {}
gpu_table = {}
try:
res = requests.get("http://{}:{}/metrics".format(
server_addr, server_port))
except Exception:
return None
metric_content = res.text
for content in metric_content.split('\n'):
if content.startswith('#'):
continue
else:
res = re.match(r'(\w+){(.*)} (\S+)',
content) # match metric lines output by the server; the value may contain a decimal point
if not res:
continue
metric_name = res.group(1)
model = res.group(2)
value = res.group(3)
infos = {}
for info in model.split(','):
k, v = info.split('=')
v = v.strip('"')
infos[k] = v
if metric_name in [
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]:
value = float(value) / 1000
elif metric_name in [
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
]:
value = float(value) / 1024 / 1024 / 1024
for key, metric_names in _metric_column_name.items():
if metric_name in metric_names:
if key == 'Model':
model_name = infos['model']
if model_name not in model_table:
model_table[model_name] = {}
model_table[model_name][metric_name] = value
elif key == 'GPU':
gpu_name = infos['gpu_uuid']
if gpu_name not in gpu_table:
gpu_table[gpu_name] = {}
gpu_table[gpu_name][metric_name] = value
elif key == 'CPU':
pass
results = {}
results['Model'] = model_table
results['GPU'] = gpu_table
return results
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import datetime
import json
import os
import re
import shutil
import socket
import time
from multiprocessing import Process
from pathlib import Path
import requests
from .fastdeploy_client.client_app import create_gradio_client_app
from .fastdeploy_lib import analyse_config
from .fastdeploy_lib import check_process_zombie
from .fastdeploy_lib import copy_config_file_to_default_config
from .fastdeploy_lib import delete_files_for_process
from .fastdeploy_lib import exchange_format_to_original_format
from .fastdeploy_lib import generate_metric_table
from .fastdeploy_lib import get_alive_fastdeploy_servers
from .fastdeploy_lib import get_config_filenames_for_one_model
from .fastdeploy_lib import get_config_for_one_model
from .fastdeploy_lib import get_process_model_configuration
from .fastdeploy_lib import get_process_output
from .fastdeploy_lib import get_start_arguments
from .fastdeploy_lib import json2pbtxt
from .fastdeploy_lib import kill_process
from .fastdeploy_lib import launch_process
from .fastdeploy_lib import mark_pid_for_dead_process
from .fastdeploy_lib import original_format_to_exchange_format
from .fastdeploy_lib import validate_data
from visualdl.server.api import gen_result
from visualdl.server.api import result
from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
class FastDeployServerApi(object):
def __init__(self):
self.root_dir = Path(os.getcwd())
self.opened_servers = {
} # Used to store opened fastdeployserver processes, keyed by server name
self.client_port = None
@result()
def get_directory(self, cur_dir):
if self.root_dir not in Path(os.path.abspath(cur_dir)).parents:
cur_dir = '.'
cur_dir, sub_dirs, filenames = os.walk(cur_dir).send(None)
if Path(self.root_dir) != Path(os.path.abspath(cur_dir)):
sub_dirs.append('..')
sub_dirs = sorted(sub_dirs)
directorys = {
'parent_dir':
os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir),
'sub_dir':
sub_dirs
}
return directorys
@result()
def get_config(self, cur_dir):
all_model_configs, all_model_versions = analyse_config(cur_dir)
return original_format_to_exchange_format(all_model_configs,
all_model_versions)
@result()
def config_update(self, cur_dir, model_name, config, config_filename):
config = json.loads(config)
all_models = exchange_format_to_original_format(config)
model_dir = os.path.join(os.path.abspath(cur_dir), model_name)
filtered_config = validate_data(all_models[model_name])
text_proto = json2pbtxt(json.dumps(filtered_config))
# back up the user's config data first, so that if the data gets corrupted by the front end we can still recover it
# backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
# a backup config can only be used to restore config.pbtxt
if 'vdlbackup' in config_filename:
raise RuntimeError(
"Backup config file is not permitted to update.")
basename = os.path.splitext(config_filename)[0]
shutil.copy(
os.path.join(model_dir, config_filename),
os.path.join(
model_dir, '{}_vdlbackup_{}.pbtxt'.format(
basename,
datetime.datetime.now().isoformat())))
with open(os.path.join(model_dir, config_filename), 'w') as f:
f.write(text_proto)
return
@result()
def start_server(self, configs):
configs = json.loads(configs)
process = launch_process(configs)
if process.poll() is not None:
raise RuntimeError(
"Failed to launch fastdeployserver,please check fastdeployserver is installed in environment."
)
server_name = configs['server-name'] if configs[
'server-name'] else str(process.pid)
self.opened_servers[server_name] = process
return server_name
@result()
def stop_server(self, server_id):
if server_id in self.opened_servers: # check if server_id in self.opened_servers
kill_process(self.opened_servers[server_id])
del self.opened_servers[server_id]
elif server_id in set(
os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in
# FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
kill_process(server_id)
delete_files_for_process(server_id)
self._poll_zombie_process()
@result('text/plain')
def get_server_output(self, server_id, length):
length = int(length)
if server_id in self.opened_servers: # check if server_id in self.opened_servers
return get_process_output(server_id, length)
elif str(server_id) in set(
os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in
# FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
return get_process_output(server_id, length)
else:
return
@result()
def get_server_metric(self, server_id):
args = get_start_arguments(server_id)
host = 'localhost'
port = args.get('metrics-port', 8002)
return generate_metric_table(host, port)
@result()
def get_server_list(self):
return get_alive_fastdeploy_servers()
@result()
def check_server_alive(self, server_id):
self._poll_zombie_process()
if check_process_zombie(server_id) is True:
raise RuntimeError(
"Server {} is down due to exception or killed,please check the reason according to the log, \
then close this server.".format(server_id))
return
@result()
def get_server_config(self, server_id):
return get_process_model_configuration(server_id)
@result()
def get_pretrain_model_list(self):
'''
Get all available fastdeploy models from hub server.
'''
res = requests.get(
'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
result = res.json()
if result['status'] != 0:
raise RuntimeError(
"Failed to get pre-trained model list from hub server.")
else:
data = result['data']
model_list = {}
for category, models in data.items():
if category not in model_list:
model_list[category] = set()
for model in models:
model_list[category].add(model['name'])
# adapt data format for frontend
models_info = []
for category, model_names in model_list.items():
models_info.append({
"value": category,
"label": category,
"children": []
})
for model_name in sorted(model_names):
models_info[-1]["children"].append({
"value": model_name,
"label": model_name
})
return models_info
@result()
def download_pretrain_model(self, cur_dir, model_name, version,
pretrain_model_name):
version_resource_dir = os.path.join(
os.path.abspath(cur_dir), model_name, version)
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
model_path = fd.download_model(
name=pretrain_model_name, path=version_resource_dir)
if model_path:
if '.onnx' in model_path:
shutil.move(
model_path,
os.path.join(os.path.dirname(model_path), 'model.onnx'))
else:
for filename in os.listdir(model_path):
if '.pdmodel' in filename or '.pdiparams' in filename:
shutil.move(
os.path.join(model_path, filename),
os.path.join(
os.path.dirname(model_path), 'model{}'.format(
os.path.splitext(filename)[1])))
else:
shutil.move(
os.path.join(model_path, filename),
os.path.join(
os.path.dirname(model_path), filename))
shutil.rmtree(model_path)
version_info_for_frontend = []
for version_name in os.listdir(os.path.join(cur_dir, model_name)):
if re.match(
r'\d+',
version_name): # version directory consists of numbers
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in os.listdir(
os.path.join(cur_dir, model_name, version_name)):
version_filenames_dict_for_frontend['children'].append(
{
'title': filename,
'key': filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
return version_info_for_frontend
else:
raise RuntimeError(
"Failed to download pre-trained model {}.".format(
pretrain_model_name))
@result()
def get_config_for_model(self, cur_dir, name, config_filename):
return get_config_for_one_model(cur_dir, name, config_filename)
@result()
def get_config_filenames_for_model(self, cur_dir, name):
return get_config_filenames_for_one_model(cur_dir, name)
@result()
def delete_config_for_model(self, cur_dir, name, config_filename):
if self.root_dir not in Path(
os.path.abspath(cur_dir)
).parents: # should prevent user remove files outside model-repository
raise RuntimeError(
'Failed to delete config file, please check filepath.')
if os.path.exists(os.path.join(cur_dir, name, config_filename)):
os.remove(os.path.join(cur_dir, name, config_filename))
return get_config_filenames_for_one_model(cur_dir, name)
@result()
def set_default_config_for_model(self, cur_dir, name, config_filename):
model_dir = os.path.join(os.path.abspath(cur_dir), name)
# backup config.pbtxt to config_vdlbackup_{datetime}.pbtxt
if os.path.exists(os.path.join(model_dir, 'config.pbtxt')):
shutil.copy(
os.path.join(model_dir, 'config.pbtxt'),
os.path.join(
model_dir, 'config_vdlbackup_{}.pbtxt'.format(
datetime.datetime.now().isoformat())))
if config_filename != 'config.pbtxt':
copy_config_file_to_default_config(model_dir, config_filename)
return
@result()
def delete_resource_for_model(self, cur_dir, model_name, version,
resource_filename):
if self.root_dir not in Path(
os.path.abspath(cur_dir)
).parents: # should prevent user remove files outside model-repository
raise RuntimeError(
'Failed to delete resource file, please check filepath.')
resource_path = os.path.join(
os.path.abspath(cur_dir), model_name, version, resource_filename)
if os.path.exists(resource_path):
os.remove(resource_path)
version_info_for_frontend = []
for version_name in os.listdir(os.path.join(cur_dir, model_name)):
if re.match(r'\d+',
version_name): # version directory consists of numbers
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in os.listdir(
os.path.join(cur_dir, model_name, version_name)):
version_filenames_dict_for_frontend['children'].append({
'title':
filename,
'key':
filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
return version_info_for_frontend
@result()
def rename_resource_for_model(self, cur_dir, model_name, version,
resource_filename, new_filename):
if self.root_dir not in Path(
os.path.abspath(cur_dir)
).parents: # should prevent user remove files outside model-repository
raise RuntimeError(
'Failed to rename resource file, please check filepath.')
resource_path = os.path.join(
os.path.abspath(cur_dir), model_name, version, resource_filename)
new_file_path = os.path.join(
os.path.abspath(cur_dir), model_name, version, new_filename)
if os.path.exists(resource_path):
shutil.move(resource_path, new_file_path)
version_info_for_frontend = []
for version_name in os.listdir(os.path.join(cur_dir, model_name)):
if re.match(r'\d+',
version_name): # version directory consists of numbers
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in os.listdir(
os.path.join(cur_dir, model_name, version_name)):
version_filenames_dict_for_frontend['children'].append({
'title':
filename,
'key':
filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
return version_info_for_frontend
def create_fastdeploy_client(self):
if self.client_port is None:
def get_free_tcp_port():
tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
tcp.bind(('localhost', 0))
addr, port = tcp.getsockname()
tcp.close()
return port
self.client_port = get_free_tcp_port()
app = create_gradio_client_app()
thread = Process(
target=app.launch, kwargs={'server_port': self.client_port})
thread.start()
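            # The gradio client app now runs in the child process; poll it until it
            # answers on self.client_port so that callers only receive the port once
            # the client page is actually reachable.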
def check_alive():
while True:
try:
requests.get('http://localhost:{}/'.format(
self.client_port))
break
except Exception:
time.sleep(1)
check_alive()
return self.client_port
def _poll_zombie_process(self):
        # check for servers killed by another vdl app instance that have become zombie processes
should_delete = []
for server_id, process in self.opened_servers.items():
if process.poll() is not None:
mark_pid_for_dead_process(server_id)
should_delete.append(server_id)
for server_id in should_delete:
del self.opened_servers[server_id]
def create_fastdeploy_api_call():
api = FastDeployServerApi()
routes = {
'get_directory': (api.get_directory, ['dir']),
'config_update': (api.config_update,
['dir', 'name', 'config', 'config_filename']),
'get_config': (api.get_config, ['dir']),
'get_config_filenames_for_model': (api.get_config_filenames_for_model,
['dir', 'name']),
'get_config_for_model': (api.get_config_for_model,
['dir', 'name', 'config_filename']),
'set_default_config_for_model': (api.set_default_config_for_model,
['dir', 'name', 'config_filename']),
'delete_config_for_model': (api.delete_config_for_model,
['dir', 'name', 'config_filename']),
'start_server': (api.start_server, ['config']),
'stop_server': (api.stop_server, ['server_id']),
'get_server_output': (api.get_server_output, ['server_id', 'length']),
'create_fastdeploy_client': (api.create_fastdeploy_client, []),
'get_server_list': (api.get_server_list, []),
'get_server_metric': (api.get_server_metric, ['server_id']),
'get_server_config': (api.get_server_config, ['server_id']),
'get_pretrain_model_list': (api.get_pretrain_model_list, []),
'check_server_alive': (api.check_server_alive, ['server_id']),
'download_pretrain_model':
(api.download_pretrain_model,
['dir', 'name', 'version', 'pretrain_model_name']),
'delete_resource_for_model':
(api.delete_resource_for_model,
['dir', 'name', 'version', 'resource_filename']),
'rename_resource_for_model': (api.rename_resource_for_model, [
'dir', 'name', 'version', 'resource_filename', 'new_filename'
])
}
def call(path: str, args):
route = routes.get(path)
if not route:
return json.dumps(gen_result(
status=1, msg='api not found')), 'application/json', None
method, call_arg_names = route
call_args = [args.get(name) for name in call_arg_names]
return method(*call_args)
return call
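# Usage sketch (illustrative, not part of the module): the callable returned by
# create_fastdeploy_api_call dispatches an api name plus an argument dict to the
# corresponding FastDeployServerApi method, e.g.
#
#   call = create_fastdeploy_api_call()
#   call('get_directory', {'dir': '.'})
#   call('start_server', {'config': config_json})   # config_json: a serialized launch config (hypothetical name)
#   call('get_server_output', {'server_id': 'my_server', 'length': '1000'})
#
# Argument names follow the `routes` table above; an unknown api name yields a JSON
# error result built with gen_result(status=1, msg='api not found').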
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2018, TensorFlow Authors. All rights reserved.
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
//@@
//@@.. cpp:enum:: DataType
//@@
//@@ Data types supported for input and output tensors.
//@@
enum DataType {
//@@ .. cpp:enumerator:: DataType::INVALID = 0
TYPE_INVALID = 0;
//@@ .. cpp:enumerator:: DataType::BOOL = 1
TYPE_BOOL = 1;
//@@ .. cpp:enumerator:: DataType::UINT8 = 2
TYPE_UINT8 = 2;
//@@ .. cpp:enumerator:: DataType::UINT16 = 3
TYPE_UINT16 = 3;
//@@ .. cpp:enumerator:: DataType::UINT32 = 4
TYPE_UINT32 = 4;
//@@ .. cpp:enumerator:: DataType::UINT64 = 5
TYPE_UINT64 = 5;
//@@ .. cpp:enumerator:: DataType::INT8 = 6
TYPE_INT8 = 6;
//@@ .. cpp:enumerator:: DataType::INT16 = 7
TYPE_INT16 = 7;
//@@ .. cpp:enumerator:: DataType::INT32 = 8
TYPE_INT32 = 8;
//@@ .. cpp:enumerator:: DataType::INT64 = 9
TYPE_INT64 = 9;
//@@ .. cpp:enumerator:: DataType::FP16 = 10
TYPE_FP16 = 10;
//@@ .. cpp:enumerator:: DataType::FP32 = 11
TYPE_FP32 = 11;
//@@ .. cpp:enumerator:: DataType::FP64 = 12
TYPE_FP64 = 12;
//@@ .. cpp:enumerator:: DataType::STRING = 13
TYPE_STRING = 13;
//@@ .. cpp:enumerator:: DataType::BF16 = 14
TYPE_BF16 = 14;
}
//@@
//@@ .. cpp:var:: message ModelRateLimiter
//@@
//@@ The specifications required by the rate limiter to properly
//@@ schedule the inference requests across the different models
//@@ and their instances.
//@@
message ModelRateLimiter
{
//@@ .. cpp:var:: message Resource
//@@
//@@ The resource property.
//@@
message Resource
{
//@@ .. cpp:var:: string name
//@@
//@@ The name associated with the resource.
//@@
string name = 1;
//@@ .. cpp:var:: bool global
//@@
//@@ Whether or not the resource is global. If true then the resource
//@@ is assumed to be shared among the devices otherwise specified
//@@ count of the resource is assumed for each device associated
//@@ with the instance.
//@@
bool global = 2;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of resources required for the execution of the model
//@@ instance.
//@@
uint32 count = 3;
}
//@@ .. cpp:var:: Resource resources (repeated)
//@@
//@@ The resources required to execute the request on a model instance.
//@@ Resources are just names with a corresponding count. The execution
  //@@     of the instance will be blocked until the specified resources are
//@@ available. By default an instance uses no rate-limiter resources.
//@@
repeated Resource resources = 1;
//@@ .. cpp:var:: uint32 priority
//@@
//@@ The optional weighting value to be used for prioritizing across
//@@ instances. An instance with priority 2 will be given 1/2 the
//@@ number of scheduling chances as an instance_group with priority
//@@ 1. The default priority is 1. The priority of value 0 will be
//@@ treated as priority 1.
//@@
uint32 priority = 2;
}
//@@
//@@.. cpp:var:: message ModelInstanceGroup
//@@
//@@ A group of one or more instances of a model and resources made
//@@ available for those instances.
//@@
message ModelInstanceGroup
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ Kind of this instance group.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
//@@
//@@ This instance group represents instances that can run on either
//@@ CPU or GPU. If all GPUs listed in 'gpus' are available then
//@@ instances will be created on GPU(s), otherwise instances will
//@@ be created on CPU.
//@@
KIND_AUTO = 0;
//@@ .. cpp:enumerator:: Kind::KIND_GPU = 1
//@@
//@@ This instance group represents instances that must run on the
//@@ GPU.
//@@
KIND_GPU = 1;
//@@ .. cpp:enumerator:: Kind::KIND_CPU = 2
//@@
//@@ This instance group represents instances that must run on the
//@@ CPU.
//@@
KIND_CPU = 2;
//@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
//@@
//@@ This instance group represents instances that should run on the
//@@ CPU and/or GPU(s) as specified by the model or backend itself.
//@@ The inference server will not override the model/backend
//@@ settings.
//@@
KIND_MODEL = 3;
}
//@@
//@@ .. cpp:var:: message SecondaryDevice
//@@
//@@ A secondary device required for a model instance.
//@@
message SecondaryDevice
{
//@@
//@@ .. cpp:enum:: SecondaryDeviceKind
//@@
//@@ The kind of the secondary device.
//@@
enum SecondaryDeviceKind {
//@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
//@@
//@@ An NVDLA core. http://nvdla.org
//@@ Currently KIND_NVDLA is only supported by the TensorRT backend.
//@@
KIND_NVDLA = 0;
}
//@@ .. cpp:var:: SecondaryDeviceKind kind
//@@
//@@ The secondary device kind.
//@@
SecondaryDeviceKind kind = 1;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ Identifier for the secondary device.
//@@
int64 device_id = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ Optional name of this group of instances. If not specified the
//@@ name will be formed as <model name>_<group number>. The name of
//@@ individual instances will be further formed by a unique instance
//@@ number and GPU index:
//@@
string name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this instance group. Default is KIND_AUTO. If
//@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and
//@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
//@@ and 'gpu' cannot be specified.
//@@
Kind kind = 4;
//@@ .. cpp:var:: int32 count
//@@
//@@ For a group assigned to GPU, the number of instances created for
//@@ each GPU listed in 'gpus'. For a group assigned to CPU the number
//@@ of instances created. Default is 1.
int32 count = 2;
//@@ .. cpp:var:: ModelRateLimiter rate_limiter
//@@
//@@ The rate limiter specific settings to be associated with this
//@@ instance group. Optional, if not specified no rate limiting
//@@ will be applied to this instance group.
//@@
ModelRateLimiter rate_limiter = 6;
//@@ .. cpp:var:: int32 gpus (repeated)
//@@
//@@ GPU(s) where instances should be available. For each GPU listed,
//@@ 'count' instances of the model will be available. Setting 'gpus'
  //@@     to empty (or not specifying at all) is equivalent to listing all
//@@ available GPUs.
//@@
repeated int32 gpus = 3;
//@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
//@@
//@@ Secondary devices that are required by instances specified by this
//@@ instance group. Optional.
//@@
repeated SecondaryDevice secondary_devices = 8;
//@@ .. cpp:var:: string profile (repeated)
//@@
  //@@     For TensorRT models containing multiple optimization profiles, this
//@@ parameter specifies a set of optimization profiles available to this
//@@ instance group. The inference server will choose the optimal profile
//@@ based on the shapes of the input tensors. This field should lie
//@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
//@@ and be specified only for TensorRT backend, otherwise an error will
//@@ be generated. If not specified, the server will select the first
//@@ optimization profile by default.
//@@
repeated string profile = 5;
//@@ .. cpp:var:: bool passive
//@@
//@@ Whether the instances within this instance group will be accepting
//@@ inference requests from the scheduler. If true, the instances will
//@@ not be added to the scheduler. Default value is false.
//@@
bool passive = 7;
//@@ .. cpp:var:: string host_policy
//@@
//@@ The host policy name that the instance to be associated with.
//@@ The default value is set to reflect the device kind of the instance,
//@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
//@@ KIND_GPU is "gpu_<gpu_id>".
//@@
string host_policy = 9;
}
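// Illustrative sketch (not part of the schema): in a model's config.pbtxt,
// ModelInstanceGroup is normally written in protobuf text format under the model
// configuration's instance_group field (field name assumed from the standard Triton
// layout), for example:
//
//   instance_group [
//     {
//       name: "resnet_gpu"
//       kind: KIND_GPU
//       count: 2
//       gpus: [ 0, 1 ]
//     }
//   ]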
//@@
//@@.. cpp:var:: message ModelTensorReshape
//@@
//@@ Reshape specification for input and output tensors.
//@@
message ModelTensorReshape
{
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The shape to use for reshaping.
//@@
repeated int64 shape = 1;
}
//@@
//@@.. cpp:var:: message ModelInput
//@@
//@@ An input required by the model.
//@@
message ModelInput
{
//@@
//@@ .. cpp:enum:: Format
//@@
//@@ The format for the input.
//@@
enum Format {
//@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
//@@
//@@ The input has no specific format. This is the default.
//@@
FORMAT_NONE = 0;
//@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
//@@
//@@ HWC image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NHWC = 1;
//@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
//@@
//@@ CHW image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NCHW = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the input.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: Format format
//@@
//@@ The format of the input. Optional.
//@@
Format format = 3;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the input tensor that must be provided
//@@ when invoking the inference API for this model.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape expected for this input by the backend. The input will
//@@ be reshaped to this before being presented to the backend. The
//@@ reshape must have the same number of elements as the input shape
//@@ specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the input is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
//@@ .. cpp:var:: bool allow_ragged_batch
//@@
//@@ Whether or not the input is allowed to be "ragged" in a dynamically
//@@ created batch. Default is false indicating that two requests will
//@@ only be batched if this tensor has the same shape in both requests.
//@@ True indicates that two requests can be batched even if this tensor
//@@ has a different shape in each request.
//@@
bool allow_ragged_batch = 7;
//@@ .. cpp:var:: bool optional
//@@
//@@ Whether or not the input is optional for the model execution.
//@@ If true, the input is not required in the inference request.
//@@ Default value is false.
//@@
bool optional = 8;
}
//@@
//@@.. cpp:var:: message ModelOutput
//@@
//@@ An output produced by the model.
//@@
message ModelOutput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the output.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the output tensor.
//@@
repeated int64 dims = 3;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape produced for this output by the backend. The output will
  //@@     be reshaped from this to the shape specified in 'dims' before being
//@@ returned in the inference response. The reshape must have the same
//@@ number of elements as the output shape specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: string label_filename
//@@
//@@ The label file associated with this output. Should be specified only
//@@ for outputs that represent classifications. Optional.
//@@
string label_filename = 4;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the output is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
}
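// Illustrative sketch (not part of the schema): ModelInput / ModelOutput entries
// usually appear in config.pbtxt as the model configuration's input / output lists
// (field names assumed from the standard Triton layout), for example:
//
//   input [
//     { name: "INPUT0" data_type: TYPE_FP32 format: FORMAT_NCHW dims: [ 3, 224, 224 ] }
//   ]
//   output [
//     { name: "OUTPUT0" data_type: TYPE_FP32 dims: [ 1000 ] label_filename: "labels.txt" }
//   ]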
//@@ .. cpp:var:: message BatchInput
//@@
//@@ A batch input is an additional input that must be added by
//@@ the backend based on all the requests in a batch.
//@@
message BatchInput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch input.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
//@@
//@@ The element count of the 'source_input' will be added as
//@@ input with shape [1].
//@@
BATCH_ELEMENT_COUNT = 0;
//@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1]. For example, if there is a
//@@ batch of two request, each with 2 elements, an input of value
//@@ 2 will be added to the first request, and an input of value
//@@ 4 will be added to the second request.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT = 1;
//@@ .. cpp:enumerator::
//@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1], except for the first request
//@@ in the batch. For the first request in the batch, the input
//@@ will have shape [2] where the first element is value 0.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;
//@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
//@@
//@@ Among the requests in the batch, the max element count of the
//@@ 'source_input' will be added as input with shape
//@@ [max_element_count] for the first request in the batch.
//@@ For other requests, such input will be with shape [0].
//@@ The data of the tensor will be uninitialized.
//@@
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with shape
//@@ [batch_size, len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]].
//@@
BATCH_ITEM_SHAPE = 4;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with single dimensional
//@@ shape [batch_size * len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [6] and value [3, 1, 3, 1, 2, 2].
//@@
BATCH_ITEM_SHAPE_FLATTEN = 5;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch input.
//@@
Kind kind = 1;
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the model inputs that the backend will create
//@@ for this batch input.
//@@
repeated string target_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The input's datatype. The data type can be TYPE_INT32 or
//@@ TYPE_FP32.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives the value for each batch input from one or
//@@ more other inputs. 'source_input' gives the names of those
//@@ inputs.
//@@
repeated string source_input = 4;
}
//@@.. cpp:var:: message BatchOutput
//@@
//@@ A batch output is an output produced by the model that must be handled
//@@ differently by the backend based on all the requests in a batch.
//@@
message BatchOutput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch output.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
//@@
//@@ The output should be scattered according to the shape of
//@@ 'source_input'. The dynamic dimension of the output will
//@@ be set to the value of the same dimension in the input.
//@@
BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
}
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the outputs to be produced by this batch output
//@@ specification.
//@@
repeated string target_name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch output.
//@@
Kind kind = 2;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives each batch output from one or more inputs.
//@@ 'source_input' gives the names of those inputs.
//@@
repeated string source_input = 3;
}
//@@
//@@.. cpp:var:: message ModelVersionPolicy
//@@
//@@ Policy indicating which versions of a model should be made
//@@ available by the inference server.
//@@
message ModelVersionPolicy
{
//@@ .. cpp:var:: message Latest
//@@
//@@ Serve only the latest version(s) of a model. This is
//@@ the default policy.
//@@
message Latest
{
//@@ .. cpp:var:: uint32 num_versions
//@@
    //@@     Serve only the 'num_versions' highest-numbered versions.
//@@ The default value of 'num_versions' is 1, indicating that by
//@@ default only the single highest-number version of a
//@@ model will be served.
//@@
uint32 num_versions = 1;
}
//@@ .. cpp:var:: message All
//@@
//@@ Serve all versions of the model.
//@@
message All {}
//@@ .. cpp:var:: message Specific
//@@
//@@ Serve only specific versions of the model.
//@@
message Specific
{
//@@ .. cpp:var:: int64 versions (repeated)
//@@
//@@ The specific versions of the model that will be served.
//@@
repeated int64 versions = 1;
}
//@@ .. cpp:var:: oneof policy_choice
//@@
//@@ Each model must implement only a single version policy. The
//@@ default policy is 'Latest'.
//@@
oneof policy_choice
{
//@@ .. cpp:var:: Latest latest
//@@
//@@ Serve only latest version(s) of the model.
//@@
Latest latest = 1;
//@@ .. cpp:var:: All all
//@@
//@@ Serve all versions of the model.
//@@
All all = 2;
//@@ .. cpp:var:: Specific specific
//@@
//@@ Serve only specific version(s) of the model.
//@@
Specific specific = 3;
}
}
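// Illustrative sketch (not part of the schema): exactly one member of the
// policy_choice oneof is set in config.pbtxt (enclosing field name version_policy
// assumed from the standard Triton layout), for example one of:
//
//   version_policy: { latest { num_versions: 2 } }
//   version_policy: { all {} }
//   version_policy: { specific { versions: [ 1, 3 ] } }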
//@@
//@@.. cpp:var:: message ModelOptimizationPolicy
//@@
//@@ Optimization settings for a model. These settings control if/how a
//@@ model is optimized and prioritized by the backend framework when
//@@ it is loaded.
//@@
message ModelOptimizationPolicy
{
//@@
//@@ .. cpp:var:: message Graph
//@@
//@@ Enable generic graph optimization of the model. If not specified
//@@ the framework's default level of optimization is used. Supports
//@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow
//@@ causes XLA to be enabled/disabled for the model. For Onnx defaults
//@@ to enabling all optimizations, -1 enables only basic optimizations,
//@@ +1 enables only basic and extended optimizations.
//@@
message Graph
{
//@@ .. cpp:var:: int32 level
//@@
//@@ The optimization level. Defaults to 0 (zero) if not specified.
//@@
//@@ - -1: Disabled
//@@ - 0: Framework default
//@@ - 1+: Enable optimization level (greater values indicate
//@@ higher optimization levels)
//@@
int32 level = 1;
}
//@@
//@@ .. cpp:enum:: ModelPriority
//@@
//@@ Model priorities. A model will be given scheduling and execution
//@@ preference over models at lower priorities. Current model
//@@ priorities only work for TensorRT models.
//@@
enum ModelPriority {
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
//@@
//@@ The default model priority.
//@@
PRIORITY_DEFAULT = 0;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
//@@
//@@ The maximum model priority.
//@@
PRIORITY_MAX = 1;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
//@@
//@@ The minimum model priority.
//@@
PRIORITY_MIN = 2;
}
//@@
//@@ .. cpp:var:: message Cuda
//@@
//@@ CUDA-specific optimization settings.
//@@
message Cuda
{
//@@ .. cpp:var:: message GraphSpec
//@@
//@@ Specification of the CUDA graph to be captured.
//@@
message GraphSpec
{
      //@@ .. cpp:var:: message Shape
      //@@
      //@@    Specification of tensor dimensions.
//@@
message Shape
{
//@@ .. cpp:var:: int64 dim (repeated)
//@@
//@@ The dimension.
//@@
repeated int64 dim = 1;
}
message LowerBound
{
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of
//@@ the input without batching dimension.
//@@
map<string, Shape> input = 2;
}
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of the
//@@ input without batching dimension.
//@@
map<string, Shape> input = 2;
//@@ .. cpp:var:: LowerBound graph_lower_bound
//@@
//@@ Specify the lower bound of the CUDA graph. Optional.
//@@ If specified, the graph can be used for input shapes and
//@@ batch sizes that are in closed interval between the lower
//@@ bound specification and graph specification. For dynamic
//@@ shape model, this allows CUDA graphs to be launched
//@@ frequently without capturing all possible shape combinations.
//@@ However, using graph for shape combinations different from
//@@ the one used for capturing introduces uninitialized data for
//@@ execution and it may distort the inference result if
//@@ the model is sensitive to uninitialized data.
//@@
LowerBound graph_lower_bound = 3;
}
//@@ .. cpp:var:: bool graphs
//@@
//@@ Use CUDA graphs API to capture model operations and execute
//@@ them more efficiently. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool graphs = 1;
//@@ .. cpp:var:: bool busy_wait_events
//@@
//@@ Use busy-waiting to synchronize CUDA events to achieve minimum
//@@ latency from event complete to host thread to be notified, with
//@@ the cost of high CPU load. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool busy_wait_events = 2;
//@@ .. cpp:var:: GraphSpec graph_spec (repeated)
//@@
//@@ Specification of the CUDA graph to be captured. If not specified
//@@ and 'graphs' is true, the default CUDA graphs will be captured
//@@ based on model settings.
//@@ Currently only recognized by TensorRT backend.
//@@
repeated GraphSpec graph_spec = 3;
//@@ .. cpp:var:: bool output_copy_stream
//@@
//@@ Uses a CUDA stream separate from the inference stream to copy the
//@@ output to host. However, be aware that setting this option to
//@@ true will lead to an increase in the memory consumption of the
//@@ model as Triton will allocate twice as much GPU memory for its
//@@ I/O tensor buffers. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool output_copy_stream = 4;
}
//@@
//@@ .. cpp:var:: message ExecutionAccelerators
//@@
//@@ Specify the preferred execution accelerators to be used to execute
//@@ the model. Currently only recognized by ONNX Runtime backend and
//@@ TensorFlow backend.
//@@
//@@ For ONNX Runtime backend, it will deploy the model with the execution
//@@ accelerators by priority, the priority is determined based on the
//@@ order that they are set, i.e. the provider at the front has highest
//@@ priority. Overall, the priority will be in the following order:
//@@ <gpu_execution_accelerator> (if instance is on GPU)
//@@ CUDA Execution Provider (if instance is on GPU)
//@@ <cpu_execution_accelerator>
//@@ Default CPU Execution Provider
//@@
message ExecutionAccelerators
{
//@@
//@@ .. cpp:var:: message Accelerator
//@@
//@@ Specify the accelerator to be used to execute the model.
//@@ Accelerator with the same name may accept different parameters
//@@ depending on the backends.
//@@
message Accelerator
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the execution accelerator.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
      //@@     Additional parameters used to configure the accelerator.
//@@
map<string, string> parameters = 2;
}
//@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on GPU.
//@@
//@@ For ONNX Runtime backend, possible value is "tensorrt" as name,
//@@ and no parameters are required.
//@@
//@@ For TensorFlow backend, possible values are "tensorrt",
//@@ "auto_mixed_precision", "gpu_io".
//@@
//@@ For "tensorrt", the following parameters can be specified:
//@@ "precision_mode": The precision used for optimization.
//@@ Allowed values are "FP32" and "FP16". Default value is "FP32".
//@@
//@@ "max_cached_engines": The maximum number of cached TensorRT
//@@ engines in dynamic TensorRT ops. Default value is 100.
//@@
//@@ "minimum_segment_size": The smallest model subgraph that will
//@@ be considered for optimization by TensorRT. Default value is 3.
//@@
//@@ "max_workspace_size_bytes": The maximum GPU memory the model
//@@ can use temporarily during execution. Default value is 1GB.
//@@
//@@ For "auto_mixed_precision", no parameters are required. If set,
//@@ the model will try to use FP16 for better performance.
//@@ This optimization can not be set with "tensorrt".
//@@
//@@ For "gpu_io", no parameters are required. If set, the model will
//@@ be executed using TensorFlow Callable API to set input and output
//@@ tensors in GPU memory if possible, which can reduce data transfer
//@@ overhead if the model is used in ensemble. However, the Callable
//@@ object will be created on model creation and it will request all
//@@ outputs for every model execution, which may impact the
//@@ performance if a request does not require all outputs. This
    //@@     optimization will only take effect if the model instance is
//@@ created with KIND_GPU.
//@@
repeated Accelerator gpu_execution_accelerator = 1;
//@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on CPU.
//@@
//@@ For ONNX Runtime backend, possible value is "openvino" as name,
//@@ and no parameters are required.
//@@
repeated Accelerator cpu_execution_accelerator = 2;
}
//@@
//@@ .. cpp:var:: message PinnedMemoryBuffer
//@@
//@@ Specify whether to use a pinned memory buffer when transferring data
//@@ between non-pinned system memory and GPU memory. Using a pinned
//@@ memory buffer for system from/to GPU transfers will typically provide
//@@ increased performance. For example, in the common use case where the
//@@ request provides inputs and delivers outputs via non-pinned system
//@@ memory, if the model instance accepts GPU IOs, the inputs will be
//@@ processed by two copies: from non-pinned system memory to pinned
//@@ memory, and from pinned memory to GPU memory. Similarly, pinned
//@@ memory will be used for delivering the outputs.
//@@
message PinnedMemoryBuffer
{
//@@ .. cpp:var:: bool enable
//@@
//@@ Use pinned memory buffer. Default is true.
//@@
bool enable = 1;
}
//@@ .. cpp:var:: Graph graph
//@@
//@@ The graph optimization setting for the model. Optional.
//@@
Graph graph = 1;
//@@ .. cpp:var:: ModelPriority priority
//@@
//@@ The priority setting for the model. Optional.
//@@
ModelPriority priority = 2;
//@@ .. cpp:var:: Cuda cuda
//@@
//@@ CUDA-specific optimization settings. Optional.
//@@
Cuda cuda = 3;
//@@ .. cpp:var:: ExecutionAccelerators execution_accelerators
//@@
//@@ The accelerators used for the model. Optional.
//@@
ExecutionAccelerators execution_accelerators = 4;
//@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for inputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer input_pinned_memory = 5;
//@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for outputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer output_pinned_memory = 6;
//@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
//@@
//@@ The backend may use a gather kernel to gather input data if the
//@@ device has direct access to the source buffer and the destination
//@@ buffer. In such case, the gather kernel will be used only if the
//@@ number of buffers to be gathered is greater or equal to
  //@@     the specified value. If 0, the gather kernel will be disabled.
//@@ Default value is 0.
//@@ Currently only recognized by TensorRT backend.
//@@
uint32 gather_kernel_buffer_threshold = 7;
//@@ .. cpp:var:: bool eager_batching
//@@
//@@ Start preparing the next batch before the model instance is ready
//@@ for the next inference. This option can be used to overlap the
//@@ batch preparation with model execution, with the trade-off that
//@@ the next batch might be smaller than what it could have been.
//@@ Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool eager_batching = 8;
}
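// Illustrative sketch (not part of the schema): enabling the TensorRT execution
// accelerator with FP16 for GPU instances could look like the following config.pbtxt
// fragment (enclosing field name optimization assumed from the standard Triton
// layout):
//
//   optimization {
//     execution_accelerators {
//       gpu_execution_accelerator [
//         {
//           name: "tensorrt"
//           parameters { key: "precision_mode" value: "FP16" }
//         }
//       ]
//     }
//   }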
//@@
//@@.. cpp:var:: message ModelQueuePolicy
//@@
//@@ Queue policy for inference requests.
//@@
message ModelQueuePolicy
{
//@@
//@@ .. cpp:enum:: TimeoutAction
//@@
//@@ The action applied to timed-out requests.
//@@
enum TimeoutAction {
//@@ .. cpp:enumerator:: Action::REJECT = 0
//@@
//@@ Reject the request and return error message accordingly.
//@@
REJECT = 0;
//@@ .. cpp:enumerator:: Action::DELAY = 1
//@@
//@@ Delay the request until all other requests at the same
//@@ (or higher) priority levels that have not reached their timeouts
//@@ are processed. A delayed request will eventually be processed,
//@@ but may be delayed indefinitely due to newly arriving requests.
//@@
DELAY = 1;
}
//@@
//@@ .. cpp:var:: TimeoutAction timeout_action
//@@
//@@ The action applied to timed-out request.
//@@ The default action is REJECT.
//@@
TimeoutAction timeout_action = 1;
//@@
//@@ .. cpp:var:: uint64 default_timeout_microseconds
//@@
//@@ The default timeout for every request, in microseconds.
//@@ The default value is 0 which indicates that no timeout is set.
//@@
uint64 default_timeout_microseconds = 2;
//@@
//@@ .. cpp:var:: bool allow_timeout_override
//@@
//@@ Whether individual request can override the default timeout value.
//@@ When true, individual requests can set a timeout that is less than
//@@ the default timeout value but may not increase the timeout.
//@@ The default value is false.
//@@
bool allow_timeout_override = 3;
//@@
//@@ .. cpp:var:: uint32 max_queue_size
//@@
//@@ The maximum queue size for holding requests. A request will be
//@@ rejected immediately if it can't be enqueued because the queue is
//@@ full. The default value is 0 which indicates that no maximum
//@@ queue size is enforced.
//@@
uint32 max_queue_size = 4;
}
//@@
//@@.. cpp:var:: message ModelDynamicBatching
//@@
//@@ Dynamic batching configuration. These settings control how dynamic
//@@ batching operates for the model.
//@@
message ModelDynamicBatching
{
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching. If a batch of one of
//@@ these sizes can be formed it will be executed immediately. If
//@@ not specified a preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 1;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a request will be delayed in
//@@ the scheduling queue to wait for additional requests for
//@@ batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 2;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order as
//@@ the order of requests sent to the scheduler. If false, the responses
//@@ may be returned in arbitrary order. This option is specifically
//@@ needed when a sequence of related inference requests (i.e. inference
//@@ requests with the same correlation ID) are sent to the dynamic
//@@ batcher to ensure that the sequence responses are in the correct
//@@ order.
//@@
bool preserve_ordering = 3;
//@@ .. cpp:var:: uint32 priority_levels
//@@
//@@ The number of priority levels to be enabled for the model,
//@@ the priority level starts from 1 and 1 is the highest priority.
//@@ Requests are handled in priority order with all priority 1 requests
//@@ processed before priority 2, all priority 2 requests processed before
//@@ priority 3, etc. Requests with the same priority level will be
//@@ handled in the order that they are received.
//@@
uint32 priority_levels = 4;
//@@ .. cpp:var:: uint32 default_priority_level
//@@
//@@ The priority level used for requests that don't specify their
//@@ priority. The value must be in the range [ 1, 'priority_levels' ].
//@@
uint32 default_priority_level = 5;
//@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
//@@
//@@ The default queue policy used for requests that don't require
//@@ priority handling and requests that specify priority levels where
//@@ there is no specific policy given. If not specified, a policy with
//@@ default field values will be used.
//@@
ModelQueuePolicy default_queue_policy = 6;
//@@ .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
//@@
//@@ Specify the queue policy for the priority level. The default queue
//@@ policy will be used if a priority level doesn't specify a queue
//@@ policy.
//@@
map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
}
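// Illustrative sketch (not part of the schema): a common dynamic batching setup in
// config.pbtxt (enclosing field name dynamic_batching assumed from the standard
// Triton layout):
//
//   dynamic_batching {
//     preferred_batch_size: [ 4, 8 ]
//     max_queue_delay_microseconds: 100
//   }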
//@@
//@@.. cpp:var:: message ModelSequenceBatching
//@@
//@@ Sequence batching configuration. These settings control how sequence
//@@ batching operates for the model.
//@@
message ModelSequenceBatching
{
//@@ .. cpp:var:: message Control
//@@
//@@ A control is a signal that the sequence batcher uses to
//@@ communicate with a backend.
//@@
message Control
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the control.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
//@@
//@@ A new sequence is/is-not starting. If true a sequence is
//@@ starting, if false a sequence is continuing. Must
//@@ specify either int32_false_true, fp32_false_true or
//@@ bool_false_true for this control. This control is optional.
//@@
CONTROL_SEQUENCE_START = 0;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
//@@
//@@ A sequence is/is-not ready for inference. If true the
//@@ input tensor data is valid and should be used. If false
//@@ the input tensor data is invalid and inferencing should
//@@ be "skipped". Must specify either int32_false_true,
//@@ fp32_false_true or bool_false_true for this control. This
//@@ control is optional.
//@@
CONTROL_SEQUENCE_READY = 1;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
//@@
//@@ A sequence is/is-not ending. If true a sequence is
//@@ ending, if false a sequence is continuing. Must specify
//@@ either int32_false_true, fp32_false_true or bool_false_true
//@@ for this control. This control is optional.
//@@
CONTROL_SEQUENCE_END = 2;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
//@@
//@@ The correlation ID of the sequence. The correlation ID
//@@ is an uint64_t value that is communicated in whole or
//@@ in part by the tensor. The tensor's datatype must be
//@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
//@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
//@@ the correlation ID will be truncated to the low-order 32
//@@ bits. This control is optional.
//@@
CONTROL_SEQUENCE_CORRID = 3;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this control.
//@@
Kind kind = 1;
//@@ .. cpp:var:: int32 int32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in an int32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'int32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated int32 int32_false_true = 2;
//@@ .. cpp:var:: float fp32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a fp32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'fp32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated float fp32_false_true = 3;
//@@ .. cpp:var:: bool bool_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a bool tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'bool_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated bool bool_false_true = 5;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The control's datatype.
//@@
DataType data_type = 4;
}
//@@ .. cpp:var:: message ControlInput
//@@
//@@ The sequence control values to communicate by a model input.
//@@
message ControlInput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model input.
//@@
string name = 1;
//@@ .. cpp:var:: Control control (repeated)
//@@
//@@ The control value(s) that should be communicated to the
//@@ model using this model input.
//@@
repeated Control control = 2;
}
//@@
//@@ .. cpp:var:: message InitialState
//@@
//@@ Settings used to initialize data for implicit state.
//@@
message InitialState
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the state tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof state_data
//@@
//@@ Specify how the initial state data is generated.
//@@
oneof state_data
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as initial state data.
//@@ Note that the value of 'zero_data' will not be checked,
//@@ instead, zero data will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@ .. cpp:var:: string data_file
//@@
//@@ The file whose content will be used as the initial data for
//@@ the state in row-major order. The file must be provided in
//@@ sub-directory 'initial_state' under the model directory.
//@@
string data_file = 4;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the state initialization.
//@@
string name = 5;
}
//@@ .. cpp:var:: message State
//@@
//@@ An input / output pair of tensors that carry state for the sequence.
//@@
message State
{
//@@ .. cpp:var:: string input_name
//@@
//@@ The name of the model state input.
//@@
string input_name = 1;
//@@ .. cpp:var:: string output_name
//@@
//@@ The name of the model state output.
//@@
string output_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: int64 dim (repeated)
//@@
//@@ The dimension.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: InitialState initial_state (repeated)
//@@
//@@ The optional field to specify the initial state for the model.
//@@
repeated InitialState initial_state = 5;
}
//@@ .. cpp:var:: message StrategyDirect
//@@
//@@ The sequence batcher uses a specific, unique batch
//@@ slot for each sequence. All inference requests in a
//@@ sequence are directed to the same batch slot in the same
//@@ model instance over the lifetime of the sequence. This
//@@ is the default strategy.
//@@
message StrategyDirect
{
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the sequence batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 1;
//@@ .. cpp:var:: float minimum_slot_utilization
//@@
//@@ The minimum slot utilization that must be satisfied to
//@@ execute the batch before 'max_queue_delay_microseconds' expires.
//@@ For example, a value of 0.5 indicates that the batch should be
//@@ executed as soon as 50% or more of the slots are ready even if
//@@ the 'max_queue_delay_microseconds' timeout has not expired.
//@@ The default is 0.0, indicating that a batch will be executed
//@@ before 'max_queue_delay_microseconds' timeout expires if at least
//@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
//@@ ignored unless minimum_slot_utilization is set to a non-zero
//@@ value.
//@@
float minimum_slot_utilization = 2;
}
//@@ .. cpp:var:: message StrategyOldest
//@@
//@@ The sequence batcher maintains up to 'max_candidate_sequences'
//@@ candidate sequences. 'max_candidate_sequences' can be greater
//@@ than the model's 'max_batch_size'. For inferencing the batcher
//@@ chooses from the candidate sequences up to 'max_batch_size'
//@@ inference requests. Requests are chosen in an oldest-first
//@@ manner across all candidate sequences. A given sequence is
//@@ not guaranteed to be assigned to the same batch slot for
//@@ all inference requests of that sequence.
//@@
message StrategyOldest
{
//@@ .. cpp:var:: int32 max_candidate_sequences
//@@
//@@ Maximum number of candidate sequences that the batcher
    //@@     maintains. Excess sequences are kept in an ordered backlog
//@@ and become candidates when existing candidate sequences
//@@ complete.
//@@
int32 max_candidate_sequences = 1;
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching of candidate
//@@ sequences. If a batch of one of these sizes can be formed
//@@ it will be executed immediately. If not specified a
//@@ preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 2;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the dynamic batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 3;
}
//@@ .. cpp:var:: oneof strategy_choice
//@@
//@@ The strategy used by the sequence batcher. Default strategy
//@@ is 'direct'.
//@@
oneof strategy_choice
{
//@@ .. cpp:var:: StrategyDirect direct
//@@
//@@ StrategyDirect scheduling strategy.
//@@
StrategyDirect direct = 3;
//@@ .. cpp:var:: StrategyOldest oldest
//@@
//@@ StrategyOldest scheduling strategy.
//@@
StrategyOldest oldest = 4;
}
//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
//@@
//@@ The maximum time, in microseconds, that a sequence is allowed to
//@@ be idle before it is aborted. The inference server considers a
//@@ sequence idle when it does not have any inference request queued
//@@ for the sequence. If this limit is exceeded, the inference server
//@@ will free the sequence slot allocated by the sequence and make it
//@@ available for another sequence. If not specified (or specified as
//@@ zero) a default value of 1000000 (1 second) is used.
//@@
uint64 max_sequence_idle_microseconds = 1;
//@@ .. cpp:var:: ControlInput control_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ sequence start, stop, ready and similar control values to the
//@@ model.
//@@
repeated ControlInput control_input = 2;
//@@ .. cpp:var:: State state (repeated)
//@@
//@@ The optional state that can be stored in Triton for performing
//@@ inference requests on a sequence. Each sequence holds an implicit
//@@ state local to itself. The output state tensor provided by the
//@@ model in 'output_name' field of the current inference request will
//@@ be transferred as an input tensor named 'input_name' in the next
//@@ request of the same sequence. The input state of the first request
//@@ in the sequence contains garbage data.
//@@
repeated State state = 5;
}
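// Illustrative sketch (not part of the schema): a sequence batcher that feeds the
// start/ready signals into the model could be configured roughly as follows in
// config.pbtxt (enclosing field name sequence_batching assumed from the standard
// Triton layout):
//
//   sequence_batching {
//     max_sequence_idle_microseconds: 5000000
//     control_input [
//       {
//         name: "START"
//         control [ { kind: CONTROL_SEQUENCE_START int32_false_true: [ 0, 1 ] } ]
//       },
//       {
//         name: "READY"
//         control [ { kind: CONTROL_SEQUENCE_READY int32_false_true: [ 0, 1 ] } ]
//       }
//     ]
//   }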
//@@
//@@.. cpp:var:: message ModelEnsembling
//@@
//@@ Model ensembling configuration. These settings specify the models that
//@@ compose the ensemble and how data flows between the models.
//@@
message ModelEnsembling
{
//@@ .. cpp:var:: message Step
//@@
//@@ Each step specifies a model included in the ensemble,
//@@ maps ensemble tensor names to the model input tensors,
//@@ and maps model output tensors to ensemble tensor names
//@@
message Step
{
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to execute for this step of the ensemble.
//@@
string model_name = 1;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model to use for inference. If -1
//@@ the latest/most-recent version of the model is used.
//@@
int64 model_version = 2;
//@@ .. cpp:var:: map<string,string> input_map
//@@
//@@ Map from name of an input tensor on this step's model to ensemble
//@@ tensor name. The ensemble tensor must have the same data type and
//@@ shape as the model input. Each model input must be assigned to
//@@ one ensemble tensor, but the same ensemble tensor can be assigned
//@@ to multiple model inputs.
//@@
map<string, string> input_map = 3;
//@@ .. cpp:var:: map<string,string> output_map
//@@
//@@ Map from name of an output tensor on this step's model to ensemble
//@@ tensor name. The data type and shape of the ensemble tensor will
//@@ be inferred from the model output. It is optional to assign all
//@@ model outputs to ensemble tensors. One ensemble tensor name
//@@ can appear in an output map only once.
//@@
map<string, string> output_map = 4;
}
//@@ .. cpp:var:: Step step (repeated)
//@@
//@@ The models and the input / output mappings used within the ensemble.
//@@
repeated Step step = 1;
}
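//
// Illustrative sketch (not part of the original file): an ensemble step in
// config.pbtxt. Model and tensor names are placeholder assumptions; each map
// key is the step model's tensor name and the value is the ensemble tensor
// name.
//
//   ensemble_scheduling {
//     step [
//       {
//         model_name: "preprocess"
//         model_version: -1
//         input_map { key: "PRE_INPUT"  value: "RAW_IMAGE" }
//         output_map { key: "PRE_OUTPUT"  value: "preprocessed_image" }
//       }
//     ]
//   }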
//@@
//@@.. cpp:var:: message ModelParameter
//@@
//@@ A model parameter.
//@@
message ModelParameter
{
//@@ .. cpp:var:: string string_value
//@@
//@@ The string value of the parameter.
//@@
string string_value = 1;
}
//@@
//@@.. cpp:var:: message ModelWarmup
//@@
//@@ Settings used to construct the request sample for model warmup.
//@@
message ModelWarmup
{
//@@
//@@ .. cpp:var:: message Input
//@@
//@@     Metadata associated with an input.
//@@
message Input
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof input_data_type
//@@
//@@ Specify how the input data is generated. If the input has STRING
//@@ data type and 'random_data' is set, the data generation will fall
//@@ back to 'zero_data'.
//@@
oneof input_data_type
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as input data. Note that the
//@@       value of 'zero_data' will not be checked; instead, zero data
//@@ will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@
//@@ .. cpp:var:: bool random_data
//@@
//@@ The identifier for using random data as input data. Note that
//@@       the value of 'random_data' will not be checked; instead,
//@@ random data will be used as long as the field is set.
//@@
bool random_data = 4;
//@@ .. cpp:var:: string input_data_file
//@@
//@@ The file whose content will be used as raw input data in
//@@ row-major order. The file must be provided in a sub-directory
//@@ 'warmup' under the model directory. The file contents should be
//@@ in binary format. For TYPE_STRING data-type, an element is
//@@ represented by a 4-byte unsigned integer giving the length
//@@ followed by the actual bytes.
//@@
string input_data_file = 5;
}
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the request sample.
//@@
string name = 1;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1. If
//@@ batch_size > 1, the 'inputs' specified below will be duplicated to
//@@ match the batch size requested.
//@@
uint32 batch_size = 2;
//@@ .. cpp:var:: map<string, Input> inputs
//@@
//@@     The warmup metadata associated with every model input, including
//@@     control tensors.
//@@
map<string, Input> inputs = 3;
//@@ .. cpp:var:: uint32 count
//@@
//@@     The number of times this warmup sample will be executed. For
//@@     example, if this field is set to 2, two model executions using this
//@@     sample will be scheduled during warmup. The default value is 0,
//@@     which indicates that this sample will be used only once.
//@@     Note that for sequence models, 'count' may not work well because
//@@     the model often expects a valid sequence of requests, which should
//@@     be represented by a series of warmup samples. Setting 'count > 1'
//@@     essentially "resends" one of the samples, which may invalidate the
//@@     sequence and result in an unexpected warmup failure.
//@@
uint32 count = 4;
}
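//
// Illustrative sketch (not part of the original file): a warmup sample in
// config.pbtxt. The input name, shape and data type are placeholder
// assumptions.
//
//   model_warmup [
//     {
//       name: "zero_sample"
//       batch_size: 1
//       count: 2
//       inputs {
//         key: "INPUT_0"
//         value { data_type: TYPE_FP32  dims: [ 3, 224, 224 ]  zero_data: true }
//       }
//     }
//   ]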
//@@
//@@ .. cpp:var:: message ModelOperations
//@@
//@@ The metadata of libraries providing custom operations for this model.
//@@
message ModelOperations
{
//@@ .. cpp:var:: string op_library_filename (repeated)
//@@
//@@ Optional paths of the libraries providing custom operations for
//@@ this model. Valid only for ONNX models.
//@@
repeated string op_library_filename = 1;
}
//@@
//@@ .. cpp:var:: message ModelTransactionPolicy
//@@
//@@ The specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
message ModelTransactionPolicy
{
//@@ .. cpp:var:: bool decoupled
//@@
//@@     Indicates whether responses generated by the model are decoupled from
//@@     the requests issued to it, which means the number of responses
//@@     generated by the model may differ from the number of requests issued,
//@@     and that the responses may be out of order relative to the order of
//@@     requests. The default is false, which means the model will generate
//@@     exactly one response for each request.
//@@
bool decoupled = 1;
}
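//
// Illustrative sketch (not part of the original file): declaring a model as
// decoupled in config.pbtxt.
//
//   model_transaction_policy { decoupled: true }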
//@@
//@@.. cpp:var:: message ModelRepositoryAgents
//@@
//@@ The repository agents for the model.
//@@
message ModelRepositoryAgents
{
//@@
//@@ .. cpp:var:: message Agent
//@@
//@@ A repository agent that should be invoked for the specified
//@@ repository actions for this model.
//@@
message Agent
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the agent.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ The parameters for the agent.
//@@
map<string, string> parameters = 2;
}
//@@
//@@ .. cpp:var:: Agent agents (repeated)
//@@
//@@ The ordered list of agents for the model. These agents will be
//@@     invoked in order to respond to repository actions occurring for the
//@@ model.
//@@
repeated Agent agents = 1;
}
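//
// Illustrative sketch (not part of the original file): attaching a
// repository agent in config.pbtxt. The agent name and parameter are
// placeholder assumptions; the agent implementation must exist separately.
//
//   model_repository_agents {
//     agents [
//       {
//         name: "my_agent"
//         parameters { key: "verbose"  value: "1" }
//       }
//     ]
//   }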
//@@
//@@.. cpp:var:: message ModelResponseCache
//@@
//@@ The response cache setting for the model.
//@@
message ModelResponseCache
{
//@@
//@@  .. cpp:var:: bool enable
//@@
//@@     Whether or not to use the response cache for the model. If true,
//@@     responses from the model are cached, and when an identical request
//@@     is encountered the cached response is returned instead of running
//@@     the model again. By default, the response cache is disabled for
//@@     the model.
//@@
bool enable = 1;
}
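//
// Illustrative sketch (not part of the original file): enabling the response
// cache for a model in config.pbtxt.
//
//   response_cache { enable: true }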
//@@
//@@.. cpp:var:: message ModelConfig
//@@
//@@ A model configuration.
//@@
message ModelConfig
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string platform
//@@
//@@ The framework for the model. Possible values are
//@@ "tensorrt_plan", "tensorflow_graphdef",
//@@ "tensorflow_savedmodel", "onnxruntime_onnx",
//@@ "pytorch_libtorch".
//@@
string platform = 2;
//@@ .. cpp:var:: string backend
//@@
//@@ The backend used by the model.
//@@
string backend = 17;
//@@ .. cpp:var:: ModelVersionPolicy version_policy
//@@
//@@ Policy indicating which version(s) of the model will be served.
//@@
ModelVersionPolicy version_policy = 3;
//@@ .. cpp:var:: int32 max_batch_size
//@@
//@@ Maximum batch size allowed for inference. This can only decrease
//@@ what is allowed by the model itself. A max_batch_size value of 0
//@@ indicates that batching is not allowed for the model and the
//@@ dimension/shape of the input and output tensors must exactly
//@@ match what is specified in the input and output configuration. A
//@@ max_batch_size value > 0 indicates that batching is allowed and
//@@ so the model expects the input tensors to have an additional
//@@ initial dimension for the batching that is not specified in the
//@@ input (for example, if the model supports batched inputs of
//@@ 2-dimensional tensors then the model configuration will specify
//@@ the input shape as [ X, Y ] but the model will expect the actual
//@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
//@@ returned outputs will also have an additional initial dimension
//@@ for the batch.
//@@
int32 max_batch_size = 4;
//@@ .. cpp:var:: ModelInput input (repeated)
//@@
//@@     The inputs requested by the model.
//@@
repeated ModelInput input = 5;
//@@ .. cpp:var:: ModelOutput output (repeated)
//@@
//@@ The outputs produced by the model.
//@@
repeated ModelOutput output = 6;
//@@ .. cpp:var:: BatchInput batch_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ batch related values to the model.
//@@
repeated BatchInput batch_input = 20;
//@@ .. cpp:var:: BatchOutput batch_output (repeated)
//@@
//@@     The outputs produced by the model that require special handling
//@@ by the model backend.
//@@
repeated BatchOutput batch_output = 21;
//@@ .. cpp:var:: ModelOptimizationPolicy optimization
//@@
//@@ Optimization configuration for the model. If not specified
//@@ then default optimization policy is used.
//@@
ModelOptimizationPolicy optimization = 12;
//@@ .. cpp:var:: oneof scheduling_choice
//@@
//@@ The scheduling policy for the model. If not specified the
//@@ default scheduling policy is used for the model. The default
//@@ policy is to execute each inference request independently.
//@@
oneof scheduling_choice
{
//@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
//@@
//@@ If specified, enables the dynamic-batching scheduling
//@@ policy. With dynamic-batching the scheduler may group
//@@ together independent requests into a single batch to
//@@ improve inference throughput.
//@@
ModelDynamicBatching dynamic_batching = 11;
//@@ .. cpp:var:: ModelSequenceBatching sequence_batching
//@@
//@@ If specified, enables the sequence-batching scheduling
//@@ policy. With sequence-batching, inference requests
//@@ with the same correlation ID are routed to the same
//@@ model instance. Multiple sequences of inference requests
//@@ may be batched together into a single batch to
//@@ improve inference throughput.
//@@
ModelSequenceBatching sequence_batching = 13;
//@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
//@@
//@@ If specified, enables the model-ensembling scheduling
//@@ policy. With model-ensembling, inference requests
//@@ will be processed according to the specification, such as an
//@@ execution sequence of models. The input specified in this model
//@@ config will be the input for the ensemble, and the output
//@@ specified will be the output of the ensemble.
//@@
ModelEnsembling ensemble_scheduling = 15;
}
//@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
//@@
//@@ Instances of this model. If not specified, one instance
//@@ of the model will be instantiated on each available GPU.
//@@
repeated ModelInstanceGroup instance_group = 7;
//@@ .. cpp:var:: string default_model_filename
//@@
//@@ Optional filename of the model file to use if a
//@@ compute-capability specific model is not specified in
//@@ :cpp:var:`cc_model_filenames`. If not specified the default name
//@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
//@@ 'model.pt' depending on the model type.
//@@
string default_model_filename = 8;
//@@ .. cpp:var:: map<string,string> cc_model_filenames
//@@
//@@ Optional map from CUDA compute capability to the filename of
//@@ the model that supports that compute capability. The filename
//@@ refers to a file within the model version directory.
//@@
map<string, string> cc_model_filenames = 9;
//@@ .. cpp:var:: map<string,string> metric_tags
//@@
//@@ Optional metric tags. User-specific key-value pairs for metrics
//@@ reported for this model. These tags are applied to the metrics
//@@ reported on the HTTP metrics port.
//@@
map<string, string> metric_tags = 10;
//@@ .. cpp:var:: map<string,ModelParameter> parameters
//@@
//@@ Optional model parameters. User-specified parameter values.
//@@
map<string, ModelParameter> parameters = 14;
//@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
//@@
//@@ Warmup setting of this model. If specified, all instances
//@@ will be run with the request samples in sequence before
//@@ serving the model.
//@@ This field can only be specified if the model is not an ensemble
//@@ model.
//@@
repeated ModelWarmup model_warmup = 16;
//@@ .. cpp:var:: ModelOperations model_operations
//@@
//@@ Optional metadata of the libraries providing custom operations for
//@@ this model.
//@@
ModelOperations model_operations = 18;
//@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
//@@
//@@ Optional specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
ModelTransactionPolicy model_transaction_policy = 19;
//@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
//@@
//@@ Optional specification of the agent(s) that should be invoked
//@@     when repository actions are performed for this model.
//@@
ModelRepositoryAgents model_repository_agents = 23;
//@@ .. cpp:var:: ModelResponseCache response_cache
//@@
//@@ Optional setting for utilizing the response cache for this
//@@ model.
//@@
ModelResponseCache response_cache = 24;
}
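//
// Illustrative sketch (not part of the original file): a minimal config.pbtxt
// exercising a few of the ModelConfig fields above. The model name, backend,
// tensor names, data types and shapes are placeholder assumptions.
//
//   name: "my_model"
//   backend: "onnxruntime"
//   max_batch_size: 8
//   input [
//     { name: "INPUT_0"  data_type: TYPE_FP32  dims: [ 3, 224, 224 ] }
//   ]
//   output [
//     { name: "OUTPUT_0"  data_type: TYPE_FP32  dims: [ 1000 ] }
//   ]
//   instance_group [
//     { count: 1  kind: KIND_GPU  gpus: [ 0 ] }
//   ]
//   dynamic_batching {
//     preferred_batch_size: [ 4, 8 ]
//     max_queue_delay_microseconds: 100
//   }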
\ No newline at end of file
# flake8: noqa
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: model_config.protxt
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import enum_type_wrapper
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 \x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 
\x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 \x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 
\x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 
\x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3'
)
_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType']
DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)
TYPE_INVALID = 0
TYPE_BOOL = 1
TYPE_UINT8 = 2
TYPE_UINT16 = 3
TYPE_UINT32 = 4
TYPE_UINT64 = 5
TYPE_INT8 = 6
TYPE_INT16 = 7
TYPE_INT32 = 8
TYPE_INT64 = 9
TYPE_FP16 = 10
TYPE_FP32 = 11
TYPE_FP64 = 12
TYPE_STRING = 13
TYPE_BF16 = 14
_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter']
_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource']
_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup']
_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[
'SecondaryDevice']
_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape']
_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput']
_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput']
_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput']
_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput']
_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy']
_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest']
_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All']
_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[
'Specific']
_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[
'ModelOptimizationPolicy']
_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'Graph']
_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'Cuda']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[
'GraphSpec']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
'Shape']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
'LowerBound']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[
'InputEntry']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
'InputEntry']
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'ExecutionAccelerators']
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[
'Accelerator']
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[
'ParametersEntry']
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'PinnedMemoryBuffer']
_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy']
_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[
'ModelDynamicBatching']
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[
'PriorityQueuePolicyEntry']
_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[
'ModelSequenceBatching']
_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[
'Control']
_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[
'ControlInput']
_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[
'InitialState']
_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[
'State']
_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[
'StrategyDirect']
_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[
'StrategyOldest']
_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling']
_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step']
_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[
'InputMapEntry']
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[
'OutputMapEntry']
_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter']
_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup']
_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input']
_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry']
_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations']
_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[
'ModelTransactionPolicy']
_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[
'ModelRepositoryAgents']
_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[
'Agent']
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[
'ParametersEntry']
_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache']
_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig']
_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[
'CcModelFilenamesEntry']
_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[
'MetricTagsEntry']
_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[
'ParametersEntry']
_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[
'SecondaryDeviceKind']
_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind']
_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format']
_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind']
_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind']
_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[
'ModelPriority']
_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[
'TimeoutAction']
_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[
'Kind']
ModelRateLimiter = _reflection.GeneratedProtocolMessageType(
'ModelRateLimiter',
(_message.Message, ),
{
'Resource':
_reflection.GeneratedProtocolMessageType(
'Resource',
(_message.Message, ),
{
'DESCRIPTOR': _MODELRATELIMITER_RESOURCE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource)
}),
'DESCRIPTOR':
_MODELRATELIMITER,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRateLimiter)
})
_sym_db.RegisterMessage(ModelRateLimiter)
_sym_db.RegisterMessage(ModelRateLimiter.Resource)
ModelInstanceGroup = _reflection.GeneratedProtocolMessageType(
'ModelInstanceGroup',
(_message.Message, ),
{
'SecondaryDevice':
_reflection.GeneratedProtocolMessageType(
'SecondaryDevice',
(_message.Message, ),
{
'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice)
}),
'DESCRIPTOR':
_MODELINSTANCEGROUP,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup)
})
_sym_db.RegisterMessage(ModelInstanceGroup)
_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice)
ModelTensorReshape = _reflection.GeneratedProtocolMessageType(
'ModelTensorReshape',
(_message.Message, ),
{
'DESCRIPTOR': _MODELTENSORRESHAPE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelTensorReshape)
})
_sym_db.RegisterMessage(ModelTensorReshape)
ModelInput = _reflection.GeneratedProtocolMessageType(
'ModelInput',
(_message.Message, ),
{
'DESCRIPTOR': _MODELINPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelInput)
})
_sym_db.RegisterMessage(ModelInput)
ModelOutput = _reflection.GeneratedProtocolMessageType(
'ModelOutput',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOUTPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOutput)
})
_sym_db.RegisterMessage(ModelOutput)
BatchInput = _reflection.GeneratedProtocolMessageType(
'BatchInput',
(_message.Message, ),
{
'DESCRIPTOR': _BATCHINPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.BatchInput)
})
_sym_db.RegisterMessage(BatchInput)
BatchOutput = _reflection.GeneratedProtocolMessageType(
'BatchOutput',
(_message.Message, ),
{
'DESCRIPTOR': _BATCHOUTPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.BatchOutput)
})
_sym_db.RegisterMessage(BatchOutput)
ModelVersionPolicy = _reflection.GeneratedProtocolMessageType(
'ModelVersionPolicy',
(_message.Message, ),
{
'Latest':
_reflection.GeneratedProtocolMessageType(
'Latest',
(_message.Message, ),
{
'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest)
}),
'All':
_reflection.GeneratedProtocolMessageType(
'All',
(_message.Message, ),
{
'DESCRIPTOR': _MODELVERSIONPOLICY_ALL,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All)
}),
'Specific':
_reflection.GeneratedProtocolMessageType(
'Specific',
(_message.Message, ),
{
'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific)
}),
'DESCRIPTOR':
_MODELVERSIONPOLICY,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy)
})
_sym_db.RegisterMessage(ModelVersionPolicy)
_sym_db.RegisterMessage(ModelVersionPolicy.Latest)
_sym_db.RegisterMessage(ModelVersionPolicy.All)
_sym_db.RegisterMessage(ModelVersionPolicy.Specific)
ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType(
'ModelOptimizationPolicy',
(_message.Message, ),
{
'Graph':
_reflection.GeneratedProtocolMessageType(
'Graph',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph)
}),
'Cuda':
_reflection.GeneratedProtocolMessageType(
'Cuda',
(_message.Message, ),
{
'GraphSpec':
_reflection.GeneratedProtocolMessageType(
'GraphSpec',
(_message.Message, ),
{
'Shape':
_reflection.GeneratedProtocolMessageType(
'Shape',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape)
}),
'LowerBound':
_reflection.GeneratedProtocolMessageType(
'LowerBound',
(_message.Message, ),
{
'InputEntry':
_reflection.GeneratedProtocolMessageType(
'InputEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound)
}),
'InputEntry':
_reflection.GeneratedProtocolMessageType(
'InputEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda)
}),
'ExecutionAccelerators':
_reflection.GeneratedProtocolMessageType(
'ExecutionAccelerators',
(_message.Message, ),
{
'Accelerator':
_reflection.GeneratedProtocolMessageType(
'Accelerator',
(_message.Message, ),
{
'ParametersEntry':
_reflection.GeneratedProtocolMessageType(
'ParametersEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators)
}),
'PinnedMemoryBuffer':
_reflection.GeneratedProtocolMessageType(
'PinnedMemoryBuffer',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy)
})
_sym_db.RegisterMessage(ModelOptimizationPolicy)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound)
_sym_db.RegisterMessage(
ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry)
_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators)
_sym_db.RegisterMessage(
ModelOptimizationPolicy.ExecutionAccelerators.Accelerator)
_sym_db.RegisterMessage(
ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry)
_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer)
ModelQueuePolicy = _reflection.GeneratedProtocolMessageType(
'ModelQueuePolicy',
(_message.Message, ),
{
'DESCRIPTOR': _MODELQUEUEPOLICY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy)
})
_sym_db.RegisterMessage(ModelQueuePolicy)
ModelDynamicBatching = _reflection.GeneratedProtocolMessageType(
'ModelDynamicBatching',
(_message.Message, ),
{
'PriorityQueuePolicyEntry':
_reflection.GeneratedProtocolMessageType(
'PriorityQueuePolicyEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry)
}),
'DESCRIPTOR':
_MODELDYNAMICBATCHING,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching)
})
_sym_db.RegisterMessage(ModelDynamicBatching)
_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry)
ModelSequenceBatching = _reflection.GeneratedProtocolMessageType(
'ModelSequenceBatching',
(_message.Message, ),
{
'Control':
_reflection.GeneratedProtocolMessageType(
'Control',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control)
}),
'ControlInput':
_reflection.GeneratedProtocolMessageType(
'ControlInput',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput)
}),
'InitialState':
_reflection.GeneratedProtocolMessageType(
'InitialState',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState)
}),
'State':
_reflection.GeneratedProtocolMessageType(
'State',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State)
}),
'StrategyDirect':
_reflection.GeneratedProtocolMessageType(
'StrategyDirect',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect)
}),
'StrategyOldest':
_reflection.GeneratedProtocolMessageType(
'StrategyOldest',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest)
}),
'DESCRIPTOR':
_MODELSEQUENCEBATCHING,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching)
})
_sym_db.RegisterMessage(ModelSequenceBatching)
_sym_db.RegisterMessage(ModelSequenceBatching.Control)
_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput)
_sym_db.RegisterMessage(ModelSequenceBatching.InitialState)
_sym_db.RegisterMessage(ModelSequenceBatching.State)
_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect)
_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest)
ModelEnsembling = _reflection.GeneratedProtocolMessageType(
'ModelEnsembling',
(_message.Message, ),
{
'Step':
_reflection.GeneratedProtocolMessageType(
'Step',
(_message.Message, ),
{
'InputMapEntry':
_reflection.GeneratedProtocolMessageType(
'InputMapEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry)
}),
'OutputMapEntry':
_reflection.GeneratedProtocolMessageType(
'OutputMapEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry)
}),
'DESCRIPTOR':
_MODELENSEMBLING_STEP,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step)
}),
'DESCRIPTOR':
_MODELENSEMBLING,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling)
})
_sym_db.RegisterMessage(ModelEnsembling)
_sym_db.RegisterMessage(ModelEnsembling.Step)
_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry)
_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry)
ModelParameter = _reflection.GeneratedProtocolMessageType(
'ModelParameter',
(_message.Message, ),
{
'DESCRIPTOR': _MODELPARAMETER,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelParameter)
})
_sym_db.RegisterMessage(ModelParameter)
ModelWarmup = _reflection.GeneratedProtocolMessageType(
'ModelWarmup',
(_message.Message, ),
{
'Input':
_reflection.GeneratedProtocolMessageType(
'Input',
(_message.Message, ),
{
'DESCRIPTOR': _MODELWARMUP_INPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input)
}),
'InputsEntry':
_reflection.GeneratedProtocolMessageType(
'InputsEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry)
}),
'DESCRIPTOR':
_MODELWARMUP,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelWarmup)
})
_sym_db.RegisterMessage(ModelWarmup)
_sym_db.RegisterMessage(ModelWarmup.Input)
_sym_db.RegisterMessage(ModelWarmup.InputsEntry)
ModelOperations = _reflection.GeneratedProtocolMessageType(
'ModelOperations',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOPERATIONS,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOperations)
})
_sym_db.RegisterMessage(ModelOperations)
ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType(
'ModelTransactionPolicy',
(_message.Message, ),
{
'DESCRIPTOR': _MODELTRANSACTIONPOLICY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy)
})
_sym_db.RegisterMessage(ModelTransactionPolicy)
ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType(
'ModelRepositoryAgents',
(_message.Message, ),
{
'Agent':
_reflection.GeneratedProtocolMessageType(
'Agent',
(_message.Message, ),
{
'ParametersEntry':
_reflection.GeneratedProtocolMessageType(
'ParametersEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry)
}),
'DESCRIPTOR':
_MODELREPOSITORYAGENTS_AGENT,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent)
}),
'DESCRIPTOR':
_MODELREPOSITORYAGENTS,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents)
})
_sym_db.RegisterMessage(ModelRepositoryAgents)
_sym_db.RegisterMessage(ModelRepositoryAgents.Agent)
_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry)
ModelResponseCache = _reflection.GeneratedProtocolMessageType(
'ModelResponseCache',
(_message.Message, ),
{
'DESCRIPTOR': _MODELRESPONSECACHE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelResponseCache)
})
_sym_db.RegisterMessage(ModelResponseCache)
ModelConfig = _reflection.GeneratedProtocolMessageType(
'ModelConfig',
(_message.Message, ),
{
'CcModelFilenamesEntry':
_reflection.GeneratedProtocolMessageType(
'CcModelFilenamesEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry)
}),
'MetricTagsEntry':
_reflection.GeneratedProtocolMessageType(
'MetricTagsEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry)
}),
'ParametersEntry':
_reflection.GeneratedProtocolMessageType(
'ParametersEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry)
}),
'DESCRIPTOR':
_MODELCONFIG,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig)
})
_sym_db.RegisterMessage(ModelConfig)
_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry)
_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry)
_sym_db.RegisterMessage(ModelConfig.ParametersEntry)
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001'
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001'
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001'
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001'
_MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None
_MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001'
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001'
_MODELWARMUP_INPUTSENTRY._options = None
_MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001'
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001'
_MODELCONFIG_CCMODELFILENAMESENTRY._options = None
_MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001'
_MODELCONFIG_METRICTAGSENTRY._options = None
_MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001'
_MODELCONFIG_PARAMETERSENTRY._options = None
_MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001'
_DATATYPE._serialized_start = 8137
_DATATYPE._serialized_end = 8387
_MODELRATELIMITER._serialized_start = 35
_MODELRATELIMITER._serialized_end = 185
_MODELRATELIMITER_RESOURCE._serialized_start = 130
_MODELRATELIMITER_RESOURCE._serialized_end = 185
_MODELINSTANCEGROUP._serialized_start = 188
_MODELINSTANCEGROUP._serialized_end = 707
_MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484
_MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640
_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603
_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640
_MODELINSTANCEGROUP_KIND._serialized_start = 642
_MODELINSTANCEGROUP_KIND._serialized_end = 707
_MODELTENSORRESHAPE._serialized_start = 709
_MODELTENSORRESHAPE._serialized_end = 744
_MODELINPUT._serialized_start = 747
_MODELINPUT._serialized_end = 1053
_MODELINPUT_FORMAT._serialized_start = 994
_MODELINPUT_FORMAT._serialized_end = 1053
_MODELOUTPUT._serialized_start = 1056
_MODELOUTPUT._serialized_end = 1234
_BATCHINPUT._serialized_start = 1237
_BATCHINPUT._serialized_end = 1582
_BATCHINPUT_KIND._serialized_start = 1377
_BATCHINPUT_KIND._serialized_end = 1582
_BATCHOUTPUT._serialized_start = 1585
_BATCHOUTPUT._serialized_end = 1728
_BATCHOUTPUT_KIND._serialized_start = 1686
_BATCHOUTPUT_KIND._serialized_end = 1728
_MODELVERSIONPOLICY._serialized_start = 1731
_MODELVERSIONPOLICY._serialized_end = 2003
_MODELVERSIONPOLICY_LATEST._serialized_start = 1919
_MODELVERSIONPOLICY_LATEST._serialized_end = 1949
_MODELVERSIONPOLICY_ALL._serialized_start = 1951
_MODELVERSIONPOLICY_ALL._serialized_end = 1956
_MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958
_MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986
_MODELOPTIMIZATIONPOLICY._serialized_start = 2006
_MODELOPTIMIZATIONPOLICY._serialized_end = 3795
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558
_MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561
_MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720
_MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722
_MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795
_MODELQUEUEPOLICY._serialized_start = 3798
_MODELQUEUEPOLICY._serialized_end = 4017
_MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979
_MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017
_MODELDYNAMICBATCHING._serialized_start = 4020
_MODELDYNAMICBATCHING._serialized_end = 4431
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431
_MODELSEQUENCEBATCHING._serialized_start = 4434
_MODELSEQUENCEBATCHING._serialized_end = 5697
_MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759
_MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064
_MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947
_MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064
_MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066
_MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153
_MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156
_MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294
_MODELSEQUENCEBATCHING_STATE._serialized_start = 5297
_MODELSEQUENCEBATCHING_STATE._serialized_end = 5469
_MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471
_MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559
_MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561
_MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678
_MODELENSEMBLING._serialized_start = 5700
_MODELENSEMBLING._serialized_end = 6049
_MODELENSEMBLING_STEP._serialized_start = 5767
_MODELENSEMBLING_STEP._serialized_end = 6049
_MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952
_MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049
_MODELPARAMETER._serialized_start = 6051
_MODELPARAMETER._serialized_end = 6089
_MODELWARMUP._serialized_start = 6092
_MODELWARMUP._serialized_end = 6437
_MODELWARMUP_INPUT._serialized_start = 6209
_MODELWARMUP_INPUT._serialized_end = 6360
_MODELWARMUP_INPUTSENTRY._serialized_start = 6362
_MODELWARMUP_INPUTSENTRY._serialized_end = 6437
_MODELOPERATIONS._serialized_start = 6439
_MODELOPERATIONS._serialized_end = 6485
_MODELTRANSACTIONPOLICY._serialized_start = 6487
_MODELTRANSACTIONPOLICY._serialized_end = 6530
_MODELREPOSITORYAGENTS._serialized_start = 6533
_MODELREPOSITORYAGENTS._serialized_end = 6763
_MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615
_MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682
_MODELRESPONSECACHE._serialized_start = 6765
_MODELRESPONSECACHE._serialized_end = 6801
_MODELCONFIG._serialized_start = 6804
_MODELCONFIG._serialized_end = 8134
_MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929
_MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984
_MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986
_MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035
_MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037
_MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113
# @@protoc_insertion_point(module_scope)
@@ -417,7 +417,10 @@ def get_component_tabs(*apis, vdl_args, request_args):
all_tabs.update(api('component_tabs', request_args))
all_tabs.add('static_graph')
else:
return ['static_graph', 'x2paddle', 'fastdeploy_server']
return [
'static_graph', 'x2paddle', 'fastdeploy_server',
'fastdeploy_client'
]
return list(all_tabs)
......
@@ -13,12 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import json
import multiprocessing
import os
import re
import sys
import threading
import time
import urllib
import webbrowser
import requests
@@ -32,6 +34,8 @@ from flask_babel import Babel
import visualdl.server
from visualdl import __version__
from visualdl.component.inference.fastdeploy_lib import get_start_arguments
from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call
from visualdl.component.inference.model_convert_server import create_model_convert_api_call
from visualdl.component.profiler.profiler_server import create_profiler_api_call
from visualdl.server.api import create_api_call
@@ -71,6 +75,7 @@ def create_app(args): # noqa: C901
api_call = create_api_call(args.logdir, args.model, args.cache_timeout)
profiler_api_call = create_profiler_api_call(args.logdir)
inference_api_call = create_model_convert_api_call()
fastdeploy_api_call = create_fastdeploy_api_call()
if args.telemetry:
update_util.PbUpdater(args.product).start()
@@ -153,6 +158,141 @@ def create_app(args): # noqa: C901
return make_response(
Response(data, mimetype=mimetype, headers=headers))
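    # Generic fastdeploy dispatch route: the trailing <method> path segment
    # selects the backend handler; GET query args or POST form data are
    # passed through unchanged.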
@app.route(api_path + '/fastdeploy/<path:method>', methods=["GET", "POST"])
def serve_fastdeploy_api(method):
if request.method == 'POST':
data, mimetype, headers = fastdeploy_api_call(method, request.form)
else:
data, mimetype, headers = fastdeploy_api_call(method, request.args)
return make_response(
Response(data, mimetype=mimetype, headers=headers))
@app.route(
api_path + '/fastdeploy/fastdeploy_client', methods=["GET", "POST"])
def serve_fastdeploy_create_fastdeploy_client():
try:
if request.method == 'POST':
fastdeploy_api_call('create_fastdeploy_client', request.form)
request_args = request.form
else:
fastdeploy_api_call('create_fastdeploy_client', request.args)
request_args = request.args
except Exception as e:
error_msg = '{}'.format(e)
return make_response(error_msg)
args = urllib.parse.urlencode(request_args)
if args:
return redirect(
api_path + "/fastdeploy/fastdeploy_client/app?{}".format(args),
code=302)
return redirect(
api_path + "/fastdeploy/fastdeploy_client/app", code=302)
@app.route(
api_path + "/fastdeploy/fastdeploy_client/<path:path>",
methods=["GET", "POST"])
def request_fastdeploy_create_fastdeploy_client_app(path: str):
        '''
        Gradio app server URL interface. Requests for the Gradio app are proxied to the local Gradio server.
        Args:
            path(str): Resource path requested from the Gradio server.
        Returns:
            Whatever the Gradio server returns for the proxied request.
        '''
if request.method == 'POST':
port = fastdeploy_api_call('create_fastdeploy_client',
request.form)
request_args = request.form
else:
port = fastdeploy_api_call('create_fastdeploy_client',
request.args)
request_args = request.args
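        # Rewrite the incoming URL so the request is proxied to the Gradio
        # client app listening on a locally chosen port for this client.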
if path == 'app':
proxy_url = request.url.replace(
request.host_url.rstrip('/') + api_path +
'/fastdeploy/fastdeploy_client/app',
'http://localhost:{}/'.format(port))
else:
proxy_url = request.url.replace(
request.host_url.rstrip('/') + api_path +
'/fastdeploy/fastdeploy_client/',
'http://localhost:{}/'.format(port))
resp = requests.request(
method=request.method,
url=proxy_url,
headers={
key: value
for (key, value) in request.headers if key != 'Host'
},
data=request.get_data(),
cookies=request.cookies,
allow_redirects=False)
if path == 'app':
content = resp.content
if request_args and 'server_id' in request_args:
server_id = request_args.get('server_id')
start_args = get_start_arguments(server_id)
http_port = start_args.get('http-port', '')
metrics_port = start_args.get('metrics-port', '')
model_name = start_args.get('default_model_name', '')
content = content.decode()
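                # Pre-fill the Gradio form defaults with the selected server's
                # start arguments. The matched labels are the client's form
                # fields: 服务ip (server address), 推理服务端口 (inference HTTP
                # port), 性能服务端口 (metrics port), 模型名称 (model name),
                # 模型版本 (model version).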
try:
default_server_addr = re.search(
'"label": {}.*?"value": "".*?}}'.format(
json.dumps("服务ip", ensure_ascii=True).replace(
'\\', '\\\\')), content).group(0)
cur_server_addr = default_server_addr.replace(
'"value": ""', '"value": "localhost"')
default_http_port = re.search(
'"label": {}.*?"value": "".*?}}'.format(
json.dumps("推理服务端口", ensure_ascii=True).replace(
'\\', '\\\\')), content).group(0)
cur_http_port = default_http_port.replace(
'"value": ""', '"value": "{}"'.format(http_port))
default_metrics_port = re.search(
'"label": {}.*?"value": "".*?}}'.format(
json.dumps("性能服务端口", ensure_ascii=True).replace(
'\\', '\\\\')), content).group(0)
cur_metrics_port = default_metrics_port.replace(
'"value": ""', '"value": "{}"'.format(metrics_port))
default_model_name = re.search(
'"label": {}.*?"value": "".*?}}'.format(
json.dumps("模型名称", ensure_ascii=True).replace(
'\\', '\\\\')), content).group(0)
cur_model_name = default_model_name.replace(
'"value": ""', '"value": "{}"'.format(model_name))
default_model_version = re.search(
'"label": {}.*?"value": "".*?}}'.format(
json.dumps("模型版本", ensure_ascii=True).replace(
'\\', '\\\\')), content).group(0)
cur_model_version = default_model_version.replace(
'"value": ""', '"value": "{}"'.format('1'))
content = content.replace(default_server_addr,
cur_server_addr)
if http_port:
content = content.replace(default_http_port,
cur_http_port)
if metrics_port:
content = content.replace(default_metrics_port,
cur_metrics_port)
if model_name:
content = content.replace(default_model_name,
cur_model_name)
content = content.replace(default_model_version,
cur_model_version)
except Exception:
pass
finally:
content = content.encode()
else:
content = resp.content
headers = [(name, value) for (name, value) in resp.raw.headers.items()]
response = Response(content, resp.status_code, headers)
return response
@app.route(api_path + '/component_tabs')
def component_tabs():
data, mimetype, headers = get_component_tabs(
......
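A minimal usage sketch for the routes added above, assuming VisualDL's default port 8040 and the default '/api' mount path; the 'get_server_list' method name is a hypothetical placeholder, since the concrete method names live in the fastdeploy api handler and are not shown in this diff.

# Hedged usage sketch (not part of this commit). Base URL, port and the
# 'get_server_list' method name are illustrative assumptions.
import requests

BASE = 'http://localhost:8040/api/fastdeploy'

# Generic dispatch route: /fastdeploy/<method> forwards GET args or POST form
# data to the backend handler selected by <method>.
resp = requests.get(BASE + '/get_server_list')  # hypothetical method name
print(resp.status_code, resp.text)

# The Gradio client route redirects to the proxied app; passing server_id lets
# the proxy pre-fill ports and model name from the server's start arguments.
resp = requests.get(BASE + '/fastdeploy_client', params={'server_id': '0'})
print(resp.status_code)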
@@ -78,7 +78,8 @@ def validate_args(args):
supported_tabs = [
'scalar', 'image', 'text', 'embeddings', 'audio', 'histogram',
'hyper_parameters', 'static_graph', 'dynamic_graph', 'pr_curve',
'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server'
'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server',
'fastdeploy_client'
]
if args.component_tabs is not None:
for component_tab in args.component_tabs:
......
@@ -23,6 +23,7 @@ USER_HOME = os.path.expanduser('~')
VDL_HOME = os.path.join(USER_HOME, '.visualdl')
CONF_HOME = os.path.join(VDL_HOME, 'conf')
CONFIG_PATH = os.path.join(CONF_HOME, 'config.json')
FASTDEPLOYSERVER_PATH = os.path.join(VDL_HOME, 'fastdeployserver')
X2PADDLE_CACHE_PATH = os.path.join(VDL_HOME, 'x2paddle')
@@ -32,5 +33,7 @@ def init_vdl_config():
if not os.path.exists(CONFIG_PATH) or 0 == os.path.getsize(CONFIG_PATH):
with open(CONFIG_PATH, 'w') as fp:
fp.write(json.dumps(default_vdl_config))
if not os.path.exists(FASTDEPLOYSERVER_PATH):
os.makedirs(FASTDEPLOYSERVER_PATH, exist_ok=True)
if not os.path.exists(X2PADDLE_CACHE_PATH):
os.makedirs(X2PADDLE_CACHE_PATH, exist_ok=True)
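For orientation, a short illustrative sketch of the cache layout these constants establish under the user's home directory (paths taken from the constants above):

# Illustrative only: directories prepared by init_vdl_config() under ~/.visualdl.
import os

VDL_HOME = os.path.join(os.path.expanduser('~'), '.visualdl')
print(os.path.join(VDL_HOME, 'conf', 'config.json'))  # global VisualDL config file
print(os.path.join(VDL_HOME, 'fastdeployserver'))     # fastdeploy server resources
print(os.path.join(VDL_HOME, 'x2paddle'))             # x2paddle conversion cache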