Unverified commit a418dd44 authored by chenjian, committed by GitHub

Add fastdeploy server and client component (#1169)

* add backend support for fastdeploy server

* fix

* add code

* fix

* fix

* add fastdeploy server component

* add fastdeploy server and client

* add exception description

* fix

* add model repository judgement

* add component tab for fastdeploy client

* update more tasks in fastdeploy client

* sort filenames

* backup config

* noqa for autogenerated file

* add data validation

* add __init__ for package

* add calculating layout for frontend

* add alive server detection and optimize client

* add alive server detection and optimize client

* add alive server detection and optimize client

* add metrics in gradio client

* update presentation

* Change return value to None for frontend performance data when server not ready

* add get_server_config and download_pretrain_model api

* add get_server_config and download_pretrain_model api

* add unit for metric table

* add unit for metric table

* fix a bug

* add judgement pretrained model download

* add judgement pretrained model download

* add version info for frontend

* rename download model

* fix a bug

* add fastdeploy model list

* optimize for choose configuration files

* modify according to frontend need

* fix name in config to model name

* optimize for server list and alive judgement

* keep server name as string type

* optimize process judgement logic

* optimize for deleting resource files

* add rename resource file

* fix

* fix a bug

* optimize code structure

* optimize code structure

* remove chinese tips and remove fastdeploy-python in requirements
Parent b90619b9
@@ -12,4 +12,8 @@ multiprocess
 packaging
 x2paddle
 rarfile
-onnx >= 1.6.0
\ No newline at end of file
+gradio
+tritonclient[all]
+attrdict
+psutil
+onnx >= 1.6.0
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import gradio as gr
import numpy as np
from .http_client_manager import get_metric_data
from .http_client_manager import HttpClientManager
from .http_client_manager import metrics_table_head
from .visualizer import visualize_detection
from .visualizer import visualize_face_alignment
from .visualizer import visualize_face_detection
from .visualizer import visualize_headpose
from .visualizer import visualize_keypoint_detection
from .visualizer import visualize_matting
from .visualizer import visualize_ocr
from .visualizer import visualize_segmentation
_http_manager = HttpClientManager()
supported_tasks = {
'detection': visualize_detection,
'facedet': visualize_face_detection,
'keypointdetection': visualize_keypoint_detection,
'segmentation': visualize_segmentation,
'matting': visualize_matting,
'ocr': visualize_ocr,
'facealignment': visualize_face_alignment,
'headpose': visualize_headpose,
'unspecified': lambda x: str(x)
}
def create_gradio_client_app(): # noqa:C901
css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
color: white;
border-color: black;
background: black;
}
input[type='range'] {
accent-color: black;
}
.dark input[type='range'] {
accent-color: #dfdfdf;
}
#gallery {
min-height: 22rem;
margin-bottom: 15px;
margin-left: auto;
margin-right: auto;
border-bottom-right-radius: .5rem !important;
border-bottom-left-radius: .5rem !important;
}
#gallery>div>.h-full {
min-height: 20rem;
}
.details:hover {
text-decoration: underline;
}
.gr-button {
white-space: nowrap;
}
.gr-button:focus {
border-color: rgb(147 197 253 / var(--tw-border-opacity));
outline: none;
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
--tw-border-opacity: 1;
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) \
var(--tw-ring-offset-color);
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
--tw-ring-opacity: .5;
}
.footer {
margin-bottom: 45px;
margin-top: 35px;
text-align: center;
border-bottom: 1px solid #e5e5e5;
}
.footer>p {
font-size: .8rem;
display: inline-block;
padding: 0 10px;
transform: translateY(10px);
background: white;
}
.dark .footer {
border-color: #303030;
}
.dark .footer>p {
background: #0b0f19;
}
.prompt h4{
margin: 1.25em 0 .25em 0;
font-weight: bold;
font-size: 115%;
}
"""
block = gr.Blocks(css=css)
with block:
gr.HTML("""
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
<div
style="
display: inline-flex;
gap: 0.8rem;
font-size: 1.75rem;
justify-content: center;
"
>
<h1>
FastDeploy Client
</h1>
</div>
<p font-size: 94%">
The client is used for creating requests to fastdeploy server.
</p>
</div>
""")
with gr.Group():
with gr.Box():
with gr.Column():
with gr.Row():
server_addr_text = gr.Textbox(
label="服务ip",
show_label=True,
max_lines=1,
placeholder="localhost",
)
server_http_port_text = gr.Textbox(
label="推理服务端口",
show_label=True,
max_lines=1,
placeholder="8000",
)
server_metric_port_text = gr.Textbox(
label="性能服务端口",
show_label=True,
max_lines=1,
placeholder="8002",
)
with gr.Row():
model_name_text = gr.Textbox(
label="模型名称",
show_label=True,
max_lines=1,
placeholder="yolov5",
)
model_version_text = gr.Textbox(
label="模型版本",
show_label=True,
max_lines=1,
placeholder="1",
)
with gr.Box():
with gr.Tab("组件形式"):
check_button = gr.Button("获取模型输入输出")
component_format_column = gr.Column(visible=False)
with component_format_column:
task_radio = gr.Radio(
choices=list(supported_tasks.keys()),
value='unspecified',
label='任务类型',
visible=True)
gr.Markdown("根据模型需要,挑选文本框或者图像框进行输入")
with gr.Row():
with gr.Column():
gr.Markdown("模型输入")
input_accordions = []
input_name_texts = []
input_images = []
input_texts = []
for i in range(6):
accordion = gr.Accordion(
"输入变量 {}".format(i),
open=True,
visible=False)
with accordion:
input_name_text = gr.Textbox(
label="变量名", interactive=False)
input_image = gr.Image(type='numpy')
input_text = gr.Textbox(
label="文本框", max_lines=1000)
input_accordions.append(accordion)
input_name_texts.append(input_name_text)
input_images.append(input_image)
input_texts.append(input_text)
with gr.Column():
gr.Markdown("模型输出")
output_accordions = []
output_name_texts = []
output_images = []
output_texts = []
for i in range(6):
accordion = gr.Accordion(
"输出变量 {}".format(i),
open=True,
visible=False)
with accordion:
output_name_text = gr.Textbox(
label="变量名", interactive=False)
output_text = gr.Textbox(
label="服务返回的原数据",
interactive=False,
show_label=True)
output_image = gr.Image(
interactive=False)
output_accordions.append(accordion)
output_name_texts.append(output_name_text)
output_images.append(output_image)
output_texts.append(output_text)
component_submit_button = gr.Button("提交请求")
with gr.Tab("原始形式"):
gr.Markdown("模型输入")
raw_payload_text = gr.Textbox(
label="负载数据", max_lines=10000)
with gr.Column():
gr.Markdown("输出")
output_raw_text = gr.Textbox(
label="服务返回的原始数据", interactive=False)
raw_submit_button = gr.Button("提交请求")
with gr.Box():
with gr.Column():
gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)")
output_html_table = gr.HTML(
label="metrics",
interactive=False,
show_label=False,
value=metrics_table_head.format('', ''))
update_metric_button = gr.Button("更新统计数据")
status_text = gr.Textbox(
label="status",
show_label=True,
max_lines=1,
interactive=False)
all_input_output_components = input_accordions + input_name_texts + input_images + \
input_texts + output_accordions + output_name_texts + output_images + output_texts
def get_input_output_name(server_ip, server_port, model_name,
model_version):
try:
server_addr = server_ip + ':' + server_port
input_metas, output_metas = _http_manager.get_model_meta(
server_addr, model_name, model_version)
except Exception as e:
return {status_text: str(e)}
results = {
component: None
for component in all_input_output_components
}
results[component_format_column] = gr.update(visible=True)
# results[check_button] = gr.update(visible=False)
for input_accordio in input_accordions:
results[input_accordio] = gr.update(visible=False)
for output_accordio in output_accordions:
results[output_accordio] = gr.update(visible=False)
results[status_text] = 'GetInputOutputName Successful'
for i, input_meta in enumerate(input_metas):
results[input_accordions[i]] = gr.update(visible=True)
results[input_name_texts[i]] = input_meta['name']
for i, output_meta in enumerate(output_metas):
results[output_accordions[i]] = gr.update(visible=True)
results[output_name_texts[i]] = output_meta['name']
return results
def component_inference(*args):
server_ip = args[0]
http_port = args[1]
metric_port = args[2]
model_name = args[3]
model_version = args[4]
names = args[5:5 + len(input_name_texts)]
images = args[5 + len(input_name_texts):5 + len(input_name_texts) +
len(input_images)]
texts = args[5 + len(input_name_texts) + len(input_images):5 +
len(input_name_texts) + len(input_images) +
len(input_texts)]
task_type = args[-1]
server_addr = server_ip + ':' + http_port
if server_ip and http_port and model_name and model_version:
inputs = {}
for i, input_name in enumerate(names):
if input_name:
if images[i] is not None:
inputs[input_name] = np.array([images[i]])
if texts[i]:
inputs[input_name] = np.array(
[[texts[i].encode('utf-8')]], dtype=np.object_)
try:
infer_results = _http_manager.infer(
server_addr, model_name, model_version, inputs)
results = {status_text: 'Inference Successful'}
for i, (output_name,
data) in enumerate(infer_results.items()):
results[output_name_texts[i]] = output_name
results[output_texts[i]] = str(data)
if task_type != 'unspecified':
try:
results[output_images[i]] = supported_tasks[
task_type](images[0], data)
except Exception:
results[output_images[i]] = None
if metric_port:
html_table = get_metric_data(server_ip, metric_port)
results[output_html_table] = html_table
return results
except Exception as e:
return {status_text: 'Error: {}'.format(e)}
else:
return {
status_text:
'Please input server addr, model name and model version.'
}
def raw_inference(*args):
server_ip = args[0]
http_port = args[1]
metric_port = args[2]
model_name = args[3]
model_version = args[4]
payload_text = args[5]
server_addr = server_ip + ':' + http_port
try:
result = _http_manager.raw_infer(server_addr, model_name,
model_version, payload_text)
results = {
status_text: 'Get response from server',
output_raw_text: result
}
if server_ip and metric_port:
html_table = get_metric_data(server_ip, metric_port)
results[output_html_table] = html_table
return results
except Exception as e:
return {status_text: 'Error: {}'.format(e)}
def update_metric(server_ip, metrics_port):
if server_ip and metrics_port:
try:
html_table = get_metric_data(server_ip, metrics_port)
return {
output_html_table: html_table,
status_text: "Successfully update metrics."
}
except Exception as e:
return {status_text: 'Error: {}'.format(e)}
else:
return {
status_text: 'Please input server ip and metrics_port.'
}
check_button.click(
fn=get_input_output_name,
inputs=[
server_addr_text, server_http_port_text, model_name_text,
model_version_text
],
outputs=[
*all_input_output_components, check_button,
component_format_column, status_text
])
component_submit_button.click(
fn=component_inference,
inputs=[
server_addr_text, server_http_port_text,
server_metric_port_text, model_name_text, model_version_text,
*input_name_texts, *input_images, *input_texts, task_radio
],
outputs=[
*output_name_texts, *output_images, *output_texts, status_text,
output_html_table
])
raw_submit_button.click(
fn=raw_inference,
inputs=[
server_addr_text, server_http_port_text,
server_metric_port_text, model_name_text, model_version_text,
raw_payload_text
],
outputs=[output_raw_text, status_text, output_html_table])
update_metric_button.click(
fn=update_metric,
inputs=[server_addr_text, server_metric_port_text],
outputs=[output_html_table, status_text])
return block
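For reference, a minimal sketch (not part of this commit) of running the client app standalone. It assumes gradio is installed and that this module is importable as visualdl.component.inference.fastdeploy_client.client_app; that module path is an assumption based on the relative imports above.

# Hypothetical standalone launcher for the client app defined above.
from visualdl.component.inference.fastdeploy_client.client_app import create_gradio_client_app  # assumed path

if __name__ == '__main__':
    app = create_gradio_client_app()
    # gr.Blocks.launch() starts a local web server for the UI.
    app.launch(server_name='0.0.0.0', server_port=7860)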
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import json
import re
import numpy as np
import requests
import tritonclient.http as httpclient
from attrdict import AttrDict
from tritonclient.utils import InferenceServerException
def convert_http_metadata_config(metadata):
metadata = AttrDict(metadata)
return metadata
def prepare_request(inputs_meta, inputs_data, outputs_meta):
'''
inputs_meta: input metadata from the model, as a mapping of name -> info.
inputs_data: the user's input data, as a mapping of name -> data.
'''
# Set the input data
inputs = []
for input_dict in inputs_meta:
input_name = input_dict['name']
if input_name not in inputs_data:
raise RuntimeError(
'Error: input name {} required by the model does not exist.'.format(
input_name))
if input_dict['datatype'] == 'FP32':
inputs_data[input_name] = inputs_data[input_name].astype(
np.float32
) / 255 # image data returned by gradio is uint8, convert to fp32
if len(input_dict['shape']
) == 3 and input_dict['shape'][0] == 3: # NCHW
inputs_data[input_name] = inputs_data[input_name][0].transpose(
2, 0, 1)
elif len(input_dict['shape']
) == 4 and input_dict['shape'][1] == 3: # NCHW
inputs_data[input_name] = inputs_data[input_name].transpose(
0, 3, 1, 2)
infer_input = httpclient.InferInput(
input_name, inputs_data[input_name].shape, input_dict['datatype'])
infer_input.set_data_from_numpy(inputs_data[input_name])
inputs.append(infer_input)
outputs = []
for output_dict in outputs_meta:
infer_output = httpclient.InferRequestedOutput(output_dict.name)
outputs.append(infer_output)
return inputs, outputs
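To make the reshaping above concrete, here is an illustrative sketch (not from the commit) that feeds a gradio-style uint8 HWC image through prepare_request. It assumes tritonclient is installed; the module path and the tensor name 'INPUT_0' are placeholders.

# Hypothetical example: prepare_request converts a batched HWC uint8 image to FP32 NCHW in place.
import numpy as np
from visualdl.component.inference.fastdeploy_client.http_client_manager import prepare_request  # assumed path

inputs_meta = [{'name': 'INPUT_0', 'datatype': 'FP32', 'shape': [1, 3, 224, 224]}]
outputs_meta = []  # in real use these come from the server's model metadata
inputs_data = {'INPUT_0': np.zeros((1, 224, 224, 3), dtype=np.uint8)}  # as returned by gr.Image, plus batching

infer_inputs, infer_outputs = prepare_request(inputs_meta, inputs_data, outputs_meta)
print(inputs_data['INPUT_0'].shape, inputs_data['INPUT_0'].dtype)  # (1, 3, 224, 224) float32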
metrics_table_head = """
<style>
table, th {{
border:0.1px solid black;
}}
</style>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">模型名称</th>
<th colspan="4">执行统计</th>
<th colspan="5">延迟统计</th>
</tr>
<tr>
<th>请求处理成功数</th>
<th>请求处理失败数</th>
<th>推理batch数</th>
<th>推理样本数</th>
<th>请求处理时间(ms)</th>
<th>任务队列等待时间(ms)</th>
<th>输入处理时间(ms)</th>
<th>模型推理时间(ms)</th>
<th>输出处理时间(ms)</th>
</tr>
{}
</table>
</div>
<br>
<br>
<br>
<br>
<br>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">GPU</th>
<th colspan="4">性能指标</th>
<th colspan="2">显存</th>
</tr>
<tr>
<th>利用率(%)</th>
<th>功率(W)</th>
<th>功率限制(W)</th>
<th>耗电量(W)</th>
<th>总量(GB)</th>
<th>已使用(GB)</th>
</tr>
{}
</table>
</div>
"""
def get_metric_data(server_addr, metric_port): # noqa:C901
'''
Get metrics data from fastdeploy server, and transform it into html table.
Args:
server_addr(str): fastdeployserver ip address
metric_port(int): fastdeployserver metrics port
Returns:
htmltable(str): html table to show metrics data
'''
model_table = {}
gpu_table = {}
metric_column_name = {
"Model": {
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_count", "nv_inference_exec_count",
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
},
"GPU": {
"nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_utilization",
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
},
"CPU": {
"nv_cpu_utilization", "nv_cpu_memory_total_bytes",
"nv_cpu_memory_used_bytes"
}
}
try:
res = requests.get("http://{}:{}/metrics".format(
server_addr, metric_port))
except Exception:
return metrics_table_head.format('', '')
metric_content = res.text
for content in metric_content.split('\n'):
if content.startswith('#'):
continue
else:
res = re.match(r'(\w+){(.*)} (\w+)',
content) # match output by server metrics interface
if not res:
continue
metric_name = res.group(1)
model = res.group(2)
value = res.group(3)
infos = {}
for info in model.split(','):
k, v = info.split('=')
v = v.strip('"')
infos[k] = v
if metric_name in [
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]:
value = str(float(value) / 1000)
elif metric_name in [
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
]:
value = str(float(value) / 1024 / 1024 / 1024)
for key, metric_names in metric_column_name.items():
if metric_name in metric_names:
if key == 'Model':
model_name = infos['model']
if model_name not in model_table:
model_table[model_name] = {}
model_table[model_name][metric_name] = value
elif key == 'GPU':
gpu_name = infos['gpu_uuid']
if gpu_name not in gpu_table:
gpu_table[gpu_name] = {}
gpu_table[gpu_name][metric_name] = value
elif key == 'CPU':
pass
model_data_list = []
gpu_data_list = []
model_data_metric_names = [
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_exec_count", "nv_inference_count",
"nv_inference_request_duration_us", "nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]
gpu_data_metric_names = [
"nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_memory_total_bytes",
"nv_gpu_memory_used_bytes"
]
for k, v in model_table.items():
data = []
data.append(k)
for data_metric in model_data_metric_names:
data.append(v[data_metric])
model_data_list.append(data)
for k, v in gpu_table.items():
data = []
data.append(k)
for data_metric in gpu_data_metric_names:
data.append(v[data_metric])
gpu_data_list.append(data)
model_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in model_data_list
])
gpu_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in gpu_data_list
])
return metrics_table_head.format(model_data, gpu_data)
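For context, the regex above parses Prometheus-style lines exposed by the server's /metrics endpoint. A small self-contained sketch of that parsing step follows; the sample line is illustrative, not captured from a real server.

# Hypothetical example of the line format handled by get_metric_data.
import re

sample = 'nv_inference_request_success{model="yolov5",version="1"} 42'
match = re.match(r'(\w+){(.*)} (\w+)', sample)
metric_name, labels, value = match.group(1), match.group(2), match.group(3)
infos = {k: v.strip('"') for k, v in (item.split('=') for item in labels.split(','))}
print(metric_name, infos, value)  # nv_inference_request_success {'model': 'yolov5', 'version': '1'} 42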
class HttpClientManager:
def __init__(self):
self.clients = {} # server url: httpclient
def _create_client(self, server_url):
if server_url in self.clients:
return self.clients[server_url]
try:
fastdeploy_client = httpclient.InferenceServerClient(server_url)
self.clients[server_url] = fastdeploy_client
return fastdeploy_client
except Exception:
raise RuntimeError(
'Can not connect to server {}, please check your \
server address'.format(server_url))
def infer(self, server_url, model_name, model_version, inputs):
fastdeploy_client = self._create_client(server_url)
input_metadata, output_metadata = self.get_model_meta(
server_url, model_name, model_version)
inputs, outputs = prepare_request(input_metadata, inputs,
output_metadata)
response = fastdeploy_client.infer(
model_name, inputs, model_version=model_version, outputs=outputs)
results = {}
for output in output_metadata:
result = response.as_numpy(output.name) # datatype: numpy
if output.datatype == 'BYTES': # datatype: bytes
try:
value = result
if len(result.shape) == 1:
value = result[0]
elif len(result.shape) == 2:
value = result[0][0]
elif len(result.shape) == 3:
value = result[0][0][0]
result = json.loads(value) # datatype: json
except Exception:
pass
else:
result = result[0]
results[output.name] = result
return results
def raw_infer(self, server_url, model_name, model_version, raw_input):
url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
server_url, model_name, model_version)
res = requests.post(url, data=json.dumps(json.loads(raw_input)))
return json.dumps(res.json())
def get_model_meta(self, server_url, model_name, model_version):
fastdeploy_client = self._create_client(server_url)
try:
model_metadata = fastdeploy_client.get_model_metadata(
model_name=model_name, model_version=model_version)
except InferenceServerException as e:
raise RuntimeError("Failed to retrieve the metadata: " + str(e))
model_metadata = convert_http_metadata_config(model_metadata)
input_metadata = model_metadata.inputs
output_metadata = model_metadata.outputs
return input_metadata, output_metadata
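A short usage sketch for HttpClientManager, mirroring what the gradio callbacks do. The address, model name and image shape are placeholders, the import path is assumed, and a fastdeployserver (Triton-compatible) HTTP endpoint must be reachable for it to succeed.

# Hypothetical example: drive HttpClientManager directly instead of through the gradio UI.
import numpy as np
from visualdl.component.inference.fastdeploy_client.http_client_manager import HttpClientManager  # assumed path

manager = HttpClientManager()
inputs_meta, outputs_meta = manager.get_model_meta('localhost:8000', 'yolov5', '1')
print([meta['name'] for meta in inputs_meta])

image = np.zeros((1, 320, 320, 3), dtype=np.uint8)  # batched HWC image, as the gradio app would supply
results = manager.infer('localhost:8000', 'yolov5', '1', {inputs_meta[0]['name']: image})
print(list(results.keys()))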
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import numpy as np
__all__ = [
'visualize_detection', 'visualize_keypoint_detection',
'visualize_face_detection', 'visualize_face_alignment',
'visualize_segmentation', 'visualize_matting', 'visualize_ocr',
'visualize_headpose'
]
def visualize_detection(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
boxes = np.array(data['boxes'])
scores = np.array(data['scores'])
label_ids = np.array(data['label_ids'])
masks = np.array(data['masks'])
contain_masks = data['contain_masks']
detection_result = fd.C.vision.DetectionResult()
detection_result.boxes = boxes
detection_result.scores = scores
detection_result.label_ids = label_ids
detection_result.masks = masks
detection_result.contain_masks = contain_masks
result = fd.vision.vis_detection(image, detection_result)
return result
def visualize_keypoint_detection(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
keypoints = np.array(data['keypoints'])
scores = np.array(data['scores'])
num_joints = np.array(data['num_joints'])
detection_result = fd.C.vision.KeyPointDetectionResult()
detection_result.keypoints = keypoints
detection_result.scores = scores
detection_result.num_joints = num_joints
result = fd.vision.vis_keypoint_detection(image, detection_result)
return result
def visualize_face_detection(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
face_data = np.array(data['data'])
scores = np.array(data['scores'])
landmarks = np.array(data['landmarks'])
landmarks_per_face = data['landmarks_per_face']
detection_result = fd.C.vision.FaceDetectionResult()
detection_result.data = face_data  # avoid clobbering the payload dict by reusing the name 'data'
detection_result.scores = scores
detection_result.landmarks = landmarks
detection_result.landmarks_per_face = landmarks_per_face
result = fd.vision.vis_face_detection(image, detection_result)
return result
def visualize_face_alignment(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
landmarks = np.array(data['landmarks'])
facealignment_result = fd.C.vision.FaceAlignmentResult()
facealignment_result.landmarks = landmarks
result = fd.vision.vis_face_alignment(image, facealignment_result)
return result
def visualize_segmentation(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
label_ids = np.array(data['label_ids'])
score_map = np.array(data['score_map'])
shape = np.array(data['shape'])
segmentation_result = fd.C.vision.SegmentationResult()
segmentation_result.shape = shape
segmentation_result.score_map = score_map
segmentation_result.label_ids = label_ids
result = fd.vision.vis_segmentation(image, segmentation_result)
return result
def visualize_matting(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
alpha = np.array(data['alpha'])
foreground = np.array(data['foreground'])
contain_foreground = data['contain_foreground']
shape = np.array(data['shape'])
matting_result = fd.C.vision.MattingResult()
matting_result.alpha = alpha
matting_result.foreground = foreground
matting_result.contain_foreground = contain_foreground
matting_result.shape = shape
result = fd.vision.vis_matting(image, matting_result)
return result
def visualize_ocr(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
boxes = np.array(data['boxes'])
text = np.array(data['text'])
rec_scores = np.array(data['rec_scores'])
cls_scores = np.array(data['cls_scores'])
cls_labels = data['cls_labels']
ocr_result = fd.C.vision.OCRResult()
ocr_result.boxes = boxes
ocr_result.text = text
ocr_result.rec_scores = rec_scores
ocr_result.cls_scores = cls_scores
ocr_result.cls_labels = cls_labels
result = fd.vision.vis_ppocr(image, ocr_result)
return result
def visualize_headpose(image, data):
try:
import fastdeploy as fd
except Exception:
raise RuntimeError(
"fastdeploy is required for visualizing results,please refer to \
https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
euler_angles = np.array(data['euler_angles'])
headpose_result = fd.C.vision.HeadPoseResult()
headpose_result.euler_angles = euler_angles
result = fd.vision.vis_headpose(image, headpose_result)
return result
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import copy
import json
import os
import random
import re
import signal
import string
from collections import defaultdict
from subprocess import Popen
from subprocess import STDOUT
import google.protobuf.json_format as json_format
import google.protobuf.text_format as text_format
import psutil
import requests
from .proto.model_config_pb2 import ModelConfig
from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
def pbtxt2json(content: str):
'''
Convert protocol messages in text format to json format string.
'''
message = text_format.Parse(content, ModelConfig())
json_string = json_format.MessageToJson(message)
return json_string
def json2pbtxt(content: str):
'''
Convert json format string to protocol messages in text format.
'''
message = json_format.Parse(content, ModelConfig())
text_proto = text_format.MessageToString(message)
return text_proto
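A small round-trip sketch for the two converters above, using a minimal Triton-style config. It is meant to run in the context of this module (ModelConfig and the helpers defined here); the model name and dims are placeholders.

# Hypothetical round trip between text protobuf and JSON.
pbtxt = '''
name: "yolov5"
backend: "fastdeploy"
max_batch_size: 1
input { name: "images" data_type: TYPE_FP32 dims: [3, 640, 640] }
output { name: "output" data_type: TYPE_FP32 dims: [25200, 85] }
'''
as_json = pbtxt2json(pbtxt)
print(as_json)               # JSON string mirroring the ModelConfig fields
print(json2pbtxt(as_json))   # back to text protobuf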
def validate_data(model_config):
'''
Validate data in the model config. Empty values received from the front end should be checked;
the easiest way to handle them is to drop them.
Args:
model_config: model config to be saved in config file
Return:
model config after filtering.
'''
model_config_filtered = {}
for key, value in model_config.items():
if value:
model_config_filtered[key] = value
return model_config_filtered
def analyse_config(cur_dir: str):
'''
Analyse the model config in specified directory.
Return a json object to describe configuration.
'''
all_model_configs = {}
all_model_versions = {}
parent_dir, sub_dirs, filenames = os.walk(cur_dir).send(
None)  # a model repository can only contain model directories at its top level,
# so we only need to search directories at depth 1.
for model_dir_name in sub_dirs:
model_dir, model_sub_dirs, filenames = os.walk(
os.path.join(parent_dir, model_dir_name)).send(None)
model_name = os.path.basename(model_dir)
config_filenames = []
for filename in filenames:
if '.pbtxt' in filename:
config_filenames.append(
filename
) # filenames with extension .pbtxt are all config files
if config_filenames:
default_config_filename = config_filenames[0]
if 'config.pbtxt' in config_filenames:
default_config_filename = 'config.pbtxt'
config_filenames.remove(default_config_filename)
config_filenames.insert(0, default_config_filename)
else:
# if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt
copy_config_file_to_default_config(model_dir,
default_config_filename)
default_config_filename = 'config.pbtxt'
config_filenames.insert(0, default_config_filename)
json_config = json.loads(
pbtxt2json(
open(os.path.join(model_dir,
default_config_filename)).read()))
json_config["config_filenames"] = config_filenames[
0] # add config_filenames to config data (frontend developer said he only wanted one filename,
# and to request config_filenames by get_config_filenames_for_one_model later)
all_model_configs[
model_name] = json_config # store original config file content in json format
json_config[
'name'] = model_name  # the name in the config data may differ from model_name
# (model_name is actually the model directory name), so we make name conform to model_name.
else:
continue
for model_sub_dir in model_sub_dirs:
if re.match(
r'\d+',
model_sub_dir): # version directory consists of numbers
if model_name not in all_model_versions:
all_model_versions[model_name] = {}
if model_sub_dir not in all_model_versions[model_name]:
all_model_versions[model_name][model_sub_dir] = []
for version_resource_file in os.listdir(
os.path.join(model_dir, model_sub_dir)):
all_model_versions[model_name][model_sub_dir].append(
version_resource_file)
if model_name not in all_model_versions:  # if a model has a config but no version directory,
# we create one for the user's convenience
all_model_versions[model_name] = {}
os.mkdir(os.path.join(model_dir, '1'))
all_model_versions[model_name]['1'] = []
if not all_model_configs:
raise Exception(
'The path you choose is not a valid model repository, please choose a valid path.'
)
return all_model_configs, all_model_versions
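analyse_config expects the standard Triton model repository layout: one directory per model at the top level, at least one .pbtxt config inside it, and numeric version subdirectories for the model files. The following hypothetical sketch builds such a skeleton and parses it; the repository path and model name are placeholders.

# Hypothetical example of a repository layout analyse_config can parse:
#   example_model_repository/
#     yolov5/
#       config.pbtxt      <- any *.pbtxt counts as a config file
#       1/                <- numeric directory = model version (model.onnx, model.pdmodel, ... go here)
import os

repo = './example_model_repository'
os.makedirs(os.path.join(repo, 'yolov5', '1'), exist_ok=True)
with open(os.path.join(repo, 'yolov5', 'config.pbtxt'), 'w') as f:
    f.write('name: "yolov5"\nbackend: "fastdeploy"\nmax_batch_size: 1\n')

all_model_configs, all_model_versions = analyse_config(repo)
print(list(all_model_configs.keys()), all_model_versions)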
def exchange_format_to_original_format(exchange_format):
'''
Change config exchange format to original format.
'''
ensembles = []
models = []
all_models = {}
if 'ensembles' in exchange_format:
ensembles = exchange_format['ensembles']
if 'models' in exchange_format:
models = exchange_format['models']
alls = ensembles + models
for model_config in alls:
# 1. add 'executionAccelerators' keyword
if 'optimization' in model_config:
optimization_config = model_config['optimization']
del model_config['optimization']
model_config['optimization'] = {}
model_config['optimization'][
'executionAccelerators'] = optimization_config
# 2. delete versions information
if 'versions' in model_config:
del model_config['versions']
if 'config_filenames' in model_config:
del model_config['config_filenames']
if 'platform' in model_config and model_config[
'platform'] == 'ensemble':  # ensemble model
# 3. add 'ensembleScheduling' keyword
if 'step' in model_config:
step_configs = model_config['step']
if 'ensembleScheduling' not in model_config:
model_config['ensembleScheduling'] = {}
model_config['ensembleScheduling']['step'] = step_configs
del model_config['step']
# 4. remove two virtual models(feed, fetch), and
# "modelType", "inputModels", "outputModels", "inputVars", "outputVars"
remove_list = []
for model_config_in_step in step_configs:
if model_config_in_step[
'modelName'] == 'feed' or model_config_in_step[
'modelName'] == 'fetch':
remove_list.append(model_config_in_step)
continue
del model_config_in_step['modelType']
del model_config_in_step['inputModels']
del model_config_in_step['outputModels']
del model_config_in_step['inputVars']
del model_config_in_step['outputVars']
for remove_item in remove_list:
step_configs.remove(remove_item)
all_models[model_config['name']] = model_config
return all_models
def copy_config_file_to_default_config(model_dir, config_name):
json_config = json.loads(
pbtxt2json(open(os.path.join(model_dir, config_name)).read()))
model_name = os.path.basename(model_dir)
json_config['name'] = model_name
text_proto = json2pbtxt(json.dumps(json_config))
with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
f.write(text_proto)
def original_format_to_exchange_format(original_format, version_info):
'''
Change config original format to exchange format.
'''
exchange_format = {}
exchange_format['ensembles'] = []
exchange_format['models'] = []
# 0. transform version info into component format in frontend
for model_name, version_filenames_dict in version_info.items():
version_info_for_frontend = []
for version_name, filenames in version_filenames_dict.items():
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in filenames:
version_filenames_dict_for_frontend['children'].append({
'title':
filename,
'key':
filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
version_info[model_name] = version_info_for_frontend
for model_name, model_config in original_format.items():
# 1. remove 'executionAccelerators' keyword
transformed_config = copy.deepcopy(model_config)
if 'optimization' in model_config:
if 'executionAccelerators' in model_config['optimization']:
transformed_optimization_config = model_config['optimization'][
'executionAccelerators']
del transformed_config['optimization']
transformed_config[
'optimization'] = transformed_optimization_config
# 2. add versions information
if model_name in version_info:
transformed_config['versions'] = version_info[model_name]
if 'platform' in model_config and model_config[
'platform'] == 'ensemble':  # ensemble model
# 3. remove ensembleScheduling
if 'ensembleScheduling' in model_config:
if 'step' in model_config['ensembleScheduling']:
del transformed_config['ensembleScheduling']
transformed_config['step'] = model_config[
'ensembleScheduling']['step']
# 4. add two virtual models(feed, fetch), and
# "modelType", "inputModels", "outputModels", "inputVars", "outputVars"
for model_config_in_step in transformed_config['step']:
model_config_in_step['modelType'] = 'normal'
model_config_in_step['inputModels'] = []
model_config_in_step['outputModels'] = []
model_config_in_step['inputVars'] = []
model_config_in_step['outputVars'] = []
transformed_config['step'].append({
"modelName": "feed",
"modelType": "virtual",
"inputModels": [],
"outputModels": [],
"inputVars": [],
"outputVars": []
})
transformed_config['step'].append({
"modelName": "fetch",
"modelType": "virtual",
"inputModels": [],
"outputModels": [],
"inputVars": [],
"outputVars": []
})
analyse_step_relationships(transformed_config['step'],
transformed_config['input'],
transformed_config['output'])
exchange_format['ensembles'].append(transformed_config)
elif 'backend' in model_config: # single model
exchange_format['models'].append(transformed_config)
return exchange_format
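To illustrate the exchange format, here is a hypothetical single (non-ensemble) model config in the original format and its converted form; the field values are placeholders.

# Hypothetical example: convert one single-model config to the frontend exchange format.
original = {
    'yolov5': {
        'name': 'yolov5',
        'backend': 'fastdeploy',
        'maxBatchSize': 1,
        'config_filenames': 'config.pbtxt',
    }
}
versions = {'yolov5': {'1': ['model.onnx']}}
exchange = original_format_to_exchange_format(original, versions)
print(exchange['models'][0]['versions'])
# [{'title': '1', 'key': '1', 'children': [{'title': 'model.onnx', 'key': 'model.onnx'}]}]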
def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901
'''
Analyse model relationships in ensemble step. And fill \
"inputModels", "outputModels", "inputVars", "outputVars" in step_config.
step_config: step data in ensemble model config.
inputs: inputs in ensemble model config.
outputs: outputs in ensemble model config.
'''
models_dict = {}
vars_dict = {}
for model_config_in_step in step_config:
models_dict[model_config_in_step['modelName']] = model_config_in_step
if model_config_in_step['modelType'] == 'virtual':
for var in inputs:
if var['name'] not in vars_dict:
vars_dict[var['name']] = {}
vars_dict[var['name']]['from_models'] = set()
vars_dict[var['name']]['to_models'] = set()
vars_dict[var['name']]['from_models'].add('feed')
for var in outputs:
if var['name'] not in vars_dict:
vars_dict[var['name']] = {}
vars_dict[var['name']]['from_models'] = set()
vars_dict[var['name']]['to_models'] = set()
vars_dict[var['name']]['to_models'].add('fetch')
else:
for var_placehold_name, var_name in model_config_in_step[
'inputMap'].items():
if var_name not in vars_dict:
vars_dict[var_name] = {}
vars_dict[var_name]['from_models'] = set()
vars_dict[var_name]['to_models'] = set()
vars_dict[var_name]['to_models'].add(
model_config_in_step['modelName'])
for var_placehold_name, var_name in model_config_in_step[
'outputMap'].items():
if var_name not in vars_dict:
vars_dict[var_name] = {}
vars_dict[var_name]['from_models'] = set()
vars_dict[var_name]['to_models'] = set()
vars_dict[var_name]['from_models'].add(
model_config_in_step['modelName'])
for var_name, relationships in vars_dict.items():
for from_model in relationships['from_models']:
models_dict[from_model]['outputVars'].append(var_name)
for var_to_model in relationships['to_models']:
if var_to_model not in models_dict[from_model]['outputModels']:
models_dict[from_model]['outputModels'].append(
var_to_model)
for to_model in relationships['to_models']:
models_dict[to_model]['inputVars'].append(var_name)
for var_from_model in relationships['from_models']:
if var_from_model not in models_dict[to_model]['inputModels']:
models_dict[to_model]['inputModels'].append(var_from_model)
calculate_layout_for_frontend(models_dict)
def get_config_filenames_for_one_model(cur_dir, name):
_, _, filenames = os.walk(os.path.join(cur_dir, name)).send(None)
config_filenames = []
backup_config_filenames = []
for filename in filenames:
if '.pbtxt' in filename and 'vdlbackup' not in filename:
config_filenames.append(
filename
)  # filenames with extension .pbtxt that do not contain 'vdlbackup' are normal config files
elif '.pbtxt' in filename and 'vdlbackup' in filename:
backup_config_filenames.append(
filename
) # filenames with extension .pbtxt and contain 'vdlbackup' are backup config files
config_filenames = sorted(config_filenames) + sorted(
backup_config_filenames)
return config_filenames
def get_config_for_one_model(cur_dir, name, config_filename):
all_model_configs = {}
all_model_versions = {}
filename = os.path.join(cur_dir, name, config_filename)
json_config = json.loads(pbtxt2json(open(filename).read()))
json_config[
'name'] = name  # the name in the config data may differ from the model name
# (which is actually the model directory name), so we make name conform to it.
json_config["config_filenames"] = config_filename
all_model_configs[
name] = json_config # store original config file content in json format
all_model_versions[name] = {}
for model_sub_dir in os.listdir(os.path.join(cur_dir, name)):
if re.match(r'\d+',
model_sub_dir): # version directory consists of numbers
if model_sub_dir not in all_model_versions[name]:
all_model_versions[name][model_sub_dir] = []
for version_resource_file in os.listdir(
os.path.join(cur_dir, name, model_sub_dir)):
all_model_versions[name][model_sub_dir].append(
version_resource_file)
model_config = original_format_to_exchange_format(all_model_configs,
all_model_versions)
if model_config['ensembles']:
return model_config['ensembles'][0]
elif model_config['models']:
return model_config['models'][0]
def calculate_layout_for_frontend(model_config_in_step):
'''
Analyse model topology connections and prepare the positions for each model in layout.
Dynamic programming recurrence:
depth(cur_node) = max([depth(prev_node) for prev_node in cur_node['inputModels']]) + 1
Args:
model_config_in_step(dict): model config in ensemble models' step, indexed by model name.
Returns:
None. Results calculated will be saved in place.
'''
path_depth = defaultdict(int)
def depth_recursive(model):
if model['modelName'] == 'feed':
path_depth[model['modelName']] = 0
return 0
if path_depth[model['modelName']] != 0:
return path_depth[model['modelName']]
path_depth[model['modelName']] = max([
depth_recursive(model_config_in_step[model_name]) for model_name in
model_config_in_step[model['modelName']]['inputModels']
]) + 1
return path_depth[model['modelName']]
depth_recursive(model_config_in_step['fetch'])
path_depth_tuple = [
(k, v)
for k, v in sorted(path_depth.items(), key=lambda item: item[1])
]
cur_x = 0
last_depth = -1
for model_name, depth in path_depth_tuple:
if depth == last_depth:
model_config_in_step[model_name]['pos_y'] = depth
model_config_in_step[model_name]['pos_x'] = cur_x
cur_x += 1
else:
cur_x = 0
model_config_in_step[model_name]['pos_y'] = depth
model_config_in_step[model_name]['pos_x'] = cur_x
cur_x += 1
last_depth = depth
return
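A worked example of the depth recurrence on a tiny linear ensemble step (feed -> preprocess -> infer -> fetch); the model names are hypothetical and only the fields the function reads are filled in.

# Hypothetical example: run the layout pass on a minimal step graph.
step = {
    'feed': {'modelName': 'feed', 'inputModels': []},
    'preprocess': {'modelName': 'preprocess', 'inputModels': ['feed']},
    'infer': {'modelName': 'infer', 'inputModels': ['preprocess']},
    'fetch': {'modelName': 'fetch', 'inputModels': ['infer']},
}
calculate_layout_for_frontend(step)
print({name: (cfg['pos_x'], cfg['pos_y']) for name, cfg in step.items()})
# feed lands at depth 0, preprocess at 1, infer at 2, fetch at 3;
# pos_x would separate models that share the same depth.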
def launch_process(kwargs: dict):
'''
Launch a fastdeploy server according to specified arguments.
'''
cmd = ['fastdeployserver']
launch_env = os.environ.copy()
start_args = {}
for key, value in kwargs.items():
if key == 'default_model_name': # Used to fill client model_name automatically
start_args[key] = value
continue
if key == 'server-name' or key == 'ensemble-img': # extra information
start_args[key] = value
continue
if key == 'gpus':
if value:
launch_env['CUDA_VISIBLE_DEVICES'] = value
start_args[key] = value
continue
cmd.append('--{}'.format(key))
cmd.append('{}'.format(value))
start_args[key] = value
if start_args['server-name'] and start_args['server-name'] in os.listdir(
FASTDEPLOYSERVER_PATH):
raise RuntimeError(
"Failed to launch server,server name {} has been used,please write a different server name."
.format(start_args['server-name']))
all_model_configs, all_model_versions = analyse_config(
start_args['model-repository'])
model_repo_config = original_format_to_exchange_format(
all_model_configs, all_model_versions)
model_repo_config['ensemble-img'] = start_args['ensemble-img']
logfilename = 'logfile-{}'.format(get_random_string(8))
while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)):
logfilename = 'logfile-{}'.format(get_random_string(8))
p = Popen(
cmd,
stdout=open(
os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w',
buffering=1),
stderr=STDOUT,
universal_newlines=True,
env=launch_env)
server_name = start_args['server-name'] if start_args[
'server-name'] else p.pid
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_name)),
'w') as f:
# the file named ${server_name} contains 4 lines:
# line1 : the real log filename ${logfilename}
# line2 : pid
# line3 : launch arguments
# line4 : model-repository configuration
f.write(logfilename + '\n' + str(p.pid) + '\n' +
json.dumps(start_args) + '\n' + json.dumps(model_repo_config))
return p
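A hedged sketch of the kwargs dict launch_process expects, inferred from how the keys are consumed above. The values are placeholders; fastdeployserver must be installed and the model repository (e.g. the skeleton from the earlier sketch) must exist for the spawned command to succeed.

# Hypothetical launch arguments. 'server-name', 'ensemble-img' and 'default_model_name' are
# bookkeeping entries recorded in the server file; 'gpus' is exported as CUDA_VISIBLE_DEVICES;
# the remaining keys become `--key value` command line flags for fastdeployserver.
launch_kwargs = {
    'server-name': 'my_server',
    'default_model_name': 'yolov5',
    'ensemble-img': '',
    'gpus': '0',
    'model-repository': './example_model_repository',
    'http-port': '8000',
    'grpc-port': '8001',
    'metrics-port': '8002',
}
process = launch_process(launch_kwargs)
print(process.pid, process.poll())  # poll() is None while the server process is still running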
def get_random_string(length):
# choose from all lowercase letter
letters = string.ascii_lowercase
result_str = ''.join([random.choice(letters) for i in range(length)])
return result_str
def get_start_arguments(server_id):
'''
Get the start arguments for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
args(dict): launch arguments when start fastdeployserver process.
'''
args = {}
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
arguments_json = f.read().split('\n')[2]
args = json.loads(arguments_json)
return args
def get_process_pid(server_id):
'''
Get the process id for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
pid(int): process id.
'''
pid = None
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
pid = int(f.read().split('\n')[1])
return pid
def get_process_logfile_name(server_id):
'''
Get the process logfile name for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
logfile(str): logfile name.
'''
filename = None
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
filename = f.read().split('\n')[0]
return filename
def get_process_model_configuration(server_id):
'''
Get the model repository configuration for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
Returns:
configuration(dict): model repository configuration
'''
conf = {}
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
conf_json = f.read().split('\n')[3]
conf = json.loads(conf_json)
return conf
def get_process_output(server_id, length):
'''
Get the standard output of an opened subprocess.
'''
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
logfilename = get_process_logfile_name(server_id)
# read the log file ${logfilename} if it exists
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH,
'{}'.format(logfilename)), 'r') as f:
f.seek(length)
data = f.read()
return data
def mark_pid_for_dead_process(server_id):
'''
Resource files for a dead server are only deleted when the user closes the server in the frontend.
When the user closes the server, the pid recorded in the logfile will be killed.
In case a dead process id gets reassigned to a new process, we mark the pid recorded in the logfile as outdated.
Here, we replace the pid with -1 in the logfile to denote a zombie process \
that has already been polled and is dead.
Args:
server_id(str): fastdeployserver process name
'''
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'r') as f:
contents = f.read().split('\n')
contents[1] = '-1' # we replace pid to -1
with open(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
'w') as f:
f.write('\n'.join(contents))
def delete_files_for_process(server_id):
'''
Delete logfile for fastdeployserver process.
Args:
server_id(str): fastdeployserver process name
'''
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
logfilename = get_process_logfile_name(server_id)
# delete file ${logfilename} if exists
if os.path.exists(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
os.remove(
os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename)))
os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)))
def kill_process(process):
'''
Stop an opened subprocess.
'''
if type(process) == str: # server_id, use os.kill to terminate
pid = get_process_pid(process)
if pid == -1: # we use -1 to mark dead process
return
try:
os.kill(pid, signal.SIGKILL)
except Exception:
pass
else:
pid = process.pid
process.kill()
try:
process.wait(10)
except Exception:
pass
def get_alive_fastdeploy_servers():
'''
Search server names in `FASTDEPLOYSERVER_PATH`. If a process is dead but its log still exists for \
some unexpected reason, delete the log file.
'''
server_names = [
name for name in os.listdir(FASTDEPLOYSERVER_PATH)
if 'logfile' not in name
]
should_delete_servers = []
for server_name in server_names:
if check_process_alive(server_name) is False:
delete_files_for_process(server_name)
should_delete_servers.append(server_name)
for server_name in should_delete_servers:
server_names.remove(server_name)
return server_names
def check_process_zombie(server_id):
'''
Given a server id, check whether the process has become a zombie (its pid has been marked as -1).
Args:
server_id(str): fastdeployserver process name
Return:
status(bool): True if the process has become a zombie.
'''
pid = get_process_pid(server_id)
if pid == -1:
return True
else:
return False
def check_process_alive(server_id):
'''
Given a server id, check whether the process is alive or not.
Args:
server_id(str): fastdeployserver process name
Return:
status(bool): True if process is still alive.
'''
pid = get_process_pid(server_id)
if pid is None:
return False
if pid == -1:  # We use -1 to mark a zombie process that has already died.
# Since the user may want to know why the process died (e.g. due to an exception),
# we return True so that the frontend can still fetch the log of the dead process.
return True
try:
os.kill(pid, 0)
except OSError:
return False
else:
if 'fastdeployserve' not in psutil.Process(pid).name(
):  # Check that the pid still belongs to a fastdeployserver process, in case the pid has been reassigned.
# Note: on Linux the kernel truncates process names to 15 characters (TASK_COMM_LEN),
# which is why psutil.Process(pid).name() reports 'fastdeployserve' rather than 'fastdeployserver'.
return False
else:
return True
_metric_column_name = {
"Model": {
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_count", "nv_inference_exec_count",
"nv_inference_request_duration_us", "nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
},
"GPU": {
"nv_gpu_power_usage", "nv_gpu_power_limit", "nv_energy_consumption",
"nv_gpu_utilization", "nv_gpu_memory_total_bytes",
"nv_gpu_memory_used_bytes"
},
"CPU": {
"nv_cpu_utilization", "nv_cpu_memory_total_bytes",
"nv_cpu_memory_used_bytes"
}
}
def generate_metric_table(server_addr, server_port): # noqa:C901
model_table = {}
gpu_table = {}
try:
res = requests.get("http://{}:{}/metrics".format(
server_addr, server_port))
except Exception:
return None
metric_content = res.text
for content in metric_content.split('\n'):
if content.startswith('#'):
continue
else:
res = re.match(r'(\w+){(.*)} (\w+)',
content) # match output by server metrics interface
if not res:
continue
metric_name = res.group(1)
model = res.group(2)
value = res.group(3)
infos = {}
for info in model.split(','):
k, v = info.split('=')
v = v.strip('"')
infos[k] = v
if metric_name in [
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]:
value = float(value) / 1000
elif metric_name in [
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
]:
value = float(value) / 1024 / 1024 / 1024
for key, metric_names in _metric_column_name.items():
if metric_name in metric_names:
if key == 'Model':
model_name = infos['model']
if model_name not in model_table:
model_table[model_name] = {}
model_table[model_name][metric_name] = value
elif key == 'GPU':
gpu_name = infos['gpu_uuid']
if gpu_name not in gpu_table:
gpu_table[gpu_name] = {}
gpu_table[gpu_name][metric_name] = value
elif key == 'CPU':
pass
results = {}
results['Model'] = model_table
results['GPU'] = gpu_table
return results
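Unlike get_metric_data in the client, this helper returns raw dictionaries for the backend API. A brief sketch of how it can be consumed; the address and port are placeholders, and it returns None if the metrics endpoint is unreachable.

# Hypothetical example: read the metrics of a running fastdeployserver.
table = generate_metric_table('localhost', 8002)
if table is not None:
    for model_name, metrics in table['Model'].items():
        print(model_name, metrics.get('nv_inference_request_success'))
    for gpu_uuid, metrics in table['GPU'].items():
        print(gpu_uuid, metrics.get('nv_gpu_utilization'))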
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import datetime
import json
import os
import re
import shutil
import socket
import time
from multiprocessing import Process
from pathlib import Path
import requests
from .fastdeploy_client.client_app import create_gradio_client_app
from .fastdeploy_lib import analyse_config
from .fastdeploy_lib import check_process_zombie
from .fastdeploy_lib import copy_config_file_to_default_config
from .fastdeploy_lib import delete_files_for_process
from .fastdeploy_lib import exchange_format_to_original_format
from .fastdeploy_lib import generate_metric_table
from .fastdeploy_lib import get_alive_fastdeploy_servers
from .fastdeploy_lib import get_config_filenames_for_one_model
from .fastdeploy_lib import get_config_for_one_model
from .fastdeploy_lib import get_process_model_configuration
from .fastdeploy_lib import get_process_output
from .fastdeploy_lib import get_start_arguments
from .fastdeploy_lib import json2pbtxt
from .fastdeploy_lib import kill_process
from .fastdeploy_lib import launch_process
from .fastdeploy_lib import mark_pid_for_dead_process
from .fastdeploy_lib import original_format_to_exchange_format
from .fastdeploy_lib import validate_data
from visualdl.server.api import gen_result
from visualdl.server.api import result
from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
class FastDeployServerApi(object):
def __init__(self):
self.root_dir = Path(os.getcwd())
self.opened_servers = {
}  # Used to store opened fastdeployserver processes, keyed by server name
self.client_port = None
@result()
def get_directory(self, cur_dir):
if self.root_dir not in Path(os.path.abspath(cur_dir)).parents:
cur_dir = '.'
cur_dir, sub_dirs, filenames = os.walk(cur_dir).send(None)
if Path(self.root_dir) != Path(os.path.abspath(cur_dir)):
sub_dirs.append('..')
sub_dirs = sorted(sub_dirs)
directorys = {
'parent_dir':
os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir),
'sub_dir':
sub_dirs
}
return directorys
@result()
def get_config(self, cur_dir):
all_model_configs, all_model_versions = analyse_config(cur_dir)
return original_format_to_exchange_format(all_model_configs,
all_model_versions)
@result()
def config_update(self, cur_dir, model_name, config, config_filename):
config = json.loads(config)
all_models = exchange_format_to_original_format(config)
model_dir = os.path.join(os.path.abspath(cur_dir), model_name)
filtered_config = validate_data(all_models[model_name])
text_proto = json2pbtxt(json.dumps(filtered_config))
# back up the user's config data first, so that if the data is corrupted by the front end we can still recover it
# backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
# a backup config can only be used to restore config.pbtxt
if 'vdlbackup' in config_filename:
raise RuntimeError(
"Backup config file is not permitted to update.")
basename = os.path.splitext(config_filename)[0]
shutil.copy(
os.path.join(model_dir, config_filename),
os.path.join(
model_dir, '{}_vdlbackup_{}.pbtxt'.format(
basename,
datetime.datetime.now().isoformat())))
with open(os.path.join(model_dir, config_filename), 'w') as f:
f.write(text_proto)
return
@result()
def start_server(self, configs):
configs = json.loads(configs)
process = launch_process(configs)
if process.poll() is not None:
raise RuntimeError(
"Failed to launch fastdeployserver,please check fastdeployserver is installed in environment."
)
server_name = configs['server-name'] if configs[
'server-name'] else str(process.pid)
self.opened_servers[server_name] = process
return server_name
@result()
def stop_server(self, server_id):
if server_id in self.opened_servers: # check if server_id in self.opened_servers
kill_process(self.opened_servers[server_id])
del self.opened_servers[server_id]
elif server_id in set(
os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in
# FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
kill_process(server_id)
delete_files_for_process(server_id)
self._poll_zombie_process()
@result('text/plain')
def get_server_output(self, server_id, length):
length = int(length)
if server_id in self.opened_servers: # check if server_id in self.opened_servers
return get_process_output(server_id, length)
        elif str(server_id) in set(
                os.listdir(FASTDEPLOYSERVER_PATH)):  # check if server_id is in
            # FASTDEPLOYSERVER_PATH (it may have been launched by another vdl app instance under gunicorn)
return get_process_output(server_id, length)
else:
return
@result()
def get_server_metric(self, server_id):
args = get_start_arguments(server_id)
host = 'localhost'
port = args.get('metrics-port', 8002)
return generate_metric_table(host, port)
@result()
def get_server_list(self):
return get_alive_fastdeploy_servers()
@result()
def check_server_alive(self, server_id):
self._poll_zombie_process()
if check_process_zombie(server_id) is True:
            raise RuntimeError(
                "Server {} is down because it exited abnormally or was killed, please check the reason in the log, \
                then close this server.".format(server_id))
return
@result()
def get_server_config(self, server_id):
return get_process_model_configuration(server_id)
@result()
def get_pretrain_model_list(self):
'''
Get all available fastdeploy models from hub server.
'''
res = requests.get(
'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
result = res.json()
if result['status'] != 0:
raise RuntimeError(
"Failed to get pre-trained model list from hub server.")
else:
data = result['data']
model_list = {}
for category, models in data.items():
if category not in model_list:
model_list[category] = set()
for model in models:
model_list[category].add(model['name'])
# adapt data format for frontend
models_info = []
for category, model_names in model_list.items():
models_info.append({
"value": category,
"label": category,
"children": []
})
for model_name in sorted(model_names):
models_info[-1]["children"].append({
"value": model_name,
"label": model_name
})
return models_info
@result()
def download_pretrain_model(self, cur_dir, model_name, version,
pretrain_model_name):
version_resource_dir = os.path.join(
os.path.abspath(cur_dir), model_name, version)
try:
import fastdeploy as fd
except Exception:
            raise RuntimeError(
                "fastdeploy is required for downloading pretrained models, please refer to \
                https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
model_path = fd.download_model(
name=pretrain_model_name, path=version_resource_dir)
if model_path:
if '.onnx' in model_path:
shutil.move(
model_path,
os.path.join(os.path.dirname(model_path), 'model.onnx'))
else:
for filename in os.listdir(model_path):
if '.pdmodel' in filename or '.pdiparams' in filename:
shutil.move(
os.path.join(model_path, filename),
os.path.join(
os.path.dirname(model_path), 'model{}'.format(
os.path.splitext(filename)[1])))
else:
shutil.move(
os.path.join(model_path, filename),
os.path.join(
os.path.dirname(model_path), filename))
shutil.rmtree(model_path)
version_info_for_frontend = []
for version_name in os.listdir(os.path.join(cur_dir, model_name)):
if re.match(
r'\d+',
version_name): # version directory consists of numbers
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in os.listdir(
os.path.join(cur_dir, model_name, version_name)):
version_filenames_dict_for_frontend['children'].append(
{
'title': filename,
'key': filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
return version_info_for_frontend
else:
raise RuntimeError(
"Failed to download pre-trained model {}.".format(
pretrain_model_name))
@result()
def get_config_for_model(self, cur_dir, name, config_filename):
return get_config_for_one_model(cur_dir, name, config_filename)
@result()
def get_config_filenames_for_model(self, cur_dir, name):
return get_config_filenames_for_one_model(cur_dir, name)
@result()
def delete_config_for_model(self, cur_dir, name, config_filename):
if self.root_dir not in Path(
os.path.abspath(cur_dir)
        ).parents:  # should prevent the user from removing files outside the model repository
raise RuntimeError(
'Failed to delete config file, please check filepath.')
if os.path.exists(os.path.join(cur_dir, name, config_filename)):
os.remove(os.path.join(cur_dir, name, config_filename))
return get_config_filenames_for_one_model(cur_dir, name)
@result()
def set_default_config_for_model(self, cur_dir, name, config_filename):
model_dir = os.path.join(os.path.abspath(cur_dir), name)
# backup config.pbtxt to config_vdlbackup_{datetime}.pbtxt
if os.path.exists(os.path.join(model_dir, 'config.pbtxt')):
shutil.copy(
os.path.join(model_dir, 'config.pbtxt'),
os.path.join(
model_dir, 'config_vdlbackup_{}.pbtxt'.format(
datetime.datetime.now().isoformat())))
if config_filename != 'config.pbtxt':
copy_config_file_to_default_config(model_dir, config_filename)
return
@result()
def delete_resource_for_model(self, cur_dir, model_name, version,
resource_filename):
if self.root_dir not in Path(
os.path.abspath(cur_dir)
        ).parents:  # should prevent the user from removing files outside the model repository
raise RuntimeError(
'Failed to delete resource file, please check filepath.')
resource_path = os.path.join(
os.path.abspath(cur_dir), model_name, version, resource_filename)
if os.path.exists(resource_path):
os.remove(resource_path)
version_info_for_frontend = []
for version_name in os.listdir(os.path.join(cur_dir, model_name)):
if re.match(r'\d+',
version_name): # version directory consists of numbers
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in os.listdir(
os.path.join(cur_dir, model_name, version_name)):
version_filenames_dict_for_frontend['children'].append({
'title':
filename,
'key':
filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
return version_info_for_frontend
@result()
def rename_resource_for_model(self, cur_dir, model_name, version,
resource_filename, new_filename):
if self.root_dir not in Path(
os.path.abspath(cur_dir)
        ).parents:  # should prevent the user from renaming files outside the model repository
raise RuntimeError(
'Failed to rename resource file, please check filepath.')
resource_path = os.path.join(
os.path.abspath(cur_dir), model_name, version, resource_filename)
new_file_path = os.path.join(
os.path.abspath(cur_dir), model_name, version, new_filename)
if os.path.exists(resource_path):
shutil.move(resource_path, new_file_path)
version_info_for_frontend = []
for version_name in os.listdir(os.path.join(cur_dir, model_name)):
if re.match(r'\d+',
version_name): # version directory consists of numbers
version_filenames_dict_for_frontend = {}
version_filenames_dict_for_frontend['title'] = version_name
version_filenames_dict_for_frontend['key'] = version_name
version_filenames_dict_for_frontend['children'] = []
for filename in os.listdir(
os.path.join(cur_dir, model_name, version_name)):
version_filenames_dict_for_frontend['children'].append({
'title':
filename,
'key':
filename
})
version_info_for_frontend.append(
version_filenames_dict_for_frontend)
return version_info_for_frontend
def create_fastdeploy_client(self):
if self.client_port is None:
def get_free_tcp_port():
tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
tcp.bind(('localhost', 0))
addr, port = tcp.getsockname()
tcp.close()
return port
self.client_port = get_free_tcp_port()
app = create_gradio_client_app()
thread = Process(
target=app.launch, kwargs={'server_port': self.client_port})
thread.start()
            def check_alive():
                # block until the gradio client app starts responding
                while True:
try:
requests.get('http://localhost:{}/'.format(
self.client_port))
break
except Exception:
time.sleep(1)
check_alive()
return self.client_port
def _poll_zombie_process(self):
        # check whether any server killed by another vdl app instance has become a zombie
should_delete = []
for server_id, process in self.opened_servers.items():
if process.poll() is not None:
mark_pid_for_dead_process(server_id)
should_delete.append(server_id)
for server_id in should_delete:
del self.opened_servers[server_id]
def create_fastdeploy_api_call():
api = FastDeployServerApi()
routes = {
'get_directory': (api.get_directory, ['dir']),
'config_update': (api.config_update,
['dir', 'name', 'config', 'config_filename']),
'get_config': (api.get_config, ['dir']),
'get_config_filenames_for_model': (api.get_config_filenames_for_model,
['dir', 'name']),
'get_config_for_model': (api.get_config_for_model,
['dir', 'name', 'config_filename']),
'set_default_config_for_model': (api.set_default_config_for_model,
['dir', 'name', 'config_filename']),
'delete_config_for_model': (api.delete_config_for_model,
['dir', 'name', 'config_filename']),
'start_server': (api.start_server, ['config']),
'stop_server': (api.stop_server, ['server_id']),
'get_server_output': (api.get_server_output, ['server_id', 'length']),
'create_fastdeploy_client': (api.create_fastdeploy_client, []),
'get_server_list': (api.get_server_list, []),
'get_server_metric': (api.get_server_metric, ['server_id']),
'get_server_config': (api.get_server_config, ['server_id']),
'get_pretrain_model_list': (api.get_pretrain_model_list, []),
'check_server_alive': (api.check_server_alive, ['server_id']),
'download_pretrain_model':
(api.download_pretrain_model,
['dir', 'name', 'version', 'pretrain_model_name']),
'delete_resource_for_model':
(api.delete_resource_for_model,
['dir', 'name', 'version', 'resource_filename']),
'rename_resource_for_model': (api.rename_resource_for_model, [
'dir', 'name', 'version', 'resource_filename', 'new_filename'
])
}
def call(path: str, args):
route = routes.get(path)
if not route:
return json.dumps(gen_result(
status=1, msg='api not found')), 'application/json', None
method, call_arg_names = route
call_args = [args.get(name) for name in call_arg_names]
return method(*call_args)
return call
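# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): the callable
# returned by create_fastdeploy_api_call() looks up the requested route in the
# table above, pulls the named arguments out of the request dict and forwards
# them to FastDeployServerApi. The argument values below are hypothetical.
#
#   api_call = create_fastdeploy_api_call()
#   # known route: 'get_directory' takes a single 'dir' argument
#   response = api_call('get_directory', {'dir': '.'})
#   # unknown route: a JSON error built by gen_result(status=1, msg='api not found')
#   response = api_call('no_such_api', {})
# ---------------------------------------------------------------------------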
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2018, TensorFlow Authors. All rights reserved.
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
//@@
//@@.. cpp:enum:: DataType
//@@
//@@ Data types supported for input and output tensors.
//@@
enum DataType {
//@@ .. cpp:enumerator:: DataType::INVALID = 0
TYPE_INVALID = 0;
//@@ .. cpp:enumerator:: DataType::BOOL = 1
TYPE_BOOL = 1;
//@@ .. cpp:enumerator:: DataType::UINT8 = 2
TYPE_UINT8 = 2;
//@@ .. cpp:enumerator:: DataType::UINT16 = 3
TYPE_UINT16 = 3;
//@@ .. cpp:enumerator:: DataType::UINT32 = 4
TYPE_UINT32 = 4;
//@@ .. cpp:enumerator:: DataType::UINT64 = 5
TYPE_UINT64 = 5;
//@@ .. cpp:enumerator:: DataType::INT8 = 6
TYPE_INT8 = 6;
//@@ .. cpp:enumerator:: DataType::INT16 = 7
TYPE_INT16 = 7;
//@@ .. cpp:enumerator:: DataType::INT32 = 8
TYPE_INT32 = 8;
//@@ .. cpp:enumerator:: DataType::INT64 = 9
TYPE_INT64 = 9;
//@@ .. cpp:enumerator:: DataType::FP16 = 10
TYPE_FP16 = 10;
//@@ .. cpp:enumerator:: DataType::FP32 = 11
TYPE_FP32 = 11;
//@@ .. cpp:enumerator:: DataType::FP64 = 12
TYPE_FP64 = 12;
//@@ .. cpp:enumerator:: DataType::STRING = 13
TYPE_STRING = 13;
//@@ .. cpp:enumerator:: DataType::BF16 = 14
TYPE_BF16 = 14;
}
//@@
//@@ .. cpp:var:: message ModelRateLimiter
//@@
//@@ The specifications required by the rate limiter to properly
//@@ schedule the inference requests across the different models
//@@ and their instances.
//@@
message ModelRateLimiter
{
//@@ .. cpp:var:: message Resource
//@@
//@@ The resource property.
//@@
message Resource
{
//@@ .. cpp:var:: string name
//@@
//@@ The name associated with the resource.
//@@
string name = 1;
//@@ .. cpp:var:: bool global
//@@
//@@ Whether or not the resource is global. If true then the resource
//@@ is assumed to be shared among the devices otherwise specified
//@@ count of the resource is assumed for each device associated
//@@ with the instance.
//@@
bool global = 2;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of resources required for the execution of the model
//@@ instance.
//@@
uint32 count = 3;
}
//@@ .. cpp:var:: Resource resources (repeated)
//@@
//@@ The resources required to execute the request on a model instance.
//@@ Resources are just names with a corresponding count. The execution
  //@@  of the instance will be blocked until the specified resources are
//@@ available. By default an instance uses no rate-limiter resources.
//@@
repeated Resource resources = 1;
//@@ .. cpp:var:: uint32 priority
//@@
//@@ The optional weighting value to be used for prioritizing across
//@@ instances. An instance with priority 2 will be given 1/2 the
//@@ number of scheduling chances as an instance_group with priority
//@@ 1. The default priority is 1. The priority of value 0 will be
//@@ treated as priority 1.
//@@
uint32 priority = 2;
}
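// Illustrative sketch (not part of the upstream file): a ModelRateLimiter as it
// could appear in a config.pbtxt, attached to an instance group via the
// 'rate_limiter' field defined in ModelInstanceGroup below.
//
//   rate_limiter {
//     resources [ { name: "R1" count: 4 } ]
//     priority: 2
//   }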
//@@
//@@.. cpp:var:: message ModelInstanceGroup
//@@
//@@ A group of one or more instances of a model and resources made
//@@ available for those instances.
//@@
message ModelInstanceGroup
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ Kind of this instance group.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
//@@
//@@ This instance group represents instances that can run on either
//@@ CPU or GPU. If all GPUs listed in 'gpus' are available then
//@@ instances will be created on GPU(s), otherwise instances will
//@@ be created on CPU.
//@@
KIND_AUTO = 0;
//@@ .. cpp:enumerator:: Kind::KIND_GPU = 1
//@@
//@@ This instance group represents instances that must run on the
//@@ GPU.
//@@
KIND_GPU = 1;
//@@ .. cpp:enumerator:: Kind::KIND_CPU = 2
//@@
//@@ This instance group represents instances that must run on the
//@@ CPU.
//@@
KIND_CPU = 2;
//@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
//@@
//@@ This instance group represents instances that should run on the
//@@ CPU and/or GPU(s) as specified by the model or backend itself.
//@@ The inference server will not override the model/backend
//@@ settings.
//@@
KIND_MODEL = 3;
}
//@@
//@@ .. cpp:var:: message SecondaryDevice
//@@
//@@ A secondary device required for a model instance.
//@@
message SecondaryDevice
{
//@@
//@@ .. cpp:enum:: SecondaryDeviceKind
//@@
//@@ The kind of the secondary device.
//@@
enum SecondaryDeviceKind {
//@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
//@@
//@@ An NVDLA core. http://nvdla.org
//@@ Currently KIND_NVDLA is only supported by the TensorRT backend.
//@@
KIND_NVDLA = 0;
}
//@@ .. cpp:var:: SecondaryDeviceKind kind
//@@
//@@ The secondary device kind.
//@@
SecondaryDeviceKind kind = 1;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ Identifier for the secondary device.
//@@
int64 device_id = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ Optional name of this group of instances. If not specified the
//@@ name will be formed as <model name>_<group number>. The name of
//@@ individual instances will be further formed by a unique instance
//@@ number and GPU index:
//@@
string name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this instance group. Default is KIND_AUTO. If
//@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and
//@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
//@@ and 'gpu' cannot be specified.
//@@
Kind kind = 4;
//@@ .. cpp:var:: int32 count
//@@
//@@ For a group assigned to GPU, the number of instances created for
//@@ each GPU listed in 'gpus'. For a group assigned to CPU the number
//@@ of instances created. Default is 1.
int32 count = 2;
//@@ .. cpp:var:: ModelRateLimiter rate_limiter
//@@
//@@ The rate limiter specific settings to be associated with this
//@@ instance group. Optional, if not specified no rate limiting
//@@ will be applied to this instance group.
//@@
ModelRateLimiter rate_limiter = 6;
//@@ .. cpp:var:: int32 gpus (repeated)
//@@
//@@ GPU(s) where instances should be available. For each GPU listed,
//@@ 'count' instances of the model will be available. Setting 'gpus'
  //@@  to empty (or not specifying at all) is equivalent to listing all
//@@ available GPUs.
//@@
repeated int32 gpus = 3;
//@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
//@@
//@@ Secondary devices that are required by instances specified by this
//@@ instance group. Optional.
//@@
repeated SecondaryDevice secondary_devices = 8;
//@@ .. cpp:var:: string profile (repeated)
//@@
//@@ For TensorRT models containing multiple optimization profile, this
//@@ parameter specifies a set of optimization profiles available to this
//@@ instance group. The inference server will choose the optimal profile
//@@ based on the shapes of the input tensors. This field should lie
//@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
//@@ and be specified only for TensorRT backend, otherwise an error will
//@@ be generated. If not specified, the server will select the first
//@@ optimization profile by default.
//@@
repeated string profile = 5;
//@@ .. cpp:var:: bool passive
//@@
//@@ Whether the instances within this instance group will be accepting
//@@ inference requests from the scheduler. If true, the instances will
//@@ not be added to the scheduler. Default value is false.
//@@
bool passive = 7;
//@@ .. cpp:var:: string host_policy
//@@
//@@ The host policy name that the instance to be associated with.
//@@ The default value is set to reflect the device kind of the instance,
//@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
//@@ KIND_GPU is "gpu_<gpu_id>".
//@@
string host_policy = 9;
}
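// Illustrative sketch (not part of the upstream file): a ModelInstanceGroup as it
// could appear in a config.pbtxt, assuming the conventional 'instance_group'
// field of the top-level model configuration (not shown in this excerpt).
//
//   instance_group [
//     {
//       count: 2
//       kind: KIND_GPU
//       gpus: [ 0, 1 ]
//     }
//   ]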
//@@
//@@.. cpp:var:: message ModelTensorReshape
//@@
//@@ Reshape specification for input and output tensors.
//@@
message ModelTensorReshape
{
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The shape to use for reshaping.
//@@
repeated int64 shape = 1;
}
//@@
//@@.. cpp:var:: message ModelInput
//@@
//@@ An input required by the model.
//@@
message ModelInput
{
//@@
//@@ .. cpp:enum:: Format
//@@
//@@ The format for the input.
//@@
enum Format {
//@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
//@@
//@@ The input has no specific format. This is the default.
//@@
FORMAT_NONE = 0;
//@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
//@@
//@@ HWC image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NHWC = 1;
//@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
//@@
//@@ CHW image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NCHW = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the input.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: Format format
//@@
//@@ The format of the input. Optional.
//@@
Format format = 3;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the input tensor that must be provided
//@@ when invoking the inference API for this model.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape expected for this input by the backend. The input will
//@@ be reshaped to this before being presented to the backend. The
//@@ reshape must have the same number of elements as the input shape
//@@ specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the input is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
//@@ .. cpp:var:: bool allow_ragged_batch
//@@
//@@ Whether or not the input is allowed to be "ragged" in a dynamically
//@@ created batch. Default is false indicating that two requests will
//@@ only be batched if this tensor has the same shape in both requests.
//@@ True indicates that two requests can be batched even if this tensor
//@@ has a different shape in each request.
//@@
bool allow_ragged_batch = 7;
//@@ .. cpp:var:: bool optional
//@@
//@@ Whether or not the input is optional for the model execution.
//@@ If true, the input is not required in the inference request.
//@@ Default value is false.
//@@
bool optional = 8;
}
//@@
//@@.. cpp:var:: message ModelOutput
//@@
//@@ An output produced by the model.
//@@
message ModelOutput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the output.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the output tensor.
//@@
repeated int64 dims = 3;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape produced for this output by the backend. The output will
  //@@  be reshaped from this to the shape specified in 'dims' before being
//@@ returned in the inference response. The reshape must have the same
//@@ number of elements as the output shape specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: string label_filename
//@@
//@@ The label file associated with this output. Should be specified only
//@@ for outputs that represent classifications. Optional.
//@@
string label_filename = 4;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the output is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
}
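// Illustrative sketch (not part of the upstream file): ModelInput and ModelOutput
// entries as they could appear in a config.pbtxt, assuming the conventional
// 'input' and 'output' fields of the top-level model configuration; the tensor
// names are hypothetical.
//
//   input [
//     {
//       name: "image"
//       data_type: TYPE_FP32
//       format: FORMAT_NCHW
//       dims: [ 3, 224, 224 ]
//     }
//   ]
//   output [
//     {
//       name: "scores"
//       data_type: TYPE_FP32
//       dims: [ 1000 ]
//       label_filename: "labels.txt"
//     }
//   ]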
//@@ .. cpp:var:: message BatchInput
//@@
//@@ A batch input is an additional input that must be added by
//@@ the backend based on all the requests in a batch.
//@@
message BatchInput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch input.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
//@@
//@@ The element count of the 'source_input' will be added as
//@@ input with shape [1].
//@@
BATCH_ELEMENT_COUNT = 0;
//@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1]. For example, if there is a
    //@@        batch of two requests, each with 2 elements, an input of value
//@@ 2 will be added to the first request, and an input of value
//@@ 4 will be added to the second request.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT = 1;
//@@ .. cpp:enumerator::
//@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1], except for the first request
//@@ in the batch. For the first request in the batch, the input
//@@ will have shape [2] where the first element is value 0.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;
//@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
//@@
//@@ Among the requests in the batch, the max element count of the
//@@ 'source_input' will be added as input with shape
//@@ [max_element_count] for the first request in the batch.
//@@ For other requests, such input will be with shape [0].
//@@ The data of the tensor will be uninitialized.
//@@
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with shape
//@@ [batch_size, len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]].
//@@
BATCH_ITEM_SHAPE = 4;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with single dimensional
//@@ shape [batch_size * len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [6] and value [3, 1, 3, 1, 2, 2].
//@@
BATCH_ITEM_SHAPE_FLATTEN = 5;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch input.
//@@
Kind kind = 1;
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the model inputs that the backend will create
//@@ for this batch input.
//@@
repeated string target_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The input's datatype. The data type can be TYPE_INT32 or
//@@ TYPE_FP32.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives the value for each batch input from one or
//@@ more other inputs. 'source_input' gives the names of those
//@@ inputs.
//@@
repeated string source_input = 4;
}
//@@.. cpp:var:: message BatchOutput
//@@
//@@ A batch output is an output produced by the model that must be handled
//@@ differently by the backend based on all the requests in a batch.
//@@
message BatchOutput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch output.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
//@@
//@@ The output should be scattered according to the shape of
//@@ 'source_input'. The dynamic dimension of the output will
//@@ be set to the value of the same dimension in the input.
//@@
BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
}
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the outputs to be produced by this batch output
//@@ specification.
//@@
repeated string target_name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch output.
//@@
Kind kind = 2;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives each batch output from one or more inputs.
//@@ 'source_input' gives the names of those inputs.
//@@
repeated string source_input = 3;
}
//@@
//@@.. cpp:var:: message ModelVersionPolicy
//@@
//@@ Policy indicating which versions of a model should be made
//@@ available by the inference server.
//@@
message ModelVersionPolicy
{
//@@ .. cpp:var:: message Latest
//@@
//@@ Serve only the latest version(s) of a model. This is
//@@ the default policy.
//@@
message Latest
{
//@@ .. cpp:var:: uint32 num_versions
//@@
    //@@      Serve only the 'num_versions' highest-numbered versions.
//@@ The default value of 'num_versions' is 1, indicating that by
//@@ default only the single highest-number version of a
//@@ model will be served.
//@@
uint32 num_versions = 1;
}
//@@ .. cpp:var:: message All
//@@
//@@ Serve all versions of the model.
//@@
message All {}
//@@ .. cpp:var:: message Specific
//@@
//@@ Serve only specific versions of the model.
//@@
message Specific
{
//@@ .. cpp:var:: int64 versions (repeated)
//@@
//@@ The specific versions of the model that will be served.
//@@
repeated int64 versions = 1;
}
//@@ .. cpp:var:: oneof policy_choice
//@@
//@@ Each model must implement only a single version policy. The
//@@ default policy is 'Latest'.
//@@
oneof policy_choice
{
//@@ .. cpp:var:: Latest latest
//@@
//@@ Serve only latest version(s) of the model.
//@@
Latest latest = 1;
//@@ .. cpp:var:: All all
//@@
//@@ Serve all versions of the model.
//@@
All all = 2;
//@@ .. cpp:var:: Specific specific
//@@
//@@ Serve only specific version(s) of the model.
//@@
Specific specific = 3;
}
}
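// Illustrative sketch (not part of the upstream file): the three version policies
// as they could appear in a config.pbtxt, assuming the conventional
// 'version_policy' field of the top-level model configuration; exactly one of
// the alternatives would be used.
//
//   version_policy { latest { num_versions: 2 } }
//   version_policy { all {} }
//   version_policy { specific { versions: [ 1, 3 ] } }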
//@@
//@@.. cpp:var:: message ModelOptimizationPolicy
//@@
//@@ Optimization settings for a model. These settings control if/how a
//@@ model is optimized and prioritized by the backend framework when
//@@ it is loaded.
//@@
message ModelOptimizationPolicy
{
//@@
//@@ .. cpp:var:: message Graph
//@@
//@@ Enable generic graph optimization of the model. If not specified
//@@ the framework's default level of optimization is used. Supports
//@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow
//@@ causes XLA to be enabled/disabled for the model. For Onnx defaults
//@@ to enabling all optimizations, -1 enables only basic optimizations,
//@@ +1 enables only basic and extended optimizations.
//@@
message Graph
{
//@@ .. cpp:var:: int32 level
//@@
//@@ The optimization level. Defaults to 0 (zero) if not specified.
//@@
//@@ - -1: Disabled
//@@ - 0: Framework default
//@@ - 1+: Enable optimization level (greater values indicate
//@@ higher optimization levels)
//@@
int32 level = 1;
}
//@@
//@@ .. cpp:enum:: ModelPriority
//@@
//@@ Model priorities. A model will be given scheduling and execution
//@@ preference over models at lower priorities. Current model
//@@ priorities only work for TensorRT models.
//@@
enum ModelPriority {
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
//@@
//@@ The default model priority.
//@@
PRIORITY_DEFAULT = 0;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
//@@
//@@ The maximum model priority.
//@@
PRIORITY_MAX = 1;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
//@@
//@@ The minimum model priority.
//@@
PRIORITY_MIN = 2;
}
//@@
//@@ .. cpp:var:: message Cuda
//@@
//@@ CUDA-specific optimization settings.
//@@
message Cuda
{
//@@ .. cpp:var:: message GraphSpec
//@@
//@@ Specification of the CUDA graph to be captured.
//@@
message GraphSpec
{
      //@@ .. cpp:var:: message Shape
//@@
//@@ Specification of tensor dimension.
//@@
message Shape
{
//@@ .. cpp:var:: int64 dim (repeated)
//@@
//@@ The dimension.
//@@
repeated int64 dim = 1;
}
message LowerBound
{
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of
//@@ the input without batching dimension.
//@@
map<string, Shape> input = 2;
}
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of the
//@@ input without batching dimension.
//@@
map<string, Shape> input = 2;
//@@ .. cpp:var:: LowerBound graph_lower_bound
//@@
//@@ Specify the lower bound of the CUDA graph. Optional.
//@@ If specified, the graph can be used for input shapes and
//@@ batch sizes that are in closed interval between the lower
//@@ bound specification and graph specification. For dynamic
//@@ shape model, this allows CUDA graphs to be launched
//@@ frequently without capturing all possible shape combinations.
//@@ However, using graph for shape combinations different from
//@@ the one used for capturing introduces uninitialized data for
//@@ execution and it may distort the inference result if
//@@ the model is sensitive to uninitialized data.
//@@
LowerBound graph_lower_bound = 3;
}
//@@ .. cpp:var:: bool graphs
//@@
//@@ Use CUDA graphs API to capture model operations and execute
//@@ them more efficiently. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool graphs = 1;
//@@ .. cpp:var:: bool busy_wait_events
//@@
//@@ Use busy-waiting to synchronize CUDA events to achieve minimum
//@@ latency from event complete to host thread to be notified, with
//@@ the cost of high CPU load. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool busy_wait_events = 2;
//@@ .. cpp:var:: GraphSpec graph_spec (repeated)
//@@
//@@ Specification of the CUDA graph to be captured. If not specified
//@@ and 'graphs' is true, the default CUDA graphs will be captured
//@@ based on model settings.
//@@ Currently only recognized by TensorRT backend.
//@@
repeated GraphSpec graph_spec = 3;
//@@ .. cpp:var:: bool output_copy_stream
//@@
//@@ Uses a CUDA stream separate from the inference stream to copy the
//@@ output to host. However, be aware that setting this option to
//@@ true will lead to an increase in the memory consumption of the
//@@ model as Triton will allocate twice as much GPU memory for its
//@@ I/O tensor buffers. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool output_copy_stream = 4;
}
//@@
//@@ .. cpp:var:: message ExecutionAccelerators
//@@
//@@ Specify the preferred execution accelerators to be used to execute
//@@ the model. Currently only recognized by ONNX Runtime backend and
//@@ TensorFlow backend.
//@@
//@@ For ONNX Runtime backend, it will deploy the model with the execution
//@@ accelerators by priority, the priority is determined based on the
//@@ order that they are set, i.e. the provider at the front has highest
//@@ priority. Overall, the priority will be in the following order:
//@@ <gpu_execution_accelerator> (if instance is on GPU)
//@@ CUDA Execution Provider (if instance is on GPU)
//@@ <cpu_execution_accelerator>
//@@ Default CPU Execution Provider
//@@
message ExecutionAccelerators
{
//@@
//@@ .. cpp:var:: message Accelerator
//@@
//@@ Specify the accelerator to be used to execute the model.
//@@ Accelerator with the same name may accept different parameters
//@@ depending on the backends.
//@@
message Accelerator
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the execution accelerator.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
      //@@      Additional parameters used to configure the accelerator.
//@@
map<string, string> parameters = 2;
}
//@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on GPU.
//@@
//@@ For ONNX Runtime backend, possible value is "tensorrt" as name,
//@@ and no parameters are required.
//@@
//@@ For TensorFlow backend, possible values are "tensorrt",
//@@ "auto_mixed_precision", "gpu_io".
//@@
//@@ For "tensorrt", the following parameters can be specified:
//@@ "precision_mode": The precision used for optimization.
//@@ Allowed values are "FP32" and "FP16". Default value is "FP32".
//@@
//@@ "max_cached_engines": The maximum number of cached TensorRT
//@@ engines in dynamic TensorRT ops. Default value is 100.
//@@
//@@ "minimum_segment_size": The smallest model subgraph that will
//@@ be considered for optimization by TensorRT. Default value is 3.
//@@
//@@ "max_workspace_size_bytes": The maximum GPU memory the model
//@@ can use temporarily during execution. Default value is 1GB.
//@@
//@@ For "auto_mixed_precision", no parameters are required. If set,
//@@ the model will try to use FP16 for better performance.
//@@ This optimization can not be set with "tensorrt".
//@@
//@@ For "gpu_io", no parameters are required. If set, the model will
//@@ be executed using TensorFlow Callable API to set input and output
//@@ tensors in GPU memory if possible, which can reduce data transfer
//@@ overhead if the model is used in ensemble. However, the Callable
//@@ object will be created on model creation and it will request all
//@@ outputs for every model execution, which may impact the
//@@ performance if a request does not require all outputs. This
    //@@      optimization will only take effect if the model instance is
//@@ created with KIND_GPU.
//@@
repeated Accelerator gpu_execution_accelerator = 1;
//@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on CPU.
//@@
//@@ For ONNX Runtime backend, possible value is "openvino" as name,
//@@ and no parameters are required.
//@@
repeated Accelerator cpu_execution_accelerator = 2;
}
//@@
//@@ .. cpp:var:: message PinnedMemoryBuffer
//@@
//@@ Specify whether to use a pinned memory buffer when transferring data
//@@ between non-pinned system memory and GPU memory. Using a pinned
//@@ memory buffer for system from/to GPU transfers will typically provide
//@@ increased performance. For example, in the common use case where the
//@@ request provides inputs and delivers outputs via non-pinned system
//@@ memory, if the model instance accepts GPU IOs, the inputs will be
//@@ processed by two copies: from non-pinned system memory to pinned
//@@ memory, and from pinned memory to GPU memory. Similarly, pinned
//@@ memory will be used for delivering the outputs.
//@@
message PinnedMemoryBuffer
{
//@@ .. cpp:var:: bool enable
//@@
//@@ Use pinned memory buffer. Default is true.
//@@
bool enable = 1;
}
//@@ .. cpp:var:: Graph graph
//@@
//@@ The graph optimization setting for the model. Optional.
//@@
Graph graph = 1;
//@@ .. cpp:var:: ModelPriority priority
//@@
//@@ The priority setting for the model. Optional.
//@@
ModelPriority priority = 2;
//@@ .. cpp:var:: Cuda cuda
//@@
//@@ CUDA-specific optimization settings. Optional.
//@@
Cuda cuda = 3;
//@@ .. cpp:var:: ExecutionAccelerators execution_accelerators
//@@
//@@ The accelerators used for the model. Optional.
//@@
ExecutionAccelerators execution_accelerators = 4;
//@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for inputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer input_pinned_memory = 5;
//@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for outputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer output_pinned_memory = 6;
//@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
//@@
//@@ The backend may use a gather kernel to gather input data if the
//@@ device has direct access to the source buffer and the destination
//@@ buffer. In such case, the gather kernel will be used only if the
//@@ number of buffers to be gathered is greater or equal to
  //@@  the specified value. If 0, the gather kernel will be disabled.
//@@ Default value is 0.
//@@ Currently only recognized by TensorRT backend.
//@@
uint32 gather_kernel_buffer_threshold = 7;
//@@ .. cpp:var:: bool eager_batching
//@@
//@@ Start preparing the next batch before the model instance is ready
//@@ for the next inference. This option can be used to overlap the
//@@ batch preparation with model execution, with the trade-off that
//@@ the next batch might be smaller than what it could have been.
//@@ Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool eager_batching = 8;
}
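// Illustrative sketch (not part of the upstream file): an optimization section as
// it could appear in a config.pbtxt, assuming the conventional 'optimization'
// field of the top-level model configuration.
//
//   optimization {
//     graph { level: 1 }
//     execution_accelerators {
//       gpu_execution_accelerator [
//         {
//           name: "tensorrt"
//           parameters { key: "precision_mode" value: "FP16" }
//         }
//       ]
//     }
//   }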
//@@
//@@.. cpp:var:: message ModelQueuePolicy
//@@
//@@ Queue policy for inference requests.
//@@
message ModelQueuePolicy
{
//@@
//@@ .. cpp:enum:: TimeoutAction
//@@
//@@ The action applied to timed-out requests.
//@@
enum TimeoutAction {
//@@ .. cpp:enumerator:: Action::REJECT = 0
//@@
//@@ Reject the request and return error message accordingly.
//@@
REJECT = 0;
//@@ .. cpp:enumerator:: Action::DELAY = 1
//@@
//@@ Delay the request until all other requests at the same
//@@ (or higher) priority levels that have not reached their timeouts
//@@ are processed. A delayed request will eventually be processed,
//@@ but may be delayed indefinitely due to newly arriving requests.
//@@
DELAY = 1;
}
//@@
//@@ .. cpp:var:: TimeoutAction timeout_action
//@@
//@@ The action applied to timed-out request.
//@@ The default action is REJECT.
//@@
TimeoutAction timeout_action = 1;
//@@
//@@ .. cpp:var:: uint64 default_timeout_microseconds
//@@
//@@ The default timeout for every request, in microseconds.
//@@ The default value is 0 which indicates that no timeout is set.
//@@
uint64 default_timeout_microseconds = 2;
//@@
//@@ .. cpp:var:: bool allow_timeout_override
//@@
//@@ Whether individual request can override the default timeout value.
//@@ When true, individual requests can set a timeout that is less than
//@@ the default timeout value but may not increase the timeout.
//@@ The default value is false.
//@@
bool allow_timeout_override = 3;
//@@
//@@ .. cpp:var:: uint32 max_queue_size
//@@
//@@ The maximum queue size for holding requests. A request will be
//@@ rejected immediately if it can't be enqueued because the queue is
//@@ full. The default value is 0 which indicates that no maximum
//@@ queue size is enforced.
//@@
uint32 max_queue_size = 4;
}
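// Illustrative sketch (not part of the upstream file): a ModelQueuePolicy as it
// could appear as the 'default_queue_policy' of dynamic batching (see
// ModelDynamicBatching below).
//
//   default_queue_policy {
//     timeout_action: DELAY
//     default_timeout_microseconds: 100000
//     allow_timeout_override: true
//     max_queue_size: 16
//   }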
//@@
//@@.. cpp:var:: message ModelDynamicBatching
//@@
//@@ Dynamic batching configuration. These settings control how dynamic
//@@ batching operates for the model.
//@@
message ModelDynamicBatching
{
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching. If a batch of one of
//@@ these sizes can be formed it will be executed immediately. If
//@@ not specified a preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 1;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a request will be delayed in
//@@ the scheduling queue to wait for additional requests for
//@@ batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 2;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order as
//@@ the order of requests sent to the scheduler. If false, the responses
//@@ may be returned in arbitrary order. This option is specifically
//@@ needed when a sequence of related inference requests (i.e. inference
//@@ requests with the same correlation ID) are sent to the dynamic
//@@ batcher to ensure that the sequence responses are in the correct
//@@ order.
//@@
bool preserve_ordering = 3;
//@@ .. cpp:var:: uint32 priority_levels
//@@
//@@ The number of priority levels to be enabled for the model,
//@@ the priority level starts from 1 and 1 is the highest priority.
//@@ Requests are handled in priority order with all priority 1 requests
//@@ processed before priority 2, all priority 2 requests processed before
//@@ priority 3, etc. Requests with the same priority level will be
//@@ handled in the order that they are received.
//@@
uint32 priority_levels = 4;
//@@ .. cpp:var:: uint32 default_priority_level
//@@
//@@ The priority level used for requests that don't specify their
//@@ priority. The value must be in the range [ 1, 'priority_levels' ].
//@@
uint32 default_priority_level = 5;
//@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
//@@
//@@ The default queue policy used for requests that don't require
//@@ priority handling and requests that specify priority levels where
//@@ there is no specific policy given. If not specified, a policy with
//@@ default field values will be used.
//@@
ModelQueuePolicy default_queue_policy = 6;
//@@ .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
//@@
//@@ Specify the queue policy for the priority level. The default queue
//@@ policy will be used if a priority level doesn't specify a queue
//@@ policy.
//@@
map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
}
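// Illustrative sketch (not part of the upstream file): a dynamic batching section
// as it could appear in a config.pbtxt, assuming the conventional
// 'dynamic_batching' field of the top-level model configuration.
//
//   dynamic_batching {
//     preferred_batch_size: [ 4, 8 ]
//     max_queue_delay_microseconds: 100
//     preserve_ordering: true
//   }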
//@@
//@@.. cpp:var:: message ModelSequenceBatching
//@@
//@@ Sequence batching configuration. These settings control how sequence
//@@ batching operates for the model.
//@@
message ModelSequenceBatching
{
//@@ .. cpp:var:: message Control
//@@
//@@ A control is a signal that the sequence batcher uses to
//@@ communicate with a backend.
//@@
message Control
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the control.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
//@@
//@@ A new sequence is/is-not starting. If true a sequence is
//@@ starting, if false a sequence is continuing. Must
//@@ specify either int32_false_true, fp32_false_true or
//@@ bool_false_true for this control. This control is optional.
//@@
CONTROL_SEQUENCE_START = 0;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
//@@
//@@ A sequence is/is-not ready for inference. If true the
//@@ input tensor data is valid and should be used. If false
//@@ the input tensor data is invalid and inferencing should
//@@ be "skipped". Must specify either int32_false_true,
//@@ fp32_false_true or bool_false_true for this control. This
//@@ control is optional.
//@@
CONTROL_SEQUENCE_READY = 1;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
//@@
//@@ A sequence is/is-not ending. If true a sequence is
//@@ ending, if false a sequence is continuing. Must specify
//@@ either int32_false_true, fp32_false_true or bool_false_true
//@@ for this control. This control is optional.
//@@
CONTROL_SEQUENCE_END = 2;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
//@@
//@@ The correlation ID of the sequence. The correlation ID
//@@ is an uint64_t value that is communicated in whole or
//@@ in part by the tensor. The tensor's datatype must be
//@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
//@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
//@@ the correlation ID will be truncated to the low-order 32
//@@ bits. This control is optional.
//@@
CONTROL_SEQUENCE_CORRID = 3;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this control.
//@@
Kind kind = 1;
//@@ .. cpp:var:: int32 int32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in an int32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'int32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated int32 int32_false_true = 2;
//@@ .. cpp:var:: float fp32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a fp32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'fp32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated float fp32_false_true = 3;
//@@ .. cpp:var:: bool bool_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a bool tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'bool_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated bool bool_false_true = 5;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The control's datatype.
//@@
DataType data_type = 4;
}
//@@ .. cpp:var:: message ControlInput
//@@
//@@ The sequence control values to communicate by a model input.
//@@
message ControlInput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model input.
//@@
string name = 1;
//@@ .. cpp:var:: Control control (repeated)
//@@
//@@ The control value(s) that should be communicated to the
//@@ model using this model input.
//@@
repeated Control control = 2;
}
//@@
//@@ .. cpp:var:: message InitialState
//@@
//@@ Settings used to initialize data for implicit state.
//@@
message InitialState
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the state tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof state_data
//@@
//@@ Specify how the initial state data is generated.
//@@
oneof state_data
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as initial state data.
//@@ Note that the value of 'zero_data' will not be checked,
//@@ instead, zero data will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@ .. cpp:var:: string data_file
//@@
//@@ The file whose content will be used as the initial data for
//@@ the state in row-major order. The file must be provided in
//@@ sub-directory 'initial_state' under the model directory.
//@@
string data_file = 4;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the state initialization.
//@@
string name = 5;
}
//@@ .. cpp:var:: message State
//@@
//@@ An input / output pair of tensors that carry state for the sequence.
//@@
message State
{
//@@ .. cpp:var:: string input_name
//@@
//@@ The name of the model state input.
//@@
string input_name = 1;
//@@ .. cpp:var:: string output_name
//@@
//@@ The name of the model state output.
//@@
string output_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 3;
    //@@ .. cpp:var:: int64 dims (repeated)
    //@@
    //@@     The dimensions/shape of the state tensor.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: InitialState initial_state (repeated)
//@@
//@@ The optional field to specify the initial state for the model.
//@@
repeated InitialState initial_state = 5;
}
//@@ .. cpp:var:: message StrategyDirect
//@@
//@@ The sequence batcher uses a specific, unique batch
//@@ slot for each sequence. All inference requests in a
//@@ sequence are directed to the same batch slot in the same
//@@ model instance over the lifetime of the sequence. This
//@@ is the default strategy.
//@@
message StrategyDirect
{
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the sequence batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 1;
//@@ .. cpp:var:: float minimum_slot_utilization
//@@
//@@ The minimum slot utilization that must be satisfied to
//@@ execute the batch before 'max_queue_delay_microseconds' expires.
//@@ For example, a value of 0.5 indicates that the batch should be
//@@ executed as soon as 50% or more of the slots are ready even if
//@@ the 'max_queue_delay_microseconds' timeout has not expired.
//@@ The default is 0.0, indicating that a batch will be executed
//@@ before 'max_queue_delay_microseconds' timeout expires if at least
//@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
//@@ ignored unless minimum_slot_utilization is set to a non-zero
//@@ value.
//@@
float minimum_slot_utilization = 2;
}
//@@ .. cpp:var:: message StrategyOldest
//@@
//@@ The sequence batcher maintains up to 'max_candidate_sequences'
//@@ candidate sequences. 'max_candidate_sequences' can be greater
//@@ than the model's 'max_batch_size'. For inferencing the batcher
//@@ chooses from the candidate sequences up to 'max_batch_size'
//@@ inference requests. Requests are chosen in an oldest-first
//@@ manner across all candidate sequences. A given sequence is
//@@ not guaranteed to be assigned to the same batch slot for
//@@ all inference requests of that sequence.
//@@
message StrategyOldest
{
//@@ .. cpp:var:: int32 max_candidate_sequences
//@@
//@@ Maximum number of candidate sequences that the batcher
    //@@    maintains. Excess sequences are kept in an ordered backlog
//@@ and become candidates when existing candidate sequences
//@@ complete.
//@@
int32 max_candidate_sequences = 1;
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching of candidate
//@@ sequences. If a batch of one of these sizes can be formed
//@@ it will be executed immediately. If not specified a
//@@ preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 2;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the dynamic batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 3;
}
//@@ .. cpp:var:: oneof strategy_choice
//@@
//@@ The strategy used by the sequence batcher. Default strategy
//@@ is 'direct'.
//@@
oneof strategy_choice
{
//@@ .. cpp:var:: StrategyDirect direct
//@@
//@@ StrategyDirect scheduling strategy.
//@@
StrategyDirect direct = 3;
//@@ .. cpp:var:: StrategyOldest oldest
//@@
//@@ StrategyOldest scheduling strategy.
//@@
StrategyOldest oldest = 4;
}
//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
//@@
//@@ The maximum time, in microseconds, that a sequence is allowed to
//@@ be idle before it is aborted. The inference server considers a
//@@ sequence idle when it does not have any inference request queued
//@@ for the sequence. If this limit is exceeded, the inference server
//@@ will free the sequence slot allocated by the sequence and make it
//@@ available for another sequence. If not specified (or specified as
//@@ zero) a default value of 1000000 (1 second) is used.
//@@
uint64 max_sequence_idle_microseconds = 1;
//@@ .. cpp:var:: ControlInput control_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ sequence start, stop, ready and similar control values to the
//@@ model.
//@@
repeated ControlInput control_input = 2;
//@@ .. cpp:var:: State state (repeated)
//@@
//@@ The optional state that can be stored in Triton for performing
//@@ inference requests on a sequence. Each sequence holds an implicit
//@@ state local to itself. The output state tensor provided by the
//@@ model in 'output_name' field of the current inference request will
//@@ be transferred as an input tensor named 'input_name' in the next
//@@ request of the same sequence. The input state of the first request
//@@ in the sequence contains garbage data.
//@@
repeated State state = 5;
}
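// Illustrative sketch (not part of the upstream file): a sequence batching section
// as it could appear in a config.pbtxt, assuming the conventional
// 'sequence_batching' field of the top-level model configuration; the control
// input name is hypothetical.
//
//   sequence_batching {
//     max_sequence_idle_microseconds: 5000000
//     oldest { max_candidate_sequences: 4 }
//     control_input [
//       {
//         name: "START"
//         control [
//           { kind: CONTROL_SEQUENCE_START  int32_false_true: [ 0, 1 ] }
//         ]
//       }
//     ]
//   }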
//@@
//@@.. cpp:var:: message ModelEnsembling
//@@
//@@ Model ensembling configuration. These settings specify the models that
//@@ compose the ensemble and how data flows between the models.
//@@
message ModelEnsembling
{
//@@ .. cpp:var:: message Step
//@@
//@@ Each step specifies a model included in the ensemble,
//@@ maps ensemble tensor names to the model input tensors,
//@@ and maps model output tensors to ensemble tensor names
//@@
message Step
{
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to execute for this step of the ensemble.
//@@
string model_name = 1;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model to use for inference. If -1
//@@ the latest/most-recent version of the model is used.
//@@
int64 model_version = 2;
//@@ .. cpp:var:: map<string,string> input_map
//@@
//@@ Map from name of an input tensor on this step's model to ensemble
//@@ tensor name. The ensemble tensor must have the same data type and
//@@ shape as the model input. Each model input must be assigned to
//@@ one ensemble tensor, but the same ensemble tensor can be assigned
//@@ to multiple model inputs.
//@@
map<string, string> input_map = 3;
//@@ .. cpp:var:: map<string,string> output_map
//@@
//@@ Map from name of an output tensor on this step's model to ensemble
//@@ tensor name. The data type and shape of the ensemble tensor will
//@@ be inferred from the model output. It is optional to assign all
//@@ model outputs to ensemble tensors. One ensemble tensor name
//@@ can appear in an output map only once.
//@@
map<string, string> output_map = 4;
}
//@@ .. cpp:var:: Step step (repeated)
//@@
//@@ The models and the input / output mappings used within the ensemble.
//@@
repeated Step step = 1;
}
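//
// Illustrative sketch (not part of the original schema comments): an
// ensemble_scheduling fragment chaining a hypothetical "preprocess" model
// into a "classifier" model. All model and tensor names here are made up
// for the example; the map keys are model tensor names and the map values
// are ensemble tensor names, as described above.
//
//   ensemble_scheduling {
//     step [
//       {
//         model_name: "preprocess"
//         model_version: -1
//         input_map { key: "raw_image" value: "IMAGE" }
//         output_map { key: "preprocessed" value: "preprocessed_image" }
//       },
//       {
//         model_name: "classifier"
//         model_version: -1
//         input_map { key: "input" value: "preprocessed_image" }
//         output_map { key: "scores" value: "CLASSIFICATION" }
//       }
//     ]
//   }
//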
//@@
//@@.. cpp:var:: message ModelParameter
//@@
//@@ A model parameter.
//@@
message ModelParameter
{
//@@ .. cpp:var:: string string_value
//@@
//@@ The string value of the parameter.
//@@
string string_value = 1;
}
//@@
//@@.. cpp:var:: message ModelWarmup
//@@
//@@ Settings used to construct the request sample for model warmup.
//@@
message ModelWarmup
{
//@@
//@@ .. cpp:var:: message Input
//@@
//@@ Meta data associated with an input.
//@@
message Input
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof input_data_type
//@@
//@@ Specify how the input data is generated. If the input has STRING
//@@ data type and 'random_data' is set, the data generation will fall
//@@ back to 'zero_data'.
//@@
oneof input_data_type
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@  The identifier for using zeros as input data. Note that the
//@@  value of 'zero_data' will not be checked; instead, zero data
//@@  will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@
//@@ .. cpp:var:: bool random_data
//@@
//@@  The identifier for using random data as input data. Note that
//@@  the value of 'random_data' will not be checked; instead,
//@@  random data will be used as long as the field is set.
//@@
bool random_data = 4;
//@@ .. cpp:var:: string input_data_file
//@@
//@@ The file whose content will be used as raw input data in
//@@ row-major order. The file must be provided in a sub-directory
//@@ 'warmup' under the model directory. The file contents should be
//@@ in binary format. For TYPE_STRING data-type, an element is
//@@ represented by a 4-byte unsigned integer giving the length
//@@ followed by the actual bytes.
//@@
string input_data_file = 5;
}
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the request sample.
//@@
string name = 1;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1. If
//@@ batch_size > 1, the 'inputs' specified below will be duplicated to
//@@ match the batch size requested.
//@@
uint32 batch_size = 2;
//@@ .. cpp:var:: map<string, Input> inputs
//@@
//@@ The warmup meta data associated with every model input, including
//@@ control tensors.
//@@
map<string, Input> inputs = 3;
//@@ .. cpp:var:: uint32 count
//@@
//@@  The number of times this warmup sample will be executed. For
//@@  example, if this field is set to 2, two model executions using this
//@@  sample will be scheduled for warmup. The default value is 0, which
//@@  indicates that this sample will be used only once.
//@@  Note that for sequence models, 'count' may not work well
//@@  because such models often expect a valid sequence of requests, which
//@@  should be represented by a series of warmup samples. 'count > 1'
//@@  essentially "resends" one of the samples, which may invalidate the
//@@  sequence and result in unexpected warmup failure.
//@@
uint32 count = 4;
}
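//
// Illustrative sketch (not part of the original schema comments): a
// model_warmup entry that runs one zero-filled sample before the model is
// marked ready. The input name and shape are assumptions for the example.
//
//   model_warmup [
//     {
//       name: "zero_sample"
//       batch_size: 1
//       inputs {
//         key: "INPUT0"
//         value {
//           data_type: TYPE_FP32
//           dims: [ 3, 224, 224 ]
//           zero_data: true
//         }
//       }
//     }
//   ]
//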
//@@
//@@ .. cpp:var:: message ModelOperations
//@@
//@@ The metadata of libraries providing custom operations for this model.
//@@
message ModelOperations
{
//@@ .. cpp:var:: string op_library_filename (repeated)
//@@
//@@ Optional paths of the libraries providing custom operations for
//@@ this model. Valid only for ONNX models.
//@@
repeated string op_library_filename = 1;
}
//@@
//@@ .. cpp:var:: message ModelTransactionPolicy
//@@
//@@ The specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
message ModelTransactionPolicy
{
//@@ .. cpp:var:: bool decoupled
//@@
//@@  Indicates whether responses generated by the model are decoupled from
//@@  the requests issued to it, which means the number of responses
//@@  generated by the model may differ from the number of requests issued,
//@@  and that the responses may be out of order relative to the order of
//@@  requests. The default is false, which means the model will generate
//@@  exactly one response for each request.
//@@
bool decoupled = 1;
}
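//
// Illustrative sketch (not part of the original schema comments): marking
// a model as decoupled so that it may return zero, one or many responses
// per request.
//
//   model_transaction_policy { decoupled: true }
//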
//@@
//@@.. cpp:var:: message ModelRepositoryAgents
//@@
//@@ The repository agents for the model.
//@@
message ModelRepositoryAgents
{
//@@
//@@ .. cpp:var:: message Agent
//@@
//@@ A repository agent that should be invoked for the specified
//@@ repository actions for this model.
//@@
message Agent
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the agent.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ The parameters for the agent.
//@@
map<string, string> parameters = 2;
}
//@@
//@@ .. cpp:var:: Agent agents (repeated)
//@@
//@@  The ordered list of agents for the model. These agents will be
//@@  invoked in order to respond to repository actions occurring for the
//@@  model.
//@@
repeated Agent agents = 1;
}
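//
// Illustrative sketch (not part of the original schema comments):
// attaching a repository agent to a model. The agent name and parameter
// key are assumptions; whichever agent is referenced must be installed on
// the serving side.
//
//   model_repository_agents {
//     agents [
//       {
//         name: "checksum"
//         parameters { key: "MD5:model.onnx" value: "<expected md5 hash>" }
//       }
//     ]
//   }
//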
//@@
//@@.. cpp:var:: message ModelResponseCache
//@@
//@@ The response cache setting for the model.
//@@
message ModelResponseCache
{
//@@
//@@  .. cpp:var:: bool enable
//@@
//@@     Whether or not to use the response cache for the model. If true,
//@@     responses from the model are cached and, when an identical request
//@@     is encountered, the cached response is returned instead of
//@@     executing the model. By default, the response cache is disabled
//@@     for the model.
//@@
bool enable = 1;
}
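//
// Illustrative sketch (not part of the original schema comments): opting a
// model into the response cache. Note that the server itself typically
// also needs caching enabled at startup for this to take effect.
//
//   response_cache { enable: true }
//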
//@@
//@@.. cpp:var:: message ModelConfig
//@@
//@@ A model configuration.
//@@
message ModelConfig
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string platform
//@@
//@@ The framework for the model. Possible values are
//@@ "tensorrt_plan", "tensorflow_graphdef",
//@@ "tensorflow_savedmodel", "onnxruntime_onnx",
//@@ "pytorch_libtorch".
//@@
string platform = 2;
//@@ .. cpp:var:: string backend
//@@
//@@ The backend used by the model.
//@@
string backend = 17;
//@@ .. cpp:var:: ModelVersionPolicy version_policy
//@@
//@@ Policy indicating which version(s) of the model will be served.
//@@
ModelVersionPolicy version_policy = 3;
//@@ .. cpp:var:: int32 max_batch_size
//@@
//@@ Maximum batch size allowed for inference. This can only decrease
//@@ what is allowed by the model itself. A max_batch_size value of 0
//@@ indicates that batching is not allowed for the model and the
//@@ dimension/shape of the input and output tensors must exactly
//@@ match what is specified in the input and output configuration. A
//@@ max_batch_size value > 0 indicates that batching is allowed and
//@@ so the model expects the input tensors to have an additional
//@@ initial dimension for the batching that is not specified in the
//@@ input (for example, if the model supports batched inputs of
//@@ 2-dimensional tensors then the model configuration will specify
//@@ the input shape as [ X, Y ] but the model will expect the actual
//@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
//@@ returned outputs will also have an additional initial dimension
//@@ for the batch.
//@@
int32 max_batch_size = 4;
//@@ .. cpp:var:: ModelInput input (repeated)
//@@
//@@  The inputs expected by the model.
//@@
repeated ModelInput input = 5;
//@@ .. cpp:var:: ModelOutput output (repeated)
//@@
//@@ The outputs produced by the model.
//@@
repeated ModelOutput output = 6;
//@@ .. cpp:var:: BatchInput batch_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ batch related values to the model.
//@@
repeated BatchInput batch_input = 20;
//@@ .. cpp:var:: BatchOutput batch_output (repeated)
//@@
//@@  The outputs produced by the model that require special handling
//@@ by the model backend.
//@@
repeated BatchOutput batch_output = 21;
//@@ .. cpp:var:: ModelOptimizationPolicy optimization
//@@
//@@  Optimization configuration for the model. If not specified,
//@@  the default optimization policy is used.
//@@
ModelOptimizationPolicy optimization = 12;
//@@ .. cpp:var:: oneof scheduling_choice
//@@
//@@ The scheduling policy for the model. If not specified the
//@@ default scheduling policy is used for the model. The default
//@@ policy is to execute each inference request independently.
//@@
oneof scheduling_choice
{
//@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
//@@
//@@ If specified, enables the dynamic-batching scheduling
//@@ policy. With dynamic-batching the scheduler may group
//@@ together independent requests into a single batch to
//@@ improve inference throughput.
//@@
ModelDynamicBatching dynamic_batching = 11;
//@@ .. cpp:var:: ModelSequenceBatching sequence_batching
//@@
//@@ If specified, enables the sequence-batching scheduling
//@@ policy. With sequence-batching, inference requests
//@@ with the same correlation ID are routed to the same
//@@ model instance. Multiple sequences of inference requests
//@@ may be batched together into a single batch to
//@@ improve inference throughput.
//@@
ModelSequenceBatching sequence_batching = 13;
//@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
//@@
//@@ If specified, enables the model-ensembling scheduling
//@@ policy. With model-ensembling, inference requests
//@@ will be processed according to the specification, such as an
//@@ execution sequence of models. The input specified in this model
//@@ config will be the input for the ensemble, and the output
//@@ specified will be the output of the ensemble.
//@@
ModelEnsembling ensemble_scheduling = 15;
}
//@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
//@@
//@@ Instances of this model. If not specified, one instance
//@@ of the model will be instantiated on each available GPU.
//@@
repeated ModelInstanceGroup instance_group = 7;
//@@ .. cpp:var:: string default_model_filename
//@@
//@@ Optional filename of the model file to use if a
//@@ compute-capability specific model is not specified in
//@@ :cpp:var:`cc_model_filenames`. If not specified the default name
//@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
//@@ 'model.pt' depending on the model type.
//@@
string default_model_filename = 8;
//@@ .. cpp:var:: map<string,string> cc_model_filenames
//@@
//@@ Optional map from CUDA compute capability to the filename of
//@@ the model that supports that compute capability. The filename
//@@ refers to a file within the model version directory.
//@@
map<string, string> cc_model_filenames = 9;
//@@ .. cpp:var:: map<string,string> metric_tags
//@@
//@@ Optional metric tags. User-specific key-value pairs for metrics
//@@ reported for this model. These tags are applied to the metrics
//@@ reported on the HTTP metrics port.
//@@
map<string, string> metric_tags = 10;
//@@ .. cpp:var:: map<string,ModelParameter> parameters
//@@
//@@ Optional model parameters. User-specified parameter values.
//@@
map<string, ModelParameter> parameters = 14;
//@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
//@@
//@@ Warmup setting of this model. If specified, all instances
//@@ will be run with the request samples in sequence before
//@@ serving the model.
//@@ This field can only be specified if the model is not an ensemble
//@@ model.
//@@
repeated ModelWarmup model_warmup = 16;
//@@ .. cpp:var:: ModelOperations model_operations
//@@
//@@ Optional metadata of the libraries providing custom operations for
//@@ this model.
//@@
ModelOperations model_operations = 18;
//@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
//@@
//@@ Optional specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
ModelTransactionPolicy model_transaction_policy = 19;
//@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
//@@
//@@ Optional specification of the agent(s) that should be invoked
//@@  when repository actions are performed for this model.
//@@
ModelRepositoryAgents model_repository_agents = 23;
//@@ .. cpp:var:: ModelResponseCache response_cache
//@@
//@@ Optional setting for utilizing the response cache for this
//@@ model.
//@@
ModelResponseCache response_cache = 24;
}
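//
// Illustrative sketch (not part of the original schema comments): a minimal
// complete config.pbtxt tying the messages above together. The model name,
// backend, tensor names and shapes are assumptions for the example.
//
//   name: "resnet50"
//   backend: "onnxruntime"
//   max_batch_size: 8
//   input [
//     {
//       name: "INPUT0"
//       data_type: TYPE_FP32
//       dims: [ 3, 224, 224 ]
//     }
//   ]
//   output [
//     {
//       name: "OUTPUT0"
//       data_type: TYPE_FP32
//       dims: [ 1000 ]
//     }
//   ]
//   instance_group [ { kind: KIND_GPU, count: 1 } ]
//   dynamic_batching {
//     preferred_batch_size: [ 4, 8 ]
//     max_queue_delay_microseconds: 100
//   }
//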
\ No newline at end of file
# flake8: noqa
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: model_config.protxt
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import enum_type_wrapper
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 \x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 
\x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 \x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 
\x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 
\x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3'
)
_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType']
DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)
TYPE_INVALID = 0
TYPE_BOOL = 1
TYPE_UINT8 = 2
TYPE_UINT16 = 3
TYPE_UINT32 = 4
TYPE_UINT64 = 5
TYPE_INT8 = 6
TYPE_INT16 = 7
TYPE_INT32 = 8
TYPE_INT64 = 9
TYPE_FP16 = 10
TYPE_FP32 = 11
TYPE_FP64 = 12
TYPE_STRING = 13
TYPE_BF16 = 14
_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter']
_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource']
_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup']
_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[
'SecondaryDevice']
_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape']
_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput']
_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput']
_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput']
_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput']
_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy']
_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest']
_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All']
_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[
'Specific']
_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[
'ModelOptimizationPolicy']
_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'Graph']
_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'Cuda']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[
'GraphSpec']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
'Shape']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
'LowerBound']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[
'InputEntry']
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
'InputEntry']
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'ExecutionAccelerators']
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[
'Accelerator']
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[
'ParametersEntry']
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
'PinnedMemoryBuffer']
_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy']
_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[
'ModelDynamicBatching']
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[
'PriorityQueuePolicyEntry']
_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[
'ModelSequenceBatching']
_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[
'Control']
_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[
'ControlInput']
_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[
'InitialState']
_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[
'State']
_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[
'StrategyDirect']
_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[
'StrategyOldest']
_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling']
_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step']
_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[
'InputMapEntry']
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[
'OutputMapEntry']
_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter']
_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup']
_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input']
_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry']
_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations']
_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[
'ModelTransactionPolicy']
_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[
'ModelRepositoryAgents']
_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[
'Agent']
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[
'ParametersEntry']
_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache']
_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig']
_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[
'CcModelFilenamesEntry']
_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[
'MetricTagsEntry']
_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[
'ParametersEntry']
_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[
'SecondaryDeviceKind']
_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind']
_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format']
_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind']
_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind']
_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[
'ModelPriority']
_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[
'TimeoutAction']
_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[
'Kind']
ModelRateLimiter = _reflection.GeneratedProtocolMessageType(
'ModelRateLimiter',
(_message.Message, ),
{
'Resource':
_reflection.GeneratedProtocolMessageType(
'Resource',
(_message.Message, ),
{
'DESCRIPTOR': _MODELRATELIMITER_RESOURCE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource)
}),
'DESCRIPTOR':
_MODELRATELIMITER,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRateLimiter)
})
_sym_db.RegisterMessage(ModelRateLimiter)
_sym_db.RegisterMessage(ModelRateLimiter.Resource)
ModelInstanceGroup = _reflection.GeneratedProtocolMessageType(
'ModelInstanceGroup',
(_message.Message, ),
{
'SecondaryDevice':
_reflection.GeneratedProtocolMessageType(
'SecondaryDevice',
(_message.Message, ),
{
'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice)
}),
'DESCRIPTOR':
_MODELINSTANCEGROUP,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup)
})
_sym_db.RegisterMessage(ModelInstanceGroup)
_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice)
ModelTensorReshape = _reflection.GeneratedProtocolMessageType(
'ModelTensorReshape',
(_message.Message, ),
{
'DESCRIPTOR': _MODELTENSORRESHAPE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelTensorReshape)
})
_sym_db.RegisterMessage(ModelTensorReshape)
ModelInput = _reflection.GeneratedProtocolMessageType(
'ModelInput',
(_message.Message, ),
{
'DESCRIPTOR': _MODELINPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelInput)
})
_sym_db.RegisterMessage(ModelInput)
ModelOutput = _reflection.GeneratedProtocolMessageType(
'ModelOutput',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOUTPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOutput)
})
_sym_db.RegisterMessage(ModelOutput)
BatchInput = _reflection.GeneratedProtocolMessageType(
'BatchInput',
(_message.Message, ),
{
'DESCRIPTOR': _BATCHINPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.BatchInput)
})
_sym_db.RegisterMessage(BatchInput)
BatchOutput = _reflection.GeneratedProtocolMessageType(
'BatchOutput',
(_message.Message, ),
{
'DESCRIPTOR': _BATCHOUTPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.BatchOutput)
})
_sym_db.RegisterMessage(BatchOutput)
ModelVersionPolicy = _reflection.GeneratedProtocolMessageType(
'ModelVersionPolicy',
(_message.Message, ),
{
'Latest':
_reflection.GeneratedProtocolMessageType(
'Latest',
(_message.Message, ),
{
'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest)
}),
'All':
_reflection.GeneratedProtocolMessageType(
'All',
(_message.Message, ),
{
'DESCRIPTOR': _MODELVERSIONPOLICY_ALL,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All)
}),
'Specific':
_reflection.GeneratedProtocolMessageType(
'Specific',
(_message.Message, ),
{
'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific)
}),
'DESCRIPTOR':
_MODELVERSIONPOLICY,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy)
})
_sym_db.RegisterMessage(ModelVersionPolicy)
_sym_db.RegisterMessage(ModelVersionPolicy.Latest)
_sym_db.RegisterMessage(ModelVersionPolicy.All)
_sym_db.RegisterMessage(ModelVersionPolicy.Specific)
ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType(
'ModelOptimizationPolicy',
(_message.Message, ),
{
'Graph':
_reflection.GeneratedProtocolMessageType(
'Graph',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph)
}),
'Cuda':
_reflection.GeneratedProtocolMessageType(
'Cuda',
(_message.Message, ),
{
'GraphSpec':
_reflection.GeneratedProtocolMessageType(
'GraphSpec',
(_message.Message, ),
{
'Shape':
_reflection.GeneratedProtocolMessageType(
'Shape',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape)
}),
'LowerBound':
_reflection.GeneratedProtocolMessageType(
'LowerBound',
(_message.Message, ),
{
'InputEntry':
_reflection.GeneratedProtocolMessageType(
'InputEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound)
}),
'InputEntry':
_reflection.GeneratedProtocolMessageType(
'InputEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_CUDA,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda)
}),
'ExecutionAccelerators':
_reflection.GeneratedProtocolMessageType(
'ExecutionAccelerators',
(_message.Message, ),
{
'Accelerator':
_reflection.GeneratedProtocolMessageType(
'Accelerator',
(_message.Message, ),
{
'ParametersEntry':
_reflection.GeneratedProtocolMessageType(
'ParametersEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators)
}),
'PinnedMemoryBuffer':
_reflection.GeneratedProtocolMessageType(
'PinnedMemoryBuffer',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer)
}),
'DESCRIPTOR':
_MODELOPTIMIZATIONPOLICY,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy)
})
_sym_db.RegisterMessage(ModelOptimizationPolicy)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound)
_sym_db.RegisterMessage(
ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry)
_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry)
_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators)
_sym_db.RegisterMessage(
ModelOptimizationPolicy.ExecutionAccelerators.Accelerator)
_sym_db.RegisterMessage(
ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry)
_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer)
ModelQueuePolicy = _reflection.GeneratedProtocolMessageType(
'ModelQueuePolicy',
(_message.Message, ),
{
'DESCRIPTOR': _MODELQUEUEPOLICY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy)
})
_sym_db.RegisterMessage(ModelQueuePolicy)
ModelDynamicBatching = _reflection.GeneratedProtocolMessageType(
'ModelDynamicBatching',
(_message.Message, ),
{
'PriorityQueuePolicyEntry':
_reflection.GeneratedProtocolMessageType(
'PriorityQueuePolicyEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry)
}),
'DESCRIPTOR':
_MODELDYNAMICBATCHING,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching)
})
_sym_db.RegisterMessage(ModelDynamicBatching)
_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry)
ModelSequenceBatching = _reflection.GeneratedProtocolMessageType(
'ModelSequenceBatching',
(_message.Message, ),
{
'Control':
_reflection.GeneratedProtocolMessageType(
'Control',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control)
}),
'ControlInput':
_reflection.GeneratedProtocolMessageType(
'ControlInput',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput)
}),
'InitialState':
_reflection.GeneratedProtocolMessageType(
'InitialState',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState)
}),
'State':
_reflection.GeneratedProtocolMessageType(
'State',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State)
}),
'StrategyDirect':
_reflection.GeneratedProtocolMessageType(
'StrategyDirect',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect)
}),
'StrategyOldest':
_reflection.GeneratedProtocolMessageType(
'StrategyOldest',
(_message.Message, ),
{
'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest)
}),
'DESCRIPTOR':
_MODELSEQUENCEBATCHING,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching)
})
_sym_db.RegisterMessage(ModelSequenceBatching)
_sym_db.RegisterMessage(ModelSequenceBatching.Control)
_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput)
_sym_db.RegisterMessage(ModelSequenceBatching.InitialState)
_sym_db.RegisterMessage(ModelSequenceBatching.State)
_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect)
_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest)
ModelEnsembling = _reflection.GeneratedProtocolMessageType(
'ModelEnsembling',
(_message.Message, ),
{
'Step':
_reflection.GeneratedProtocolMessageType(
'Step',
(_message.Message, ),
{
'InputMapEntry':
_reflection.GeneratedProtocolMessageType(
'InputMapEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry)
}),
'OutputMapEntry':
_reflection.GeneratedProtocolMessageType(
'OutputMapEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry)
}),
'DESCRIPTOR':
_MODELENSEMBLING_STEP,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step)
}),
'DESCRIPTOR':
_MODELENSEMBLING,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelEnsembling)
})
_sym_db.RegisterMessage(ModelEnsembling)
_sym_db.RegisterMessage(ModelEnsembling.Step)
_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry)
_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry)
ModelParameter = _reflection.GeneratedProtocolMessageType(
'ModelParameter',
(_message.Message, ),
{
'DESCRIPTOR': _MODELPARAMETER,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelParameter)
})
_sym_db.RegisterMessage(ModelParameter)
ModelWarmup = _reflection.GeneratedProtocolMessageType(
'ModelWarmup',
(_message.Message, ),
{
'Input':
_reflection.GeneratedProtocolMessageType(
'Input',
(_message.Message, ),
{
'DESCRIPTOR': _MODELWARMUP_INPUT,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input)
}),
'InputsEntry':
_reflection.GeneratedProtocolMessageType(
'InputsEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry)
}),
'DESCRIPTOR':
_MODELWARMUP,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelWarmup)
})
_sym_db.RegisterMessage(ModelWarmup)
_sym_db.RegisterMessage(ModelWarmup.Input)
_sym_db.RegisterMessage(ModelWarmup.InputsEntry)
ModelOperations = _reflection.GeneratedProtocolMessageType(
'ModelOperations',
(_message.Message, ),
{
'DESCRIPTOR': _MODELOPERATIONS,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelOperations)
})
_sym_db.RegisterMessage(ModelOperations)
ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType(
'ModelTransactionPolicy',
(_message.Message, ),
{
'DESCRIPTOR': _MODELTRANSACTIONPOLICY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy)
})
_sym_db.RegisterMessage(ModelTransactionPolicy)
ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType(
'ModelRepositoryAgents',
(_message.Message, ),
{
'Agent':
_reflection.GeneratedProtocolMessageType(
'Agent',
(_message.Message, ),
{
'ParametersEntry':
_reflection.GeneratedProtocolMessageType(
'ParametersEntry',
(_message.Message, ),
{
'DESCRIPTOR':
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry)
}),
'DESCRIPTOR':
_MODELREPOSITORYAGENTS_AGENT,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent)
}),
'DESCRIPTOR':
_MODELREPOSITORYAGENTS,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents)
})
_sym_db.RegisterMessage(ModelRepositoryAgents)
_sym_db.RegisterMessage(ModelRepositoryAgents.Agent)
_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry)
ModelResponseCache = _reflection.GeneratedProtocolMessageType(
'ModelResponseCache',
(_message.Message, ),
{
'DESCRIPTOR': _MODELRESPONSECACHE,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelResponseCache)
})
_sym_db.RegisterMessage(ModelResponseCache)
ModelConfig = _reflection.GeneratedProtocolMessageType(
'ModelConfig',
(_message.Message, ),
{
'CcModelFilenamesEntry':
_reflection.GeneratedProtocolMessageType(
'CcModelFilenamesEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry)
}),
'MetricTagsEntry':
_reflection.GeneratedProtocolMessageType(
'MetricTagsEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry)
}),
'ParametersEntry':
_reflection.GeneratedProtocolMessageType(
'ParametersEntry',
(_message.Message, ),
{
'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY,
'__module__': 'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry)
}),
'DESCRIPTOR':
_MODELCONFIG,
'__module__':
'model_config.protxt_pb2'
# @@protoc_insertion_point(class_scope:inference.ModelConfig)
})
_sym_db.RegisterMessage(ModelConfig)
_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry)
_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry)
_sym_db.RegisterMessage(ModelConfig.ParametersEntry)
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001'
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001'
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001'
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001'
_MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None
_MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001'
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001'
_MODELWARMUP_INPUTSENTRY._options = None
_MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001'
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001'
_MODELCONFIG_CCMODELFILENAMESENTRY._options = None
_MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001'
_MODELCONFIG_METRICTAGSENTRY._options = None
_MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001'
_MODELCONFIG_PARAMETERSENTRY._options = None
_MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001'
_DATATYPE._serialized_start = 8137
_DATATYPE._serialized_end = 8387
_MODELRATELIMITER._serialized_start = 35
_MODELRATELIMITER._serialized_end = 185
_MODELRATELIMITER_RESOURCE._serialized_start = 130
_MODELRATELIMITER_RESOURCE._serialized_end = 185
_MODELINSTANCEGROUP._serialized_start = 188
_MODELINSTANCEGROUP._serialized_end = 707
_MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484
_MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640
_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603
_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640
_MODELINSTANCEGROUP_KIND._serialized_start = 642
_MODELINSTANCEGROUP_KIND._serialized_end = 707
_MODELTENSORRESHAPE._serialized_start = 709
_MODELTENSORRESHAPE._serialized_end = 744
_MODELINPUT._serialized_start = 747
_MODELINPUT._serialized_end = 1053
_MODELINPUT_FORMAT._serialized_start = 994
_MODELINPUT_FORMAT._serialized_end = 1053
_MODELOUTPUT._serialized_start = 1056
_MODELOUTPUT._serialized_end = 1234
_BATCHINPUT._serialized_start = 1237
_BATCHINPUT._serialized_end = 1582
_BATCHINPUT_KIND._serialized_start = 1377
_BATCHINPUT_KIND._serialized_end = 1582
_BATCHOUTPUT._serialized_start = 1585
_BATCHOUTPUT._serialized_end = 1728
_BATCHOUTPUT_KIND._serialized_start = 1686
_BATCHOUTPUT_KIND._serialized_end = 1728
_MODELVERSIONPOLICY._serialized_start = 1731
_MODELVERSIONPOLICY._serialized_end = 2003
_MODELVERSIONPOLICY_LATEST._serialized_start = 1919
_MODELVERSIONPOLICY_LATEST._serialized_end = 1949
_MODELVERSIONPOLICY_ALL._serialized_start = 1951
_MODELVERSIONPOLICY_ALL._serialized_end = 1956
_MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958
_MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986
_MODELOPTIMIZATIONPOLICY._serialized_start = 2006
_MODELOPTIMIZATIONPOLICY._serialized_end = 3795
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558
_MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561
_MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055
_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633
_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720
_MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722
_MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795
_MODELQUEUEPOLICY._serialized_start = 3798
_MODELQUEUEPOLICY._serialized_end = 4017
_MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979
_MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017
_MODELDYNAMICBATCHING._serialized_start = 4020
_MODELDYNAMICBATCHING._serialized_end = 4431
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344
_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431
_MODELSEQUENCEBATCHING._serialized_start = 4434
_MODELSEQUENCEBATCHING._serialized_end = 5697
_MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759
_MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064
_MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947
_MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064
_MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066
_MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153
_MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156
_MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294
_MODELSEQUENCEBATCHING_STATE._serialized_start = 5297
_MODELSEQUENCEBATCHING_STATE._serialized_end = 5469
_MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471
_MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559
_MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561
_MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678
_MODELENSEMBLING._serialized_start = 5700
_MODELENSEMBLING._serialized_end = 6049
_MODELENSEMBLING_STEP._serialized_start = 5767
_MODELENSEMBLING_STEP._serialized_end = 6049
_MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952
_MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001
_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049
_MODELPARAMETER._serialized_start = 6051
_MODELPARAMETER._serialized_end = 6089
_MODELWARMUP._serialized_start = 6092
_MODELWARMUP._serialized_end = 6437
_MODELWARMUP_INPUT._serialized_start = 6209
_MODELWARMUP_INPUT._serialized_end = 6360
_MODELWARMUP_INPUTSENTRY._serialized_start = 6362
_MODELWARMUP_INPUTSENTRY._serialized_end = 6437
_MODELOPERATIONS._serialized_start = 6439
_MODELOPERATIONS._serialized_end = 6485
_MODELTRANSACTIONPOLICY._serialized_start = 6487
_MODELTRANSACTIONPOLICY._serialized_end = 6530
_MODELREPOSITORYAGENTS._serialized_start = 6533
_MODELREPOSITORYAGENTS._serialized_end = 6763
_MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615
_MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633
_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682
_MODELRESPONSECACHE._serialized_start = 6765
_MODELRESPONSECACHE._serialized_end = 6801
_MODELCONFIG._serialized_start = 6804
_MODELCONFIG._serialized_end = 8134
_MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929
_MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984
_MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986
_MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035
_MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037
_MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113
# @@protoc_insertion_point(module_scope)
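Note: the _serialized_start/_serialized_end offsets above are emitted by protoc for a Triton-style model_config.proto and are only consumed by the protobuf runtime; regenerate the file rather than editing these values by hand. A minimal sketch of how such a generated module is typically used — the import path below is an assumption, not something this diff shows:

# Hedged sketch: parsing a Triton-style config.pbtxt with the generated module.
# The import path is assumed; adjust it to wherever model_config_pb2.py lives
# in your checkout.
from google.protobuf import text_format

from visualdl.component.inference.proto import model_config_pb2  # assumed path

config = model_config_pb2.ModelConfig()
with open('config.pbtxt') as f:
    text_format.Parse(f.read(), config)
print(config.name, config.max_batch_size)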
@@ -417,7 +417,10 @@ def get_component_tabs(*apis, vdl_args, request_args):
             all_tabs.update(api('component_tabs', request_args))
         all_tabs.add('static_graph')
     else:
-        return ['static_graph', 'x2paddle', 'fastdeploy_server']
+        return [
+            'static_graph', 'x2paddle', 'fastdeploy_server',
+            'fastdeploy_client'
+        ]
     return list(all_tabs)
...
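With 'fastdeploy_client' added to the fallback list, the client tab is advertised to the frontend even when VisualDL is started without a logdir. A hedged sketch of checking which tabs a running instance reports — the '/api' prefix and port 8040 reflect a typical local setup, not anything defined in this hunk:

# Hedged sketch: asking a running VisualDL server which component tabs are enabled.
import requests

resp = requests.get('http://localhost:8040/api/component_tabs')
print(resp.json())  # expected to mention 'fastdeploy_server' and 'fastdeploy_client'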
@@ -13,12 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =======================================================================
+import json
 import multiprocessing
 import os
 import re
 import sys
 import threading
 import time
+import urllib
 import webbrowser

 import requests
@@ -32,6 +34,8 @@ from flask_babel import Babel
 import visualdl.server
 from visualdl import __version__
+from visualdl.component.inference.fastdeploy_lib import get_start_arguments
+from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call
 from visualdl.component.inference.model_convert_server import create_model_convert_api_call
 from visualdl.component.profiler.profiler_server import create_profiler_api_call
 from visualdl.server.api import create_api_call
@@ -71,6 +75,7 @@ def create_app(args): # noqa: C901
     api_call = create_api_call(args.logdir, args.model, args.cache_timeout)
     profiler_api_call = create_profiler_api_call(args.logdir)
     inference_api_call = create_model_convert_api_call()
+    fastdeploy_api_call = create_fastdeploy_api_call()
     if args.telemetry:
         update_util.PbUpdater(args.product).start()
@@ -153,6 +158,141 @@ def create_app(args): # noqa: C901
         return make_response(
             Response(data, mimetype=mimetype, headers=headers))
+
+    @app.route(api_path + '/fastdeploy/<path:method>', methods=["GET", "POST"])
+    def serve_fastdeploy_api(method):
+        if request.method == 'POST':
+            data, mimetype, headers = fastdeploy_api_call(method, request.form)
+        else:
+            data, mimetype, headers = fastdeploy_api_call(method, request.args)
+        return make_response(
+            Response(data, mimetype=mimetype, headers=headers))
+
+    @app.route(
+        api_path + '/fastdeploy/fastdeploy_client', methods=["GET", "POST"])
+    def serve_fastdeploy_create_fastdeploy_client():
+        try:
+            if request.method == 'POST':
+                fastdeploy_api_call('create_fastdeploy_client', request.form)
+                request_args = request.form
+            else:
+                fastdeploy_api_call('create_fastdeploy_client', request.args)
+                request_args = request.args
+        except Exception as e:
+            error_msg = '{}'.format(e)
+            return make_response(error_msg)
+        args = urllib.parse.urlencode(request_args)
+        if args:
+            return redirect(
+                api_path + "/fastdeploy/fastdeploy_client/app?{}".format(args),
+                code=302)
+        return redirect(
+            api_path + "/fastdeploy/fastdeploy_client/app", code=302)
+
+    @app.route(
+        api_path + "/fastdeploy/fastdeploy_client/<path:path>",
+        methods=["GET", "POST"])
+    def request_fastdeploy_create_fastdeploy_client_app(path: str):
+        '''
+        Reverse proxy for the gradio client app: every url under this route
+        is forwarded to the local gradio server that backs the fastdeploy client.
+        Args:
+            path(str): Resource path requested from the gradio server.
+        Returns:
+            Whatever the gradio server returns for that resource.
+        '''
+        if request.method == 'POST':
+            port = fastdeploy_api_call('create_fastdeploy_client',
+                                       request.form)
+            request_args = request.form
+        else:
+            port = fastdeploy_api_call('create_fastdeploy_client',
+                                       request.args)
+            request_args = request.args
+        if path == 'app':
+            proxy_url = request.url.replace(
+                request.host_url.rstrip('/') + api_path +
+                '/fastdeploy/fastdeploy_client/app',
+                'http://localhost:{}/'.format(port))
+        else:
+            proxy_url = request.url.replace(
+                request.host_url.rstrip('/') + api_path +
+                '/fastdeploy/fastdeploy_client/',
+                'http://localhost:{}/'.format(port))
+        # forward the original request to the local gradio server
+        resp = requests.request(
+            method=request.method,
+            url=proxy_url,
+            headers={
+                key: value
+                for (key, value) in request.headers if key != 'Host'
+            },
+            data=request.get_data(),
+            cookies=request.cookies,
+            allow_redirects=False)
+        if path == 'app':
+            content = resp.content
+            if request_args and 'server_id' in request_args:
+                # pre-fill the gradio form defaults (server address, ports,
+                # model name and version) from the launch arguments of the
+                # selected fastdeploy server
+                server_id = request_args.get('server_id')
+                start_args = get_start_arguments(server_id)
+                http_port = start_args.get('http-port', '')
+                metrics_port = start_args.get('metrics-port', '')
+                model_name = start_args.get('default_model_name', '')
+                content = content.decode()
+                try:
+                    default_server_addr = re.search(
+                        '"label": {}.*?"value": "".*?}}'.format(
+                            json.dumps("服务ip", ensure_ascii=True).replace(
+                                '\\', '\\\\')), content).group(0)
+                    cur_server_addr = default_server_addr.replace(
+                        '"value": ""', '"value": "localhost"')
+                    default_http_port = re.search(
+                        '"label": {}.*?"value": "".*?}}'.format(
+                            json.dumps("推理服务端口", ensure_ascii=True).replace(
+                                '\\', '\\\\')), content).group(0)
+                    cur_http_port = default_http_port.replace(
+                        '"value": ""', '"value": "{}"'.format(http_port))
+                    default_metrics_port = re.search(
+                        '"label": {}.*?"value": "".*?}}'.format(
+                            json.dumps("性能服务端口", ensure_ascii=True).replace(
+                                '\\', '\\\\')), content).group(0)
+                    cur_metrics_port = default_metrics_port.replace(
+                        '"value": ""', '"value": "{}"'.format(metrics_port))
+                    default_model_name = re.search(
+                        '"label": {}.*?"value": "".*?}}'.format(
+                            json.dumps("模型名称", ensure_ascii=True).replace(
+                                '\\', '\\\\')), content).group(0)
+                    cur_model_name = default_model_name.replace(
+                        '"value": ""', '"value": "{}"'.format(model_name))
+                    default_model_version = re.search(
+                        '"label": {}.*?"value": "".*?}}'.format(
+                            json.dumps("模型版本", ensure_ascii=True).replace(
+                                '\\', '\\\\')), content).group(0)
+                    cur_model_version = default_model_version.replace(
+                        '"value": ""', '"value": "{}"'.format('1'))
+                    content = content.replace(default_server_addr,
+                                              cur_server_addr)
+                    if http_port:
+                        content = content.replace(default_http_port,
+                                                  cur_http_port)
+                    if metrics_port:
+                        content = content.replace(default_metrics_port,
+                                                  cur_metrics_port)
+                    if model_name:
+                        content = content.replace(default_model_name,
+                                                  cur_model_name)
+                    content = content.replace(default_model_version,
+                                              cur_model_version)
+                except Exception:
+                    pass
+                finally:
+                    content = content.encode()
+        else:
+            content = resp.content
+        headers = [(name, value) for (name, value) in resp.raw.headers.items()]
+        response = Response(content, resp.status_code, headers)
+        return response
+
     @app.route(api_path + '/component_tabs')
     def component_tabs():
         data, mimetype, headers = get_component_tabs(
...
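The three routes added above do the following: /fastdeploy/<path:method> dispatches API calls to the handler returned by create_fastdeploy_api_call; /fastdeploy/fastdeploy_client starts (or reuses) a local gradio client process and redirects to its app page; /fastdeploy/fastdeploy_client/<path:path> reverse-proxies every gradio resource, rewriting the default server address, ports, model name, and model version in the gradio config whenever a server_id query parameter is supplied. A hedged sketch of calling the dispatcher from Python — 'get_server_list' is an assumed method name, since the real set of methods lives in fastdeploy_server.py, which is not part of this hunk:

# Hedged sketch: exercising the new dispatcher route of a locally running VisualDL.
import requests

base = 'http://localhost:8040/api/fastdeploy'
print(requests.get(base + '/get_server_list').json())  # assumed method name

# The gradio client itself is reached through the proxy, e.g.
# http://localhost:8040/api/fastdeploy/fastdeploy_client?server_id=<id>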
@@ -78,7 +78,8 @@ def validate_args(args):
     supported_tabs = [
         'scalar', 'image', 'text', 'embeddings', 'audio', 'histogram',
         'hyper_parameters', 'static_graph', 'dynamic_graph', 'pr_curve',
-        'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server'
+        'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server',
+        'fastdeploy_client'
     ]
     if args.component_tabs is not None:
         for component_tab in args.component_tabs:
...
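Because validate_args rejects any value outside supported_tabs, the client tab has to be whitelisted here before it can be requested on the command line. A hedged sketch of starting the server with only the two FastDeploy tabs — the flag spelling is assumed from the args.component_tabs attribute, so check visualdl --help for your version:

# Hedged sketch: launching VisualDL with only the FastDeploy tabs enabled.
import subprocess

subprocess.run([
    'visualdl', '--logdir', './log',
    '--component_tabs', 'fastdeploy_server', 'fastdeploy_client'
], check=True)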
@@ -23,6 +23,7 @@ USER_HOME = os.path.expanduser('~')
 VDL_HOME = os.path.join(USER_HOME, '.visualdl')
 CONF_HOME = os.path.join(VDL_HOME, 'conf')
 CONFIG_PATH = os.path.join(CONF_HOME, 'config.json')
+FASTDEPLOYSERVER_PATH = os.path.join(VDL_HOME, 'fastdeployserver')
 X2PADDLE_CACHE_PATH = os.path.join(VDL_HOME, 'x2paddle')
@@ -32,5 +33,7 @@ def init_vdl_config():
     if not os.path.exists(CONFIG_PATH) or 0 == os.path.getsize(CONFIG_PATH):
         with open(CONFIG_PATH, 'w') as fp:
             fp.write(json.dumps(default_vdl_config))
+    if not os.path.exists(FASTDEPLOYSERVER_PATH):
+        os.makedirs(FASTDEPLOYSERVER_PATH, exist_ok=True)
     if not os.path.exists(X2PADDLE_CACHE_PATH):
         os.makedirs(X2PADDLE_CACHE_PATH, exist_ok=True)
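init_vdl_config now also creates ~/.visualdl/fastdeployserver, the working directory used by the new component (presumably for per-server configuration and resource files), mirroring how the x2paddle cache directory is handled. A hedged sketch of verifying it — the module path visualdl.utils.dir is an assumption based on the constants shown:

# Hedged sketch: checking the new working directory after config initialization.
import os

from visualdl.utils.dir import FASTDEPLOYSERVER_PATH, init_vdl_config  # assumed path

init_vdl_config()
print(FASTDEPLOYSERVER_PATH)                 # e.g. /home/<user>/.visualdl/fastdeployserver
print(os.path.isdir(FASTDEPLOYSERVER_PATH))  # True once init has run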