未验证 提交 31b5e25c 编写于 作者: M Maxim Zhiltsov 提交者: GitHub

Keep model input data unchanged in SDK (#6455)

Nested containers may be modified during the model input data parsing in
class constructors. This can lead to subtle memory errors, which are
very hard to find. In CVAT, this helps to avoid unexpected problems in
tests, such as one test affecting another one by subtly changing test
assets.
上级 03975ea4
......@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- \[SDK\] Ability to create attributes with blank default values
(<https://github.com/opencv/cvat/pull/6454>)
- \[SDK\] SDK should not change input data in models (<https://github.com/opencv/cvat/pull/6455>)
### Security
- TBD
......
{{>partial_header}}
from datetime import date, datetime # noqa: F401
from copy import deepcopy
from copy import copy, deepcopy
import inspect
import io
import os
......@@ -1310,14 +1310,18 @@ def validate_and_convert_types(input_value, required_types_mixed, path_to_item,
if inner_required_types is None:
# for this type, there are not more inner variables left to look at
return input_value
if isinstance(input_value, list):
if input_value == []:
# avoid storing and changing the input value when the type is mutable collection
output_value = copy(input_value)
if output_value == []:
# allow an empty list
return input_value
for index, inner_value in enumerate(input_value):
return output_value
for index, inner_value in enumerate(output_value):
inner_path = list(path_to_item)
inner_path.append(index)
input_value[index] = validate_and_convert_types(
output_value[index] = validate_and_convert_types(
inner_value,
inner_required_types,
inner_path,
......@@ -1326,16 +1330,19 @@ def validate_and_convert_types(input_value, required_types_mixed, path_to_item,
configuration=configuration
)
elif isinstance(input_value, dict):
if input_value == {}:
# avoid storing and changing the input value when the type is mutable collection
output_value = copy(input_value)
if output_value == {}:
# allow an empty dict
return input_value
for inner_key, inner_val in input_value.items():
return output_value
for inner_key, inner_val in output_value.items():
inner_path = list(path_to_item)
inner_path.append(inner_key)
if get_simple_class(inner_key) != str:
raise get_type_error(inner_key, inner_path, valid_classes,
key_type=True)
input_value[inner_key] = validate_and_convert_types(
output_value[inner_key] = validate_and_convert_types(
inner_val,
inner_required_types,
inner_path,
......@@ -1343,7 +1350,10 @@ def validate_and_convert_types(input_value, required_types_mixed, path_to_item,
_check_type,
configuration=configuration
)
return input_value
else:
output_value = input_value
return output_value
def model_to_dict(model_instance, serialize=True):
......@@ -1382,24 +1392,20 @@ def model_to_dict(model_instance, serialize=True):
except KeyError:
used_fallback_python_attribute_names.add(attr)
if isinstance(value, list):
if not value:
# empty list or None
result[attr] = value
else:
res = []
for v in value:
if isinstance(v, PRIMITIVE_TYPES) or v is None:
res.append(v)
elif isinstance(v, ModelSimple):
res.append(v.value)
elif isinstance(v, dict):
res.append(dict(map(
extract_item,
v.items()
)))
else:
res.append(model_to_dict(v, serialize=serialize))
result[attr] = res
res = []
for v in value:
if isinstance(v, PRIMITIVE_TYPES) or v is None:
res.append(v)
elif isinstance(v, ModelSimple):
res.append(v.value)
elif isinstance(v, dict):
res.append(dict(map(
extract_item,
v.items()
)))
else:
res.append(model_to_dict(v, serialize=serialize))
result[attr] = res
elif isinstance(value, dict):
result[attr] = dict(map(
extract_item,
......
# Copyright (C) 2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
from copy import deepcopy
from cvat_sdk import models
from deepdiff import DeepDiff
def test_models_do_not_change_input_values():
    """Check that building a model leaves the caller's nested input data as is.

    Nested containers may be modified while the model parses its input data.
    Such changes can lead to subtle memory errors that are very hard to find,
    so the data passed to the constructor is compared with a pristine copy.
    """
    text_attribute = {
        "default_value": "yy",
        "input_type": "text",
        "mutable": False,
        "name": "x",
        "values": ["yy"],
    }
    radio_attribute = {
        "default_value": "1",
        "input_type": "radio",
        "mutable": False,
        "name": "y",
        "values": ["1", "2"],
    }
    reference = {
        "name": "test",
        "labels": [
            {
                "name": "cat",
                "attributes": [text_attribute, radio_attribute],
            }
        ],
    }

    # The model receives an independent copy, so any in-place change made
    # during parsing shows up as a difference from the reference.
    payload = deepcopy(reference)
    models.TaskWriteRequest(**payload)

    difference = DeepDiff(reference, payload)
    assert difference == {}
def test_models_do_not_store_input_collections():
    """Check that an initialized model does not depend on the input containers.

    If collection fields kept references to the original input data, changing
    that data later would silently change the model as well, which can lead to
    subtle memory errors and unexpected behavior.
    """
    first_label = {
        "name": "cat1",
        "attributes": [
            {
                "default_value": "yy",
                "input_type": "text",
                "mutable": False,
                "name": "x",
                "values": ["yy"],
            },
            {
                "default_value": "1",
                "input_type": "radio",
                "mutable": False,
                "name": "y",
                "values": ["1", "2"],
            },
        ],
    }
    second_label = {"name": "cat2", "attributes": []}
    payload = {
        "name": "test",
        "labels": [first_label, second_label],
    }

    model = models.TaskWriteRequest(**payload)
    snapshot_before = model.to_dict()

    # Modify the input value containers at every nesting level:
    # empty one inner list, grow another one, and grow the outer list.
    labels = payload["labels"]
    labels[0]["attributes"].clear()
    labels[1]["attributes"].append(
        {
            "default_value": "",
            "input_type": "text",
            "mutable": True,
            "name": "z",
        }
    )
    labels.append({"name": "dog"})

    snapshot_after = model.to_dict()

    difference = DeepDiff(snapshot_before, snapshot_after)
    assert difference == {}
def test_models_do_not_return_internal_collections():
    """Check that to_dict() does not hand out the model's internal containers.

    If a returned mutable collection were the model's own data, changing the
    returned value would change the model too, which can lead to subtle memory
    errors and unexpected behavior.
    """
    payload = {
        "name": "test",
        "labels": [],
    }
    model = models.TaskWriteRequest(**payload)

    exported = model.to_dict()
    # Keep an untouched copy of the first export to compare against later.
    exported_pristine = deepcopy(exported)

    # Modify an output value container
    exported["labels"].append({"name": "dog"})

    exported_again = model.to_dict()

    difference = DeepDiff(exported_pristine, exported_again)
    assert difference == {}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册