提交 670bc720 编写于 作者: L luopengting

enhance validation for cmp operation, set user-defined info's `required` value to False

上级 ecab5e89
......@@ -95,14 +95,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
Users can filter and sort all lineage information according to the search
condition. The supported filter fields include `summary_dir`, `network`,
etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
At the same time, the combined use of these fields and conditions is
supported. If you want to sort based on filter fields, the field of
`sorted_name` and `sorted_type` should be specified.
If the value type of a filter condition is `str`, such as `summary_dir` and
`lineage_type`, then its operation can only be `in` or `eq`. At the same time,
the combined use of these fields and conditions is supported. If you want
to sort based on filter fields, the fields `sorted_name` and `sorted_type`
should be specified.
Users can use `lineage_type` to decide what kind of lineage information to
query. If the `lineage_type` is `dataset`, the query result is only the
lineage information related to data augmentation. If the `lineage_type` is
`model` or `None`, the query result is all lineage information.
query. If the `lineage_type` is not defined, the query result is all lineage
information.
Users can paginate query result based on `offset` and `limit`. The `offset`
refers to page number. The `limit` refers to the number in one page.
......@@ -147,6 +148,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
- dataset_mark (dict): The filter condition of dataset mark.
- lineage_type (dict): The filter condition of lineage type. It decides
what kind of lineage information to query. Its value can be `dataset`
or `model`, e.g., {'in': ['dataset', 'model']}, {'eq': 'model'}, etc.
If its values contain `dataset`, the query result will contain the
lineage information related to data augmentation. If its values contain
`model`, the query result will contain model lineage information.
If it is not defined or it is a dict like {'in': ['dataset', 'model']},
the query result is all lineage information.
- offset (int): Page number, the value range is [0, 100000].
- limit (int): The number in one page, the value range is [1, 100].
......@@ -156,14 +166,8 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
- sorted_type (str): Specify sort order. It can be `ascending` or
`descending`.
- lineage_type (str): It decides what kind of lineage information to
query. It can be `dataset` or `model`. If it is `dataset`,
the query result is only the lineage information related to data
augmentation. If it is `model` or `None`, the query result is all
lineage information.
Returns:
dict, all lineage information under summary base directory according to
dict, lineage information under summary base directory according to
search condition.
Raises:
......@@ -196,7 +200,9 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
>>> 'sorted_type': 'descending',
>>> 'limit': 3,
>>> 'offset': 0,
>>> 'lineage_type': 'model'
>>> 'lineage_type': {
>>> 'eq': 'model'
>>> }
>>> }
>>> summary_lineage = filter_summary_lineage(summary_base_dir)
>>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)
......
......@@ -83,6 +83,8 @@ class LineageErrors(LineageErrorCodes):
LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK
# Dataset lineage error codes.
LINEAGE_PARAM_DATASET_MARK_ERROR = 0 | _DATASET_LINEAGE_ERROR_MASK
SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK
SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK
......@@ -156,26 +158,27 @@ class LineageErrorMsg(Enum):
" 'eq', 'lt', 'gt', 'ge', 'le', 'in'."
LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \
"should be a string"
"should be a string."
LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \
" It should be a dict and the value should be a string"
" It should be a dict and the value should be a string."
LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \
"and the value should be a integer between 0 and pow(2, 63) -1"
"and the value should be a integer between 0 and pow(2, 63) -1."
LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \
"It should be a dict and the value should be a string"
"It should be a dict and the value should be a string."
LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \
"and the value should be a integer between 0 and pow(2, 63) -1"
"and the value should be a integer between 0 and pow(2, 63) -1."
LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string"
LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string."
LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string"
LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. " \
"It should be a dict and the value should be a string."
LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \
"It should be a dict and the value should be a string"
"It should be a dict and the value should be a string."
LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \
"It should be a float."
......@@ -184,7 +187,7 @@ class LineageErrorMsg(Enum):
"It should be an integer between 0 and pow(2, 63) -1."
LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \
"It should be a dict and the value should be a float or a integer"
"It should be a dict and the value should be a float or a integer."
LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \
"It should be a string."
......@@ -195,6 +198,9 @@ class LineageErrorMsg(Enum):
LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \
"It should be 'dataset' or 'model'."
LINEAGE_PARAM_DATASET_MARK_ERROR = "The parameter dataset_mark is invalid. " \
"It should be a string."
SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}"
SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}"
......
......@@ -129,6 +129,7 @@ class SearchModelConditionParameter(Schema):
offset = fields.Int(validate=lambda n: 0 <= n <= 100000)
sorted_name = fields.Str()
sorted_type = fields.Str(allow_none=True)
dataset_mark = fields.Dict()
lineage_type = fields.Dict()
@staticmethod
......@@ -137,7 +138,7 @@ class SearchModelConditionParameter(Schema):
for key, value in data.items():
if key == "in":
if not isinstance(value, (list, tuple)):
raise ValidationError("In operation's value must be list or tuple.")
raise ValidationError("The value of `in` operation must be list or tuple.")
else:
if not isinstance(value, value_type):
raise ValidationError("Wrong value type.")
......@@ -153,12 +154,20 @@ class SearchModelConditionParameter(Schema):
for key, value in data.items():
if key == "in":
if not isinstance(value, (list, tuple)):
raise ValidationError("In operation's value must be list or tuple.")
raise ValidationError("The value of `in` operation must be list or tuple.")
else:
if isinstance(value, bool) or \
(not isinstance(value, float) and not isinstance(value, int)):
raise ValidationError("Wrong value type.")
@staticmethod
def check_operation(data):
    """
    Check the compare operations used by one search condition.

    Only `in` and `eq` are accepted, and at most one of them may be given.
    A condition without any operation passes both checks.

    Args:
        data: The condition, keyed by operation name.

    Raises:
        ValidationError: If an operation other than `in` or `eq` is used,
            or if more than one operation is given.
    """
    operations = set(data.keys())
    # Anything left after removing the allowed operations is unsupported.
    if operations - {'in', 'eq'}:
        raise ValidationError("Its operation should be `in` or `eq`.")
    if len(operations) > 1:
        raise ValidationError("More than one operation.")
@validates("loss")
def check_loss(self, data):
"""Check loss."""
......@@ -172,11 +181,13 @@ class SearchModelConditionParameter(Schema):
@validates("loss_function")
def check_loss_function(self, data):
    """
    Validate the `loss_function` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `loss_function`; it is passed
            unchanged to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("train_dataset_path")
def check_train_dataset_path(self, data):
    """
    Validate the `train_dataset_path` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `train_dataset_path`; it is passed
            unchanged to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("train_dataset_count")
......@@ -187,6 +198,7 @@ class SearchModelConditionParameter(Schema):
@validates("test_dataset_path")
def check_test_dataset_path(self, data):
    """
    Validate the `test_dataset_path` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `test_dataset_path`; it is passed
            unchanged to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("test_dataset_count")
......@@ -197,11 +209,13 @@ class SearchModelConditionParameter(Schema):
@validates("network")
def check_network(self, data):
    """
    Validate the `network` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `network`; it is passed unchanged
            to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("optimizer")
def check_optimizer(self, data):
    """
    Validate the `optimizer` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `optimizer`; it is passed unchanged
            to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("epoch")
......@@ -222,11 +236,19 @@ class SearchModelConditionParameter(Schema):
@validates("summary_dir")
def check_summary_dir(self, data):
    """
    Validate the `summary_dir` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `summary_dir`; it is passed
            unchanged to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("dataset_mark")
def check_dataset_mark(self, data):
    """
    Validate the `dataset_mark` search condition.

    The operation check runs first, then the `str` value-type check.

    Args:
        data: The condition given for `dataset_mark`; it is passed
            unchanged to both checks.
    """
    schema = SearchModelConditionParameter
    schema.check_operation(data)
    schema.check_dict_value_type(data, str)
@validates("lineage_type")
def check_lineage_type(self, data):
"""Check lineage type."""
SearchModelConditionParameter.check_operation(data)
SearchModelConditionParameter.check_dict_value_type(data, str)
recv_types = []
for key, value in data.items():
......@@ -243,7 +265,7 @@ class SearchModelConditionParameter(Schema):
def check_comparision(self, data, **kwargs):
"""Check comparision for all parameters in schema."""
for attr, condition in data.items():
if attr in ["limit", "offset", "sorted_name", "sorted_type"]:
if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']:
continue
if not isinstance(attr, str):
......@@ -256,13 +278,6 @@ class SearchModelConditionParameter(Schema):
raise LineageParamTypeError("The search_condition element {} should be dict."
.format(attr))
if attr in ["summary_dir", "lineage_type"]:
if not set(condition.keys()).issubset(['in', 'eq']):
raise LineageParamValueError("Invalid operation of %s." % attr)
if len(condition.keys()) > 1:
raise LineageParamValueError("More than one operation of %s." % attr)
continue
for key in condition.keys():
if key not in ["eq", "lt", "gt", "le", "ge", "in"]:
raise LineageParamValueError("The compare condition should be in "
......
......@@ -63,6 +63,7 @@ SEARCH_MODEL_ERROR_MAPPING = {
'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR,
'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR,
'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR,
'dataset_mark': LineageErrors.LINEAGE_PARAM_DATASET_MARK_ERROR,
'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR
}
......@@ -97,6 +98,7 @@ SEARCH_MODEL_ERROR_MSG_MAPPING = {
'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value,
'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value,
'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value,
'dataset_mark': LineageErrorMsg.LINEAGE_PARAM_DATASET_MARK_ERROR.value,
'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value
}
......@@ -238,10 +240,14 @@ def validate_search_model_condition(schema, data):
MindInsightException: If the parameters are invalid.
"""
error = schema().validate(data)
for error_key in error.keys():
for (error_key, error_msgs) in error.items():
if error_key in SEARCH_MODEL_ERROR_MAPPING.keys():
error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key)
error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key)
for err_msg in error_msgs:
if 'operation' in err_msg.lower():
error_msg = f'The parameter {error_key} is invalid. {err_msg}'
break
log.error(error_msg)
raise MindInsightException(error=error_code, message=error_msg)
......@@ -417,7 +423,7 @@ def validate_user_defined_info(user_defined_info):
"Only str is permitted now.".format(type(key))
log.error(error_msg)
raise LineageParamTypeError(error_msg)
if not isinstance(key, (int, str, float)):
if not isinstance(value, (int, str, float)):
error_msg = "Dict value type {} is not supported in user defined info." \
"Only str, int and float are permitted now.".format(type(value))
log.error(error_msg)
......
......@@ -318,13 +318,14 @@ class Querier:
for offset_result in offset_results:
for obj_name in ["metric", "user_defined"]:
obj = getattr(offset_result, obj_name)
require = True if obj_name == "metric" else False
if obj and isinstance(obj, dict):
for key, value in obj.items():
label = obj_name + "/" + key
label = f'{obj_name}/{key}'
customized[label] = dict()
customized[label]["label"] = label
# user defined info is default displayed
customized[label]["required"] = True
# user defined info is not displayed by default
customized[label]["required"] = require
customized[label]["type"] = type(value).__name__
lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value)
......
......@@ -37,8 +37,7 @@ FIELD_MAPPING = {
"batch_size": Field('hyper_parameters', 'batch_size'),
"loss": Field('algorithm', 'loss'),
"model_size": Field('model', 'size'),
"dataset_mark": Field('dataset_mark', None),
"lineage_type": Field(None, None)
"dataset_mark": Field('dataset_mark', None)
}
......
......@@ -755,7 +755,7 @@ class TestModelApi(TestCase):
@pytest.mark.env_single
def test_filter_summary_lineage_exception_7(self):
"""Test the abnormal execution of the filter_summary_lineage interface."""
condition_keys = ["summary_dir", "lineage_type"]
condition_keys = ["summary_dir", "lineage_type", "loss_function", "optimizer", "network", "dataset_mark"]
for condition_key in condition_keys:
# operations other than `in` and `eq` are not supported for these string-valued conditions
search_condition = {
......@@ -765,7 +765,7 @@ class TestModelApi(TestCase):
}
self.assertRaisesRegex(
LineageSearchConditionParamError,
f'Invalid operation of {condition_key}.',
f'The parameter {condition_key} is invalid. Its operation should be `in` or `eq`.',
filter_summary_lineage,
BASE_SUMMARY_DIR,
search_condition
......@@ -780,7 +780,7 @@ class TestModelApi(TestCase):
}
self.assertRaisesRegex(
LineageSearchConditionParamError,
f'More than one operation of {condition_key}.',
f'The parameter {condition_key} is invalid. More than one operation.',
filter_summary_lineage,
BASE_SUMMARY_DIR,
search_condition
......@@ -793,11 +793,12 @@ class TestModelApi(TestCase):
@pytest.mark.platform_x86_cpu
@pytest.mark.env_single
def test_filter_summary_lineage_exception_8(self):
"""Test the abnormal execution of the filter_summary_lineage interface."""
invalid_lineage_types = ['xxx', None]
for lineage_type in invalid_lineage_types:
search_condition = {
'lineage_type': {
'in': lineage_type
'eq': lineage_type
}
}
self.assertRaisesRegex(
......@@ -815,6 +816,7 @@ class TestModelApi(TestCase):
@pytest.mark.platform_x86_cpu
@pytest.mark.env_single
def test_filter_summary_lineage_exception_9(self):
"""Test the abnormal execution of the filter_summary_lineage interface."""
invalid_sorted_names = ['xxx', 'metric_', 1]
for sorted_name in invalid_sorted_names:
search_condition = {
......
......@@ -82,7 +82,7 @@ class TestMsDataLoader:
ms_loader = MSDataLoader(summary_dir)
ms_loader._latest_summary_filename = 'summary.00'
ms_loader.load()
assert ms_loader._latest_summary_filename == 'summary.01'
shutil.rmtree(summary_dir)
assert ms_loader._latest_summary_file_size == RECORD_LEN
tag = ms_loader.get_events_data().list_tags_by_plugin('scalar')
tensors = ms_loader.get_events_data().tensors(tag[0])
......
......@@ -101,8 +101,7 @@ class TestValidateSearchModelCondition(TestCase):
}
}
self._assert_raise_of_mindinsight_exception(
"The parameter learning_rate is invalid. It should be a dict and "
"the value should be a float or a integer",
"The value of `in` operation must be list or tuple.",
condition
)
......@@ -136,8 +135,8 @@ class TestValidateSearchModelCondition(TestCase):
}
}
self._assert_raise_of_mindinsight_exception(
"The parameter loss_function is invalid. It should be a dict and "
"the value should be a string",
"The parameter loss_function is invalid. "
"Its operation should be `in` or `eq`.",
condition
)
......@@ -147,8 +146,7 @@ class TestValidateSearchModelCondition(TestCase):
}
}
self._assert_raise_of_mindinsight_exception(
"The parameter train_dataset_count is invalid. It should be a dict "
"and the value should be a integer between 0",
"The value of `in` operation must be list or tuple.",
condition
)
......@@ -161,8 +159,8 @@ class TestValidateSearchModelCondition(TestCase):
}
}
self._assert_raise_of_mindinsight_exception(
"The parameter network is invalid. It should be a dict and "
"the value should be a string",
"The parameter network is invalid. "
"Its operation should be `in` or `eq`.",
condition
)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册