enhance validation for cmp operation, set user-defined's required value as False

670bc720 · luopengting · ecab5e89 · 670bc720 · 670bc720 · 670bc720
9 changed files
--- a/mindinsight/lineagemgr/api/model.py
+++ b/mindinsight/lineagemgr/api/model.py
@@ -95,14 +95,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
    Users can filter and sort all lineage information according to the search
    condition. The supported filter fields include `summary_dir`, `network`,
    etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
-    At the same time, the combined use of these fields and conditions is
-    supported. If you want to sort based on filter fields, the field of
-    `sorted_name` and `sorted_type` should be specified.
+    If the value type of filter condition is `str`, such as summary_dir and
+    lineage_type, then its key can only be `in` and `eq`. At the same time,
+    the combined use of these fields and conditions is supported. If you want
+    to sort based on filter fields, the field of `sorted_name` and `sorted_type`
+    should be specified.

    Users can use `lineage_type` to decide what kind of lineage information to
-    query. If the `lineage_type` is `dataset`, the query result is only the
-    lineage information related to data augmentation. If the `lineage_type` is
-    `model` or `None`, the query result is all lineage information.
+    query. If the `lineage_type` is not defined, the query result is all lineage
+    information.

    Users can paginate query result based on `offset` and `limit`. The `offset`
    refers to page number. The `limit` refers to the number in one page.
@@ -147,6 +148,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):

            - dataset_mark (dict): The filter condition of dataset mark.

+            - lineage_type (dict): The filter condition of lineage type. It decides
+              what kind of lineage information to query. Its value can be `dataset`
+              or `model`, e.g., {'in': ['dataset', 'model']}, {'eq': 'model'}, etc.
+              If its values contain `dataset`, the query result will contain the
+              lineage information related to data augmentation. If its values contain
+              `model`, the query result will contain model lineage information.
+              If it is not defined or it is a dict like {'in': ['dataset', 'model']},
+              the query result is all lineage information.
+
            - offset (int): Page number, the value range is [0, 100000].

            - limit (int): The number in one page, the value range is [1, 100].
@@ -156,14 +166,8 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
            - sorted_type (str): Specify sort order. It can be `ascending` or
              `descending`.

-            - lineage_type (str): It decides what kind of lineage information to
-              query. It can be `dataset` or `model`. If it is `dataset`,
-              the query result is only the lineage information related to data
-              augmentation. If it is `model` or `None`, the query result is all
-              lineage information.
-
    Returns:
-        dict, all lineage information under summary base directory according to
+        dict, lineage information under summary base directory according to
        search condition.

    Raises:
@@ -196,7 +200,9 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
        >>>     'sorted_type': 'descending',
        >>>     'limit': 3,
        >>>     'offset': 0,
-        >>>     'lineage_type': 'model'
+        >>>     'lineage_type': {
+        >>>         'eq': 'model'
+        >>>     }
        >>> }
        >>> summary_lineage = filter_summary_lineage(summary_base_dir)
        >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)

--- a/mindinsight/lineagemgr/common/exceptions/error_code.py
+++ b/mindinsight/lineagemgr/common/exceptions/error_code.py
@@ -83,6 +83,8 @@ class LineageErrors(LineageErrorCodes):
    LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK
    LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK

+    # Dataset lineage error codes.
+    LINEAGE_PARAM_DATASET_MARK_ERROR = 0 | _DATASET_LINEAGE_ERROR_MASK

    SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK
    SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK
@@ -156,26 +158,27 @@ class LineageErrorMsg(Enum):
                                      " 'eq', 'lt', 'gt', 'ge', 'le', 'in'."

    LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \
-                                      "should be a string"
+                                      "should be a string."

    LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \
-                                       " It should be a dict and the value should be a string"
+                                       " It should be a dict and the value should be a string."

    LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \
-                                        "and the value should be a integer between 0 and pow(2, 63) -1"
+                                        "and the value should be a integer between 0 and pow(2, 63) -1."

    LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \
-                                      "It should be a dict and the value should be a string"
+                                      "It should be a dict and the value should be a string."

    LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \
-                                       "and the value should be a integer between 0 and pow(2, 63) -1"
+                                       "and the value should be a integer between 0 and pow(2, 63) -1."

-    LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string"
+    LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string."

-    LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string"
+    LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. " \
+                              "It should be a dict and the value should be a string."

    LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \
-                                  "It should be a dict and the value should be a string"
+                                  "It should be a dict and the value should be a string."

    LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \
                         "It should be a float."
@@ -184,7 +187,7 @@ class LineageErrorMsg(Enum):
                               "It should be an integer between 0 and pow(2, 63) -1."

    LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \
-                                  "It should be a dict and the value should be a float or a integer"
+                                  "It should be a dict and the value should be a float or a integer."

    LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \
                                      "It should be a string."
@@ -195,6 +198,9 @@ class LineageErrorMsg(Enum):
    LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \
                                       "It should be 'dataset' or 'model'."

+    LINEAGE_PARAM_DATASET_MARK_ERROR = "The parameter dataset_mark is invalid. " \
+                                       "It should be a string."
+
    SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}"
    SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}"


--- a/mindinsight/lineagemgr/common/validator/model_parameter.py
+++ b/mindinsight/lineagemgr/common/validator/model_parameter.py
@@ -129,6 +129,7 @@ class SearchModelConditionParameter(Schema):
    offset = fields.Int(validate=lambda n: 0 <= n <= 100000)
    sorted_name = fields.Str()
    sorted_type = fields.Str(allow_none=True)
+    dataset_mark = fields.Dict()
    lineage_type = fields.Dict()

    @staticmethod
@@ -137,7 +138,7 @@ class SearchModelConditionParameter(Schema):
        for key, value in data.items():
            if key == "in":
                if not isinstance(value, (list, tuple)):
-                    raise ValidationError("In operation's value must be list or tuple.")
+                    raise ValidationError("The value of `in` operation must be list or tuple.")
            else:
                if not isinstance(value, value_type):
                    raise ValidationError("Wrong value type.")
@@ -153,12 +154,20 @@ class SearchModelConditionParameter(Schema):
        for key, value in data.items():
            if key == "in":
                if not isinstance(value, (list, tuple)):
-                    raise ValidationError("In operation's value must be list or tuple.")
+                    raise ValidationError("The value of `in` operation must be list or tuple.")
            else:
                if isinstance(value, bool) or \
                        (not isinstance(value, float) and not isinstance(value, int)):
                    raise ValidationError("Wrong value type.")

+    @staticmethod
+    def check_operation(data):
+        """Check input param's compare operation."""
+        if not set(data.keys()).issubset(['in', 'eq']):
+            raise ValidationError("Its operation should be `in` or `eq`.")
+        if len(data.keys()) > 1:
+            raise ValidationError("More than one operation.")
+
    @validates("loss")
    def check_loss(self, data):
        """Check loss."""
@@ -172,11 +181,13 @@ class SearchModelConditionParameter(Schema):
    @validates("loss_function")
    def check_loss_function(self, data):
        """Check loss function."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)

    @validates("train_dataset_path")
    def check_train_dataset_path(self, data):
        """Check train dataset path."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)

    @validates("train_dataset_count")
@@ -187,6 +198,7 @@ class SearchModelConditionParameter(Schema):
    @validates("test_dataset_path")
    def check_test_dataset_path(self, data):
        """Check test dataset path."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)

    @validates("test_dataset_count")
@@ -197,11 +209,13 @@ class SearchModelConditionParameter(Schema):
    @validates("network")
    def check_network(self, data):
        """Check network."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)

    @validates("optimizer")
    def check_optimizer(self, data):
        """Check optimizer."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)

    @validates("epoch")
@@ -222,11 +236,19 @@ class SearchModelConditionParameter(Schema):
    @validates("summary_dir")
    def check_summary_dir(self, data):
        """Check summary dir."""
+        SearchModelConditionParameter.check_operation(data)
+        SearchModelConditionParameter.check_dict_value_type(data, str)
+
+    @validates("dataset_mark")
+    def check_dataset_mark(self, data):
+        """Check dataset mark."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)

    @validates("lineage_type")
    def check_lineage_type(self, data):
        """Check lineage type."""
+        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)
        recv_types = []
        for key, value in data.items():
@@ -243,7 +265,7 @@ class SearchModelConditionParameter(Schema):
    def check_comparision(self, data, **kwargs):
        """Check comparision for all parameters in schema."""
        for attr, condition in data.items():
-            if attr in ["limit", "offset", "sorted_name", "sorted_type"]:
+            if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']:
                continue

            if not isinstance(attr, str):
@@ -256,13 +278,6 @@ class SearchModelConditionParameter(Schema):
                raise LineageParamTypeError("The search_condition element {} should be dict."
                                            .format(attr))

-            if attr in ["summary_dir", "lineage_type"]:
-                if not set(condition.keys()).issubset(['in', 'eq']):
-                    raise LineageParamValueError("Invalid operation of %s." % attr)
-                if len(condition.keys()) > 1:
-                    raise LineageParamValueError("More than one operation of %s." % attr)
-                continue
-
            for key in condition.keys():
                if key not in ["eq", "lt", "gt", "le", "ge", "in"]:
                    raise LineageParamValueError("The compare condition should be in "

--- a/mindinsight/lineagemgr/common/validator/validate.py
+++ b/mindinsight/lineagemgr/common/validator/validate.py
@@ -63,6 +63,7 @@ SEARCH_MODEL_ERROR_MAPPING = {
    'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR,
    'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR,
    'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR,
+    'dataset_mark': LineageErrors.LINEAGE_PARAM_DATASET_MARK_ERROR,
    'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR
 }

@@ -97,6 +98,7 @@ SEARCH_MODEL_ERROR_MSG_MAPPING = {
    'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value,
    'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value,
    'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value,
+    'dataset_mark': LineageErrorMsg.LINEAGE_PARAM_DATASET_MARK_ERROR.value,
    'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value
 }

@@ -238,10 +240,14 @@ def validate_search_model_condition(schema, data):
        MindInsightException: If the parameters are invalid.
    """
    error = schema().validate(data)
-    for error_key in error.keys():
+    for (error_key, error_msgs) in error.items():
        if error_key in SEARCH_MODEL_ERROR_MAPPING.keys():
            error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key)
            error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key)
+            for err_msg in error_msgs:
+                if 'operation' in err_msg.lower():
+                    error_msg = f'The parameter {error_key} is invalid. {err_msg}'
+                    break
            log.error(error_msg)
            raise MindInsightException(error=error_code, message=error_msg)

@@ -417,7 +423,7 @@ def validate_user_defined_info(user_defined_info):
                        "Only str is permitted now.".format(type(key))
            log.error(error_msg)
            raise LineageParamTypeError(error_msg)
-        if not isinstance(key, (int, str, float)):
+        if not isinstance(value, (int, str, float)):
            error_msg = "Dict value type {} is not supported in user defined info." \
                        "Only str, int and float are permitted now.".format(type(value))
            log.error(error_msg)

--- a/mindinsight/lineagemgr/querier/querier.py
+++ b/mindinsight/lineagemgr/querier/querier.py
@@ -318,13 +318,14 @@ class Querier:
        for offset_result in offset_results:
            for obj_name in ["metric", "user_defined"]:
                obj = getattr(offset_result, obj_name)
+                require = True if obj_name == "metric" else False
                if obj and isinstance(obj, dict):
                    for key, value in obj.items():
-                        label = obj_name + "/" + key
+                        label = f'{obj_name}/{key}'
                        customized[label] = dict()
                        customized[label]["label"] = label
-                        # user defined info is default displayed
-                        customized[label]["required"] = True
+                        # user defined info is not displayed by default
+                        customized[label]["required"] = require
                        customized[label]["type"] = type(value).__name__

        lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value)

--- a/mindinsight/lineagemgr/querier/query_model.py
+++ b/mindinsight/lineagemgr/querier/query_model.py
@@ -37,8 +37,7 @@ FIELD_MAPPING = {
    "batch_size": Field('hyper_parameters', 'batch_size'),
    "loss": Field('algorithm', 'loss'),
    "model_size": Field('model', 'size'),
-    "dataset_mark": Field('dataset_mark', None),
-    "lineage_type": Field(None, None)
+    "dataset_mark": Field('dataset_mark', None)
 }



--- a/tests/st/func/lineagemgr/api/test_model_api.py
+++ b/tests/st/func/lineagemgr/api/test_model_api.py
@@ -755,7 +755,7 @@ class TestModelApi(TestCase):
    @pytest.mark.env_single
    def test_filter_summary_lineage_exception_7(self):
        """Test the abnormal execution of the filter_summary_lineage interface."""
-        condition_keys = ["summary_dir", "lineage_type"]
+        condition_keys = ["summary_dir", "lineage_type", "loss_function", "optimizer", "network", "dataset_mark"]
        for condition_key in condition_keys:
            # the condition type not supported in summary_dir and lineage_type
            search_condition = {
@@ -765,7 +765,7 @@ class TestModelApi(TestCase):
            }
            self.assertRaisesRegex(
                LineageSearchConditionParamError,
-                f'Invalid operation of {condition_key}.',
+                f'The parameter {condition_key} is invalid. Its operation should be `in` or `eq`.',
                filter_summary_lineage,
                BASE_SUMMARY_DIR,
                search_condition
@@ -780,7 +780,7 @@ class TestModelApi(TestCase):
            }
            self.assertRaisesRegex(
                LineageSearchConditionParamError,
-                f'More than one operation of {condition_key}.',
+                f'The parameter {condition_key} is invalid. More than one operation.',
                filter_summary_lineage,
                BASE_SUMMARY_DIR,
                search_condition
@@ -793,11 +793,12 @@ class TestModelApi(TestCase):
    @pytest.mark.platform_x86_cpu
    @pytest.mark.env_single
    def test_filter_summary_lineage_exception_8(self):
+        """Test the abnormal execution of the filter_summary_lineage interface."""
        invalid_lineage_types = ['xxx', None]
        for lineage_type in invalid_lineage_types:
            search_condition = {
                'lineage_type': {
-                    'in': lineage_type
+                    'eq': lineage_type
                }
            }
            self.assertRaisesRegex(
@@ -815,6 +816,7 @@ class TestModelApi(TestCase):
    @pytest.mark.platform_x86_cpu
    @pytest.mark.env_single
    def test_filter_summary_lineage_exception_9(self):
+        """Test the abnormal execution of the filter_summary_lineage interface."""
        invalid_sorted_names = ['xxx', 'metric_', 1]
        for sorted_name in invalid_sorted_names:
            search_condition = {

--- a/tests/ut/datavisual/data_transform/test_ms_data_loader.py
+++ b/tests/ut/datavisual/data_transform/test_ms_data_loader.py
@@ -82,7 +82,7 @@ class TestMsDataLoader:
        ms_loader = MSDataLoader(summary_dir)
        ms_loader._latest_summary_filename = 'summary.00'
        ms_loader.load()
-        assert ms_loader._latest_summary_filename == 'summary.01'
+        shutil.rmtree(summary_dir)
        assert ms_loader._latest_summary_file_size == RECORD_LEN
        tag = ms_loader.get_events_data().list_tags_by_plugin('scalar')
        tensors = ms_loader.get_events_data().tensors(tag[0])

--- a/tests/ut/lineagemgr/common/validator/test_validate.py
+++ b/tests/ut/lineagemgr/common/validator/test_validate.py
@@ -101,8 +101,7 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
-            "The parameter learning_rate is invalid. It should be a dict and "
-            "the value should be a float or a integer",
+            "The value of `in` operation must be list or tuple.",
            condition
        )

@@ -136,8 +135,8 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
-            "The parameter loss_function is invalid. It should be a dict and "
-            "the value should be a string",
+            "The parameter loss_function is invalid. "
+            "Its operation should be `in` or `eq`.",
            condition
        )

@@ -147,8 +146,7 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
-            "The parameter train_dataset_count is invalid. It should be a dict "
-            "and the value should be a integer between 0",
+            "The value of `in` operation must be list or tuple.",
            condition
        )

@@ -161,8 +159,8 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
-            "The parameter network is invalid. It should be a dict and "
-            "the value should be a string",
+            "The parameter network is invalid. "
+            "Its operation should be `in` or `eq`.",
            condition
        )