add comments, optimize histogram log generator to record max and min

5c6fb358 · wenkai · 988aad75 · 5c6fb358 · 5c6fb358 · 5c6fb358
3 changed files
--- a/mindinsight/datavisual/data_transform/histogram_container.py
+++ b/mindinsight/datavisual/data_transform/histogram_container.py
@@ -120,6 +120,13 @@ class HistogramContainer:

        It's caller's duty to ensure input is valid.

+        Why do we need a visual range for histograms? Misaligned buckets between steps might mislead users
+        about the trend of a tensor. For a given tensor, thinner buckets tend to lower the count of every
+        bucket, while thicker buckets tend to raise it. If both kinds of histogram appear in one graph,
+        users might think the histogram with thicker buckets has more values, which is misleading. So we
+        need to unify buckets across steps. The visual range for histograms is a technique for unifying
+        buckets.
+
        Args:
            max_val (float): Max value for visual histogram.
            min_val (float): Min value for visual histogram.

--- a/mindinsight/datavisual/data_transform/reservoir.py
+++ b/mindinsight/datavisual/data_transform/reservoir.py
@@ -174,6 +174,10 @@ class HistogramReservoir(Reservoir):
                max_count = max(histogram.count, max_count)
                visual_range.update(histogram.max, histogram.min)

+            if visual_range.max == visual_range.min and not max_count:
+                logger.warning("Max equals to min, however, count is zero. Please check mindspore "
+                               "does write max and min values to histogram summary file.")
+
            bins = calc_histogram_bins(max_count)

            # update visual range

--- a/tests/utils/log_generators/histogram_log_generator.py
+++ b/tests/utils/log_generators/histogram_log_generator.py
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""Log generator for histogram data."""
-import time
-
-import numpy as np
-
-from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
-
-from .log_generator import LogGenerator
-
-
-class HistogramLogGenerator(LogGenerator):
-    """
-    Log generator for histogram data.
-
-    This is a log generator writing histogram data. User can use it to generate fake
-    summary logs about histogram.
-    """
-
-    def generate_event(self, values):
-        """
-        Method for generating histogram event.
-
-        Args:
-            values (dict): A dict contains:
-                {
-                    wall_time (float): Timestamp.
-                    step (int): Train step.
-                    value (float): Histogram value.
-                    tag (str): Tag name.
-                }
-
-       Returns:
-            summary_pb2.Event.
-
-        """
-        histogram_event = summary_pb2.Event()
-        histogram_event.wall_time = values.get('wall_time')
-        histogram_event.step = values.get('step')
-
-        value = histogram_event.summary.value.add()
-        value.tag = values.get('tag')
-
-        buckets = values.get('buckets')
-        for bucket in buckets:
-            left, width, count = bucket
-            bucket = value.histogram.buckets.add()
-            bucket.left = left
-            bucket.width = width
-            bucket.count = count
-
-        return histogram_event
-
-    def generate_log(self, file_path, steps_list, tag_name):
-        """
-        Generate log for external calls.
-
-        Args:
-            file_path (str): Path to write logs.
-            steps_list (list): A list consists of step.
-            tag_name (str): Tag name.
-
-        Returns:
-            list[dict], generated histogram metadata.
-            None, to be consistent with return value of HistogramGenerator.
-
-        """
-        histogram_metadata = []
-        for step in steps_list:
-            histogram = dict()
-
-            wall_time = time.time()
-            histogram.update({'wall_time': wall_time})
-            histogram.update({'step': step})
-            histogram.update({'tag': tag_name})
-
-            # Construct buckets
-            buckets = []
-            leftmost = list(np.random.randn(11))
-            leftmost.sort()
-            for i in range(10):
-                left = leftmost[i]
-                width = leftmost[i+1] - left
-                count = np.random.randint(20)
-                bucket = [left, width, count]
-                buckets.append(bucket)
-
-            histogram.update({'buckets': buckets})
-            histogram_metadata.append(histogram)
-
-            self._write_log_one_step(file_path, histogram)
-
-        return histogram_metadata, None
-
-
-if __name__ == "__main__":
-    histogram_log_generator = HistogramLogGenerator()
-    test_file_name = '%s.%s.%s' % ('histogram', 'summary', str(time.time()))
-    test_steps = [1, 3, 5]
-    test_tag = "test_histogram_tag_name"
-    histogram_log_generator.generate_log(test_file_name, test_steps, test_tag)
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Log generator for histogram data."""
+import time
+
+import numpy as np
+
+from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
+
+from .log_generator import LogGenerator
+
+
+class HistogramLogGenerator(LogGenerator):
+    """
+    Log generator for histogram data.
+
+    This is a log generator writing histogram data. User can use it to generate fake
+    summary logs about histogram.
+    """
+
+    def generate_event(self, values):
+        """
+        Method for generating histogram event.
+
+        Args:
+            values (dict): A dict contains:
+                {
+                    wall_time (float): Timestamp.
+                    step (int): Train step.
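+                    buckets (list): Buckets, each given as [left, width, count].
+                    min (float): Min value of the histogram, defaults to -1 if absent.
+                    max (float): Max value of the histogram, defaults to -1 if absent.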
+                    tag (str): Tag name.
+                }
+
+        Returns:
+            summary_pb2.Event.
+
+        """
+        histogram_event = summary_pb2.Event()
+        histogram_event.wall_time = values.get('wall_time')
+        histogram_event.step = values.get('step')
+
+        value = histogram_event.summary.value.add()
+        value.tag = values.get('tag')
+
+        buckets = values.get('buckets')
+        for bucket in buckets:
+            left, width, count = bucket
+            bucket = value.histogram.buckets.add()
+            bucket.left = left
+            bucket.width = width
+            bucket.count = count
+
+        value.histogram.min = values.get("min", -1)
+        value.histogram.max = values.get("max", -1)
+
+        return histogram_event
+
+    def generate_log(self, file_path, steps_list, tag_name):
+        """
+        Generate log for external calls.
+
+        Args:
+            file_path (str): Path to write logs.
+            steps_list (list): A list consists of step.
+            tag_name (str): Tag name.
+
+        Returns:
+            list[dict], generated histogram metadata.
+            None, to be consistent with return value of HistogramGenerator.
+
+        """
+        histogram_metadata = []
+        for step in steps_list:
+            histogram = dict()
+
+            wall_time = time.time()
+            histogram.update({'wall_time': wall_time})
+            histogram.update({'step': step})
+            histogram.update({'tag': tag_name})
+
+            # Construct buckets
+            buckets = []
+            leftmost = list(np.random.randn(11))
+            leftmost.sort()
+            min_val = leftmost[0]
+            max_val = leftmost[-1]
+            for i in range(10):
+                left = leftmost[i]
+                width = leftmost[i+1] - left
+                count = np.random.randint(20)
+                bucket = [left, width, count]
+                buckets.append(bucket)
+
+            histogram.update({'buckets': buckets, "min": min_val, "max": max_val})
+            histogram_metadata.append(histogram)
+
+            self._write_log_one_step(file_path, histogram)
+
+        return histogram_metadata, None
+
+
+if __name__ == "__main__":
+    histogram_log_generator = HistogramLogGenerator()
+    test_file_name = '%s.%s.%s' % ('histogram', 'summary', str(time.time()))
+    test_steps = [1, 3, 5]
+    test_tag = "test_histogram_tag_name"
+    histogram_log_generator.generate_log(test_file_name, test_steps, test_tag)