From 5c6fb358b2d794576e65049786fc2bb8c7430cfd Mon Sep 17 00:00:00 2001 From: wenkai Date: Wed, 22 Apr 2020 15:49:45 +0800 Subject: [PATCH] add comments, optimize histogram log generator to record max and min --- .../data_transform/histogram_container.py | 7 + .../datavisual/data_transform/reservoir.py | 4 + .../log_generators/histogram_log_generator.py | 233 +++++++++--------- 3 files changed, 130 insertions(+), 114 deletions(-) diff --git a/mindinsight/datavisual/data_transform/histogram_container.py b/mindinsight/datavisual/data_transform/histogram_container.py index 45ae1df..d9a80d6 100644 --- a/mindinsight/datavisual/data_transform/histogram_container.py +++ b/mindinsight/datavisual/data_transform/histogram_container.py @@ -120,6 +120,13 @@ class HistogramContainer: It's caller's duty to ensure input is valid. + Why do we need a visual range for histograms? Misaligned buckets between steps might mislead users about the + trend of a tensor. For a given tensor, if you have thinner buckets, the count of every bucket might get + low; however, if you have thicker buckets, the count of every bucket might get high. If both of these + kinds of histogram appear in one graph, users might think the histogram with thicker buckets has more values. This is + misleading, so we need to unify buckets across steps. The visual range for histograms is a technique for unifying + buckets. + Args: max_val (float): Max value for visual histogram. min_val (float): Min value for visual histogram. 
diff --git a/mindinsight/datavisual/data_transform/reservoir.py b/mindinsight/datavisual/data_transform/reservoir.py index aef9879..c35fc32 100644 --- a/mindinsight/datavisual/data_transform/reservoir.py +++ b/mindinsight/datavisual/data_transform/reservoir.py @@ -174,6 +174,10 @@ class HistogramReservoir(Reservoir): max_count = max(histogram.count, max_count) visual_range.update(histogram.max, histogram.min) + if visual_range.max == visual_range.min and not max_count: + logger.warning("Max equals to min, however, count is zero. Please check mindspore " + "does write max and min values to histogram summary file.") + bins = calc_histogram_bins(max_count) # update visual range diff --git a/tests/utils/log_generators/histogram_log_generator.py b/tests/utils/log_generators/histogram_log_generator.py index 5d6e2ab..bced6de 100644 --- a/tests/utils/log_generators/histogram_log_generator.py +++ b/tests/utils/log_generators/histogram_log_generator.py @@ -1,114 +1,119 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Log generator for histogram data.""" -import time - -import numpy as np - -from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2 - -from .log_generator import LogGenerator - - -class HistogramLogGenerator(LogGenerator): - """ - Log generator for histogram data. 
- - This is a log generator writing histogram data. User can use it to generate fake - summary logs about histogram. - """ - - def generate_event(self, values): - """ - Method for generating histogram event. - - Args: - values (dict): A dict contains: - { - wall_time (float): Timestamp. - step (int): Train step. - value (float): Histogram value. - tag (str): Tag name. - } - - Returns: - summary_pb2.Event. - - """ - histogram_event = summary_pb2.Event() - histogram_event.wall_time = values.get('wall_time') - histogram_event.step = values.get('step') - - value = histogram_event.summary.value.add() - value.tag = values.get('tag') - - buckets = values.get('buckets') - for bucket in buckets: - left, width, count = bucket - bucket = value.histogram.buckets.add() - bucket.left = left - bucket.width = width - bucket.count = count - - return histogram_event - - def generate_log(self, file_path, steps_list, tag_name): - """ - Generate log for external calls. - - Args: - file_path (str): Path to write logs. - steps_list (list): A list consists of step. - tag_name (str): Tag name. - - Returns: - list[dict], generated histogram metadata. - None, to be consistent with return value of HistogramGenerator. 
- - """ - histogram_metadata = [] - for step in steps_list: - histogram = dict() - - wall_time = time.time() - histogram.update({'wall_time': wall_time}) - histogram.update({'step': step}) - histogram.update({'tag': tag_name}) - - # Construct buckets - buckets = [] - leftmost = list(np.random.randn(11)) - leftmost.sort() - for i in range(10): - left = leftmost[i] - width = leftmost[i+1] - left - count = np.random.randint(20) - bucket = [left, width, count] - buckets.append(bucket) - - histogram.update({'buckets': buckets}) - histogram_metadata.append(histogram) - - self._write_log_one_step(file_path, histogram) - - return histogram_metadata, None - - -if __name__ == "__main__": - histogram_log_generator = HistogramLogGenerator() - test_file_name = '%s.%s.%s' % ('histogram', 'summary', str(time.time())) - test_steps = [1, 3, 5] - test_tag = "test_histogram_tag_name" - histogram_log_generator.generate_log(test_file_name, test_steps, test_tag) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Log generator for histogram data.""" +import time + +import numpy as np + +from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2 + +from .log_generator import LogGenerator + + +class HistogramLogGenerator(LogGenerator): + """ + Log generator for histogram data. + + This is a log generator writing histogram data. 
User can use it to generate fake + summary logs about histogram. + """ + + def generate_event(self, values): + """ + Method for generating histogram event. + + Args: + values (dict): A dict contains: + { + wall_time (float): Timestamp. + step (int): Train step. + value (float): Histogram value. + tag (str): Tag name. + } + + Returns: + summary_pb2.Event. + + """ + histogram_event = summary_pb2.Event() + histogram_event.wall_time = values.get('wall_time') + histogram_event.step = values.get('step') + + value = histogram_event.summary.value.add() + value.tag = values.get('tag') + + buckets = values.get('buckets') + for bucket in buckets: + left, width, count = bucket + bucket = value.histogram.buckets.add() + bucket.left = left + bucket.width = width + bucket.count = count + + value.histogram.min = values.get("min", -1) + value.histogram.max = values.get("max", -1) + + return histogram_event + + def generate_log(self, file_path, steps_list, tag_name): + """ + Generate log for external calls. + + Args: + file_path (str): Path to write logs. + steps_list (list): A list consists of step. + tag_name (str): Tag name. + + Returns: + list[dict], generated histogram metadata. + None, to be consistent with return value of HistogramGenerator. 
+ + """ + histogram_metadata = [] + for step in steps_list: + histogram = dict() + + wall_time = time.time() + histogram.update({'wall_time': wall_time}) + histogram.update({'step': step}) + histogram.update({'tag': tag_name}) + + # Construct buckets + buckets = [] + leftmost = list(np.random.randn(11)) + leftmost.sort() + min_val = leftmost[0] + max_val = leftmost[-1] + for i in range(10): + left = leftmost[i] + width = leftmost[i+1] - left + count = np.random.randint(20) + bucket = [left, width, count] + buckets.append(bucket) + + histogram.update({'buckets': buckets, "min": min_val, "max": max_val}) + histogram_metadata.append(histogram) + + self._write_log_one_step(file_path, histogram) + + return histogram_metadata, None + + +if __name__ == "__main__": + histogram_log_generator = HistogramLogGenerator() + test_file_name = '%s.%s.%s' % ('histogram', 'summary', str(time.time())) + test_steps = [1, 3, 5] + test_tag = "test_histogram_tag_name" + histogram_log_generator.generate_log(test_file_name, test_steps, test_tag) -- GitLab