diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py
index 63706ca73e298d89e979a97be2253d54c22b1930..a025df6a34592acf74987a7c79d6428b99562c39 100644
--- a/mindspore/train/callback/_summary_collector.py
+++ b/mindspore/train/callback/_summary_collector.py
@@ -414,11 +414,11 @@ class SummaryCollector(Callback):
             logger.info("The 'train_dataset_element' in cb_params is None, maybe there is dataset sink mode.")
             return

-        if isinstance(input_data, (list, tuple)):
+        if isinstance(input_data, (list, tuple)) and input_data:
             input_data = input_data[0]
         try:
             self._record.add_value(PluginEnum.IMAGE.value, 'input_data/auto', input_data)
-        except ValueError:
+        except (TypeError, ValueError):
             logger.warning('The input data of network are not image, so will not collect by SummaryCollector.')
             self._collect_specified_data['collect_input_data'] = False
             return
diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index d4fcb96006677b12f1d41de283fdc665368d9059..3a1286d13060c27b9808e300884dd51614d53a7c 100755
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -448,6 +448,7 @@ class Model:
             for inputs in dataset_helper:
                 if _need_to_full() and context.get_context("device_target") == "GPU":
                     inputs = _to_full_tensor(inputs, self._device_number, self._global_rank)
+                cb_params.train_dataset_element = inputs
                 list_callback.step_begin(run_context)
                 outputs = self._train_network(*inputs)
                 cb_params.cur_step_num += dataset_helper.sink_size()
@@ -499,7 +500,6 @@ class Model:
                     raise ValueError("when loss_fn is not None, train_dataset should"
                                      "return two elements, but got {}".format(len_element))
                 cb_params.cur_step_num += 1
-                list_callback.step_begin(run_context)

                 overflow = False
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
@@ -507,6 +507,7 @@ class Model:
                     next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)

                 cb_params.train_dataset_element = next_element
+                list_callback.step_begin(run_context)
                 outputs = self._train_network(*next_element)
                 cb_params.net_outputs = outputs
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
diff --git a/model_zoo/official/nlp/bert_thor/src/model_thor.py b/model_zoo/official/nlp/bert_thor/src/model_thor.py
index 01697f65a7085ccc1c789fb85643f1dda0cfe834..f47e8c368979a725ffd35018a4ae2a2fa2e15aff 100644
--- a/model_zoo/official/nlp/bert_thor/src/model_thor.py
+++ b/model_zoo/official/nlp/bert_thor/src/model_thor.py
@@ -482,6 +482,7 @@ class Model:
         for inputs in dataset_helper:
             if _need_to_full():
                 inputs = _to_full_tensor(inputs, self._device_number, self._global_rank)
+            cb_params.train_dataset_element = inputs
             list_callback.step_begin(run_context)
             if switch_branch_one:
                 cb_params.cur_step_num += dataset_helper.sink_size()
@@ -546,7 +547,6 @@ class Model:
                     raise ValueError("when loss_fn is not None, train_dataset should"
                                      "return two elements, but got {}".format(len_element))
                 cb_params.cur_step_num += 1
-                list_callback.step_begin(run_context)

                 overflow = False
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
@@ -554,6 +554,7 @@ class Model:
                     next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)

                 cb_params.train_dataset_element = next_element
+                list_callback.step_begin(run_context)
                 outputs = self._train_network(*next_element)
                 cb_params.net_outputs = outputs
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
diff --git a/tests/st/networks/models/resnet50/src_thor/model_thor.py b/tests/st/networks/models/resnet50/src_thor/model_thor.py
index 8144b1cd805fbe2c2ad5b0ce5c6283ff96224cac..c99a41cb6cacbd665146c18ebb1c9862d494b8f5 100644
--- a/tests/st/networks/models/resnet50/src_thor/model_thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/model_thor.py
@@ -454,7 +454,6 @@ class Model:

         # for data sink dataset_helper only iter once, other wise iter epoch_size times.
         for inputs in dataset_helper:
-            list_callback.step_begin(run_context)
             if switch_branch_one:
                 cb_params.cur_step_num += loop_size
                 self._train_network.add_flags_recursive(thor=True)
@@ -467,6 +466,8 @@ class Model:
                     _exec_datagraph(train_dataset, iter_first_order, phase='train1_dataset')
                     self._has_do_dataset_init = True
             switch_branch_one = not switch_branch_one
+            cb_params.train_dataset_element = inputs
+            list_callback.step_begin(run_context)
             outputs = self._train_network(*inputs)
             cb_params.net_outputs = outputs
             list_callback.step_end(run_context)
@@ -514,13 +515,14 @@ class Model:
                     raise ValueError("when loss_fn is not None, train_dataset should"
                                      "return two elements, but got {}".format(len_element))
                 cb_params.cur_step_num += 1
-                list_callback.step_begin(run_context)

                 overflow = False
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
                     scaling_sens = self._get_scaling_sens()
                     next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)

+                cb_params.train_dataset_element = next_element
+                list_callback.step_begin(run_context)
                 outputs = self._train_network(*next_element)
                 cb_params.net_outputs = outputs
                 if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
diff --git a/tests/ut/python/train/summary/test_summary_collector.py b/tests/ut/python/train/summary/test_summary_collector.py
index 48d79a80dcea832e2035fe707b745abe0a30d314..f802f58bcc4b7fbba0239cc5ddbfec3ffcc09cb0 100644
--- a/tests/ut/python/train/summary/test_summary_collector.py
+++ b/tests/ut/python/train/summary/test_summary_collector.py
@@ -242,13 +242,14 @@ class TestSummaryCollector:
             SummaryCollector((tempfile.mkdtemp(dir=self.base_summary_dir)))._check_callbacks(cb_params)
         assert f"more than one SummaryCollector instance in callback list" in str(exc.value)

-    def test_collect_input_data_with_train_dataset_element_none(self):
-        """Test the param 'train_dataset_element' in cb_params is none."""
+    def test_collect_input_data_with_train_dataset_element_invalid(self):
+        """Test the param 'train_dataset_element' in cb_params is invalid."""
         cb_params = _InternalCallbackParam()
-        cb_params.train_dataset_element = None
-        summary_collector = SummaryCollector((tempfile.mkdtemp(dir=self.base_summary_dir)))
-        summary_collector._collect_input_data(cb_params)
-        assert not summary_collector._collect_specified_data['collect_input_data']
+        for invalid in (), [], None, [None]:
+            cb_params.train_dataset_element = invalid
+            with SummaryCollector(tempfile.mkdtemp(dir=self.base_summary_dir)) as summary_collector:
+                summary_collector._collect_input_data(cb_params)
+                assert not summary_collector._collect_specified_data['collect_input_data']

     @mock.patch.object(SummaryRecord, 'add_value')
     def test_collect_input_data_success(self, mock_add_value):
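
Note: a minimal sketch, outside the patch, of what the reordering above guarantees to user callbacks: by the time step_begin fires, cb_params.train_dataset_element already holds the current step's inputs. Callback and run_context.original_args() are MindSpore's public callback API; the InputShapeLogger name below is hypothetical.

# Hypothetical callback sketch (not part of the patch). It relies on the
# ordering introduced above: cb_params.train_dataset_element is assigned
# before list_callback.step_begin(run_context) is invoked.
from mindspore.train.callback import Callback


class InputShapeLogger(Callback):
    """Print the shape of the first input tensor at each step (illustrative)."""

    def step_begin(self, run_context):
        cb_params = run_context.original_args()
        inputs = getattr(cb_params, 'train_dataset_element', None)
        # Mirror the guard the patched SummaryCollector uses: an empty
        # list/tuple would otherwise raise on inputs[0].
        if isinstance(inputs, (list, tuple)) and inputs:
            inputs = inputs[0]
        if inputs is not None and hasattr(inputs, 'shape'):
            print('step input shape:', inputs.shape)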