diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py
index f800b43f9d282004e763293afbb2e811bbed0e4b..5a96be60e21e758469703113b32931164d77738c 100644
--- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py
+++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py
@@ -539,14 +539,6 @@ class GroupShardedStage2(nn.Layer):
                 self._has_grad_storage[index] = True
             else:
                 self._param_grads.append(param.name)
-                print(
-                    "Can not add param: {}, param's shape: {}, param align: {}, grad_storages fill: {}, ".format(
-                        param.name,
-                        param.shape,
-                        self._trainable_param2align[param.name],
-                        self._grad_storages[param.dtype][dst_rank]._fill,
-                    )
-                )

         for dtype in self._grad_storages.keys():
             self._grad_storage_list.extend(
diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
index f67e52cbc5ea8b583bc5d2f7c7d4ac5a2a15fa35..8df7e69ea0b6fc98998d38d20c1f188850a4700e 100644
--- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
+++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
@@ -159,7 +159,7 @@ def broadcast_input_data(hcg, *inputs, **kwargs):
                     v_gpu._share_buffer_to(v)
                 _broadcast_data_help(v, v.shape, v.dtype, hcg)
         else:
-            logger.error("it doesn't support data type {}".format(type(v)))
+            logger.warning("it doesn't support data type {}".format(type(v)))

     for k, v in kwargs.items():
         if isinstance(v, (core.VarBase, core.eager.Tensor)):
@@ -171,7 +171,7 @@ def broadcast_input_data(hcg, *inputs, **kwargs):
                 _broadcast_data_help(v, v.shape, v.dtype, hcg)
             kwargs[k] = v
         else:
-            logger.error("it doesn't support data type {}".format(type(v)))
+            logger.warning("it doesn't support data type {}".format(type(v)))

     return inputs, kwargs