Unverified commit ce535945, authored by digger yu, committed via GitHub

fix: change ==NONE to is (#3923)

Parent 55243f3b
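Background for the change: PEP 8 requires comparisons to singletons such as `None` to use `is` / `is not`, never `==` / `!=`, because `==` dispatches to a type's `__eq__` method, which can be overridden (and, as with NumPy arrays, may not even return a plain bool). A minimal illustration, separate from the patch itself:

```python
# Why `x is None` beats `x == None`: `==` calls __eq__, which a class can
# override; `is` compares object identity and cannot be intercepted.
class AlwaysEqual:
    def __eq__(self, other):
        return True  # claims equality with everything, including None


obj = AlwaysEqual()
print(obj == None)  # True  -- misleading: obj is a real object, not None
print(obj is None)  # False -- the identity check gives the right answer
```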
@@ -151,7 +151,7 @@ def initialize(args=None,
     if hasattr(args, "deepspeed_config") and args.deepspeed_config is not None:
         assert config is None, "Not sure how to proceed, we were given deepspeed configs in the deepspeed arguments and deepspeed.initialize() function call"
         config = args.deepspeed_config
-    assert config != None, "DeepSpeed requires --deepspeed_config to specify configuration file"
+    assert config is not None, "DeepSpeed requires --deepspeed_config to specify configuration file"
     if not isinstance(model, PipelineModule):
         config_class = DeepSpeedConfig(config, mpu)
@@ -46,7 +46,7 @@ class BaseTuner:
                 self.rm.schedule_experiments(exp_paths)
                 self.rm.run()
                 exp, metric_val = self.rm.parse_results(self.metric)
-                if self.best_exp == None or self.best_metric_val == None or (metric_val
+                if self.best_exp is None or self.best_metric_val is None or (metric_val
                                                                              and metric_val > self.best_metric_val):
                     # logger.info(f"tuner finds better = {exp}")
                     self.best_exp = exp
@@ -14,7 +14,7 @@ from .torch import TorchBackend
 def build_ccl_op():
     builder = get_accelerator().create_op_builder("CCLCommBuilder")
-    if builder == None:
+    if builder is None:
         return None
     ccl_cpp_module = builder.load()
     print(f'DeepSpeed {builder.absolute_name()} built successfully')
@@ -25,7 +25,7 @@ class CCLBackend(TorchBackend):
     def __init__(self, name='ccl', rank=-1, world_size=-1, mpu=None, timeout=None, init_method=None):
         self.ccl_comm_op = build_ccl_op()
-        if self.ccl_comm_op == None:
+        if self.ccl_comm_op is None:
             # set CCLBackend to uninitialized state if CCLCommBuilder cannot be loaded
             self.initialized = False
             return
@@ -637,7 +637,7 @@ def init_distributed(dist_backend=None,
         utils.logger.info('Distributed backend already initialized')
     else:
         assert isinstance(timeout, timedelta)
-        if dist_backend == None:
+        if dist_backend is None:
             dist_backend = get_accelerator().communication_backend_name()
         if int(os.getenv('RANK', '0')) == 0:
             utils.logger.info('Initializing TorchBackend in DeepSpeed with backend {}'.format(dist_backend))
@@ -148,7 +148,7 @@ def _get_compatible_gpus_v02(micro_batches,
         for micro_batch in micro_batches:
             if final_batch_size // current_num_gpus % micro_batch == 0:
-                if candidate_microbatch == None:
+                if candidate_microbatch is None:
                     candidate_microbatch = micro_batch
                 if prefer_larger and candidate_microbatch < micro_batch:
                     candidate_microbatch = micro_batch
@@ -498,7 +498,7 @@ def main(args=None):
             deepspeed_launch.append(f"--min_elastic_nodes={args.min_elastic_nodes}")
         if args.bind_cores_to_rank:
             deepspeed_launch.append("--bind_cores_to_rank")
-        if args.bind_core_list != None:
+        if args.bind_core_list is not None:
             deepspeed_launch.append(f"--bind_core_list={args.bind_core_list}")
         cmd = deepspeed_launch + [args.user_script] + args.user_args
     else:
@@ -461,12 +461,12 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
             else:
                 class_name = prev_class_name + '.' + prev_name
             checking_key = prefix + '.' + class_name + '.' + name + '.' if class_name != "" else prefix + '.' + name + '.'
-            if child.__class__ in [nn.Linear, nn.Embedding, nn.LayerNorm] and state_dict != None:
+            if child.__class__ in [nn.Linear, nn.Embedding, nn.LayerNorm] and state_dict is not None:
                 if any(checking_key in item for item in state_dict):
                     load(child, state_dict, checking_key, mp_group)
                 else:
                     continue
-            if len(child._buffers) != 0 and state_dict != None:
+            if len(child._buffers) != 0 and state_dict is not None:
                 load_buffer(child, state_dict, checking_key)
             if child.__class__ in linear_policies:
                 setattr(r_module, name, linear_policies[child.__class__](child, prev_name + '.' + name,
@@ -507,7 +507,7 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
         return new_module
-    if checkpoint_dict != None and not config.replace_with_kernel_inject:
+    if checkpoint_dict is not None and not config.replace_with_kernel_inject:
         # AutoTP shard loading
         checkpoint = checkpoint_dict["checkpoints"]
         pbar = tqdm.tqdm(total=len(checkpoint), desc=f"Loading {len(checkpoint)} checkpoint shards")
@@ -745,7 +745,7 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
         A modified ``model``.
     """
     sd = None
-    if checkpoint != None:
+    if checkpoint is not None:
         sd = torch.load(checkpoint, map_location='cpu')
     policy = {}
     if orig_class is not None:
@@ -764,7 +764,7 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
         "You can find some samples here: https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/module_inject/replace_policy.py"
     replaced_module, _ = _replace_module(model, policy, state_dict=sd)
-    if checkpoint != None:
+    if checkpoint is not None:
         embedding_weight = None
         for n, p in replaced_module.named_parameters():
             if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
@@ -833,7 +833,7 @@ def _replace_module(model, policies, prefix='', layer_id=0, level_id=0, state_di
             layer_id += 1
         else:
             checking_key = prefix + name + '.'
-            if child.__class__ in load_layers and state_dict != None:
+            if child.__class__ in load_layers and state_dict is not None:
                 if any(checking_key in item for item in state_dict):
                     load(
                         child,
@@ -842,7 +842,7 @@ def _replace_module(model, policies, prefix='', layer_id=0, level_id=0, state_di
                     )
                 else:
                     continue
-            if len(child._buffers) != 0 and state_dict != None:
+            if len(child._buffers) != 0 and state_dict is not None:
                 load_buffer(child, state_dict, checking_key)
             _, layer_id = _replace_module(child,
                                           policies,
@@ -57,7 +57,7 @@ class DeepSpeedDiffusersAttentionFunction(Function):
         head_size = input.shape[-1] // config.heads
         do_flash_attn = (head_size <= 128)
         scale = (1 / norm_factor) * (1 / norm_factor)
-        if do_flash_attn and context == None:
+        if do_flash_attn and context is None:
             qkv_out = linear_func(input, attn_qkvw, attn_qkvb if attn_qkvb is not None else attn_qkvw, attn_qkvb
                                   is not None, do_flash_attn, config.heads, False)
@@ -85,7 +85,7 @@ class DeepSpeedDiffusersTransformerBlock(nn.Module):
         # In v0.11.0 of diffusers, the kwarg was changed from 'context' to 'encoder_hidden_states'
         # This is so we can support older and newer versions of diffusers
-        if "encoder_hidden_states" in kwargs and kwargs["encoder_hidden_states"] != None:
+        if "encoder_hidden_states" in kwargs and kwargs["encoder_hidden_states"] is not None:
             context = kwargs["encoder_hidden_states"]
         out_norm_1 = self.transformer_cuda_module.layer_norm(hidden_states, self.norm1_g, self.norm1_b, self.norm1_eps)
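The two comments in the hunk above describe a compatibility shim across diffusers versions. In outline the pattern looks like this (a schematic sketch, not the DeepSpeed source; the signature and body are assumptions for illustration):

```python
# Schematic sketch of the kwarg-compatibility pattern described above:
# older diffusers callers pass `context`, newer ones pass
# `encoder_hidden_states`; both are normalized onto one local variable.
def forward(hidden_states, context=None, **kwargs):
    if "encoder_hidden_states" in kwargs and kwargs["encoder_hidden_states"] is not None:
        context = kwargs["encoder_hidden_states"]
    # ... the rest of the block then only ever reads `context`
    return hidden_states, context  # placeholder for the real computation
```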
@@ -193,7 +193,7 @@ class DeepSpeedMoEInference(nn.Module):
         specialized_mode = False
         # InferenceSpecializedBuilder is not among DeepSpeed provided builder yet, so we infer by builder name string
         builder = get_accelerator().create_op_builder("InferenceSpecializedBuilder")
-        if builder != None and builder.is_compatible():
+        if builder is not None and builder.is_compatible():
             inference_module = builder.load()
             specialized_mode = True
         else:
@@ -37,7 +37,7 @@ class ResidualAddOp(BaseOp):
                 attention_bias: Optional[torch.Tensor] = None,
                 final_bias: Optional[torch.Tensor] = None):
-        if self.residual_add_func != None:
+        if self.residual_add_func is not None:
             if final_bias is None:
                 residual = self._vector_add(residual, hidden_state, 1.0 / self.config.mp_size)
             else:
@@ -859,7 +859,7 @@ class DeepSpeedEngine(Module):
         elif self.bfloat16_enabled():
             model_dtype = torch.bfloat16
-        if self._config.grad_accum_dtype == None:
+        if self._config.grad_accum_dtype is None:
             if model_dtype == torch.bfloat16 and not self.zero_optimization():
                 grad_accum_dtype = torch.float32
             else:
@@ -968,7 +968,7 @@ class DeepSpeedEngine(Module):
             "variable, it is set by the deepspeed launcher, deepspeed.init_distributed, or the torch's launcher. If using a " \
             "different launcher please ensure LOCAL_RANK is set prior to initializing deepspeed."
-        if hasattr(args, 'local_rank') and args.local_rank != None:
+        if hasattr(args, 'local_rank') and args.local_rank is not None:
             assert isinstance(args.local_rank,
                               int), f"args.local_rank of {args.local_rank} is an unknown type {type(args.local_rank)}"
             if args.local_rank >= 0:
@@ -159,7 +159,7 @@ class PipelineModule(nn.Module):
         self.global_rank = dist.get_rank(group=self.world_group)
         self.world_size = dist.get_world_size(group=self.world_group)
         self.local_rank = int(os.environ.get("LOCAL_RANK", None))
-        assert self.local_rank != None
+        assert self.local_rank is not None
         if topology:
             self._topo = topology
@@ -1892,7 +1892,7 @@ class DeepSpeedZeroOptimizer_Stage3(ZeROOptimizer):
         # warn user about caching allocator flushes
         memory_stats = get_accelerator().memory_stats()
-        alloc_retries = memory_stats["num_alloc_retries"] if memory_stats != None else 0
+        alloc_retries = memory_stats["num_alloc_retries"] if memory_stats is not None else 0
         if alloc_retries > self.n_caching_allocator_flushes:
             if dist.get_rank() == 0:
                 logger.warning(
@@ -83,7 +83,7 @@ def printflock(*msgs):
     """
     global fcntl
-    if fcntl == None:
+    if fcntl is None:
         import fcntl
     with open(__file__, "r") as fh:
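Aside: the `global fcntl` plus deferred `import fcntl` in the hunk above is a lazy-import idiom, presumably so the module can still be imported on platforms where `fcntl` does not exist (it is Unix-only). A condensed sketch of the idiom, with the `None` check written the post-patch way; the body is an assumption for illustration:

```python
# Lazy-import idiom: a module-level placeholder that is replaced by the
# real module the first time the function runs. The `global` declaration
# makes the function-local `import` rebind the module-level name.
fcntl = None


def printflock(*msgs):
    global fcntl
    if fcntl is None:
        import fcntl  # rebinds the global placeholder (Unix-only module)
    # ... file locking via fcntl.flock(...) would follow here
    print(*msgs)
```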
@@ -107,7 +107,7 @@ def get_numactl_cmd(bind_core_list, num_local_procs, local_rank):
             "Unset KMP_AFFINITY before launching deepspeed.\n\n"
             "\t$ unset KMP_AFFINITY\n"
             "\t$ deepspeed <deepspeed command parameters>")
-    if bind_core_list != None:
+    if bind_core_list is not None:
         core_list = parse_range_list(bind_core_list)
         total_cores = len(core_list)
     else:
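Finally, comparisons like these are easy to find mechanically: flake8 reports `== None` / `!= None` as E711. As a dependency-free alternative, here is a short sketch using only the standard-library `ast` module (the command-line handling is illustrative):

```python
# Flag `== None` / `!= None` comparisons in a Python source file using
# only the standard library. flake8 reports the same pattern as E711.
import ast
import sys

path = sys.argv[1]
tree = ast.parse(open(path, encoding="utf-8").read())
for node in ast.walk(tree):
    if isinstance(node, ast.Compare):
        for op, right in zip(node.ops, node.comparators):
            if isinstance(op, (ast.Eq, ast.NotEq)) and isinstance(right, ast.Constant) and right.value is None:
                print(f"{path}:{node.lineno}: compare with None using 'is' / 'is not'")
```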