fix: change `== None` / `!= None` comparisons to `is None` / `is not None` (#3923)

ce535945 · digger yu · GitHub · 55243f3b · ce535945 · ce535945
16 changed files
--- a/deepspeed/__init__.py
+++ b/deepspeed/__init__.py
@@ -151,7 +151,7 @@ def initialize(args=None,
    if hasattr(args, "deepspeed_config") and args.deepspeed_config is not None:
        assert config is None, "Not sure how to proceed, we were given deepspeed configs in the deepspeed arguments and deepspeed.initialize() function call"
        config = args.deepspeed_config
-    assert config != None, "DeepSpeed requires --deepspeed_config to specify configuration file"
+    assert config is not None, "DeepSpeed requires --deepspeed_config to specify configuration file"

    if not isinstance(model, PipelineModule):
        config_class = DeepSpeedConfig(config, mpu)

--- a/deepspeed/autotuning/tuner/base_tuner.py
+++ b/deepspeed/autotuning/tuner/base_tuner.py
@@ -46,7 +46,7 @@ class BaseTuner:
                self.rm.schedule_experiments(exp_paths)
                self.rm.run()
                exp, metric_val = self.rm.parse_results(self.metric)
-                if self.best_exp == None or self.best_metric_val == None or (metric_val
+                if self.best_exp is None or self.best_metric_val is None or (metric_val
                                                                             and metric_val > self.best_metric_val):
                    # logger.info(f"tuner finds better = {exp}")
                    self.best_exp = exp

--- a/deepspeed/comm/ccl.py
+++ b/deepspeed/comm/ccl.py
@@ -14,7 +14,7 @@ from .torch import TorchBackend

 def build_ccl_op():
    builder = get_accelerator().create_op_builder("CCLCommBuilder")
-    if builder == None:
+    if builder is None:
        return None
    ccl_cpp_module = builder.load()
    print(f'DeepSpeed {builder.absolute_name()} built successfully')
@@ -25,7 +25,7 @@ class CCLBackend(TorchBackend):

    def __init__(self, name='ccl', rank=-1, world_size=-1, mpu=None, timeout=None, init_method=None):
        self.ccl_comm_op = build_ccl_op()
-        if self.ccl_comm_op == None:
+        if self.ccl_comm_op is None:
            # set CCLBackend to uninitialized state if CCLCommBuilder cannot be loaded
            self.initialized = False
            return

--- a/deepspeed/comm/comm.py
+++ b/deepspeed/comm/comm.py
@@ -637,7 +637,7 @@ def init_distributed(dist_backend=None,
                utils.logger.info('Distributed backend already initialized')
        else:
            assert isinstance(timeout, timedelta)
-            if dist_backend == None:
+            if dist_backend is None:
                dist_backend = get_accelerator().communication_backend_name()
            if int(os.getenv('RANK', '0')) == 0:
                utils.logger.info('Initializing TorchBackend in DeepSpeed with backend {}'.format(dist_backend))

--- a/deepspeed/elasticity/elasticity.py
+++ b/deepspeed/elasticity/elasticity.py
@@ -148,7 +148,7 @@ def _get_compatible_gpus_v02(micro_batches,

        for micro_batch in micro_batches:
            if final_batch_size // current_num_gpus % micro_batch == 0:
-                if candidate_microbatch == None:
+                if candidate_microbatch is None:
                    candidate_microbatch = micro_batch
                if prefer_larger and candidate_microbatch < micro_batch:
                    candidate_microbatch = micro_batch

--- a/deepspeed/launcher/runner.py
+++ b/deepspeed/launcher/runner.py
@@ -498,7 +498,7 @@ def main(args=None):
            deepspeed_launch.append(f"--min_elastic_nodes={args.min_elastic_nodes}")
        if args.bind_cores_to_rank:
            deepspeed_launch.append("--bind_cores_to_rank")
-        if args.bind_core_list != None:
+        if args.bind_core_list is not None:
            deepspeed_launch.append(f"--bind_core_list={args.bind_core_list}")
        cmd = deepspeed_launch + [args.user_script] + args.user_args
    else:

--- a/deepspeed/module_inject/replace_module.py
+++ b/deepspeed/module_inject/replace_module.py
@@ -461,12 +461,12 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
                else:
                    class_name = prev_class_name + '.' + prev_name
                checking_key = prefix + '.' + class_name + '.' + name + '.' if class_name != "" else prefix + '.' + name + '.'
-                if child.__class__ in [nn.Linear, nn.Embedding, nn.LayerNorm] and state_dict != None:
+                if child.__class__ in [nn.Linear, nn.Embedding, nn.LayerNorm] and state_dict is not None:
                    if any(checking_key in item for item in state_dict):
                        load(child, state_dict, checking_key, mp_group)
                    else:
                        continue
-                if len(child._buffers) != 0 and state_dict != None:
+                if len(child._buffers) != 0 and state_dict is not None:
                    load_buffer(child, state_dict, checking_key)
                if child.__class__ in linear_policies:
                    setattr(r_module, name, linear_policies[child.__class__](child, prev_name + '.' + name,
@@ -507,7 +507,7 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m

        return new_module

-    if checkpoint_dict != None and not config.replace_with_kernel_inject:
+    if checkpoint_dict is not None and not config.replace_with_kernel_inject:
        # AutoTP shard loading
        checkpoint = checkpoint_dict["checkpoints"]
        pbar = tqdm.tqdm(total=len(checkpoint), desc=f"Loading {len(checkpoint)} checkpoint shards")
@@ -745,7 +745,7 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
        A modified ``model``.
    """
    sd = None
-    if checkpoint != None:
+    if checkpoint is not None:
        sd = torch.load(checkpoint, map_location='cpu')
    policy = {}
    if orig_class is not None:
@@ -764,7 +764,7 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
        "You can find some samples here: https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/module_inject/replace_policy.py"

    replaced_module, _ = _replace_module(model, policy, state_dict=sd)
-    if checkpoint != None:
+    if checkpoint is not None:
        embedding_weight = None
        for n, p in replaced_module.named_parameters():
            if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
@@ -833,7 +833,7 @@ def _replace_module(model, policies, prefix='', layer_id=0, level_id=0, state_di
            layer_id += 1
        else:
            checking_key = prefix + name + '.'
-            if child.__class__ in load_layers and state_dict != None:
+            if child.__class__ in load_layers and state_dict is not None:
                if any(checking_key in item for item in state_dict):
                    load(
                        child,
@@ -842,7 +842,7 @@ def _replace_module(model, policies, prefix='', layer_id=0, level_id=0, state_di
                    )
                else:
                    continue
-            if len(child._buffers) != 0 and state_dict != None:
+            if len(child._buffers) != 0 and state_dict is not None:
                load_buffer(child, state_dict, checking_key)
            _, layer_id = _replace_module(child,
                                          policies,

--- a/deepspeed/ops/transformer/inference/diffusers_attention.py
+++ b/deepspeed/ops/transformer/inference/diffusers_attention.py
@@ -57,7 +57,7 @@ class DeepSpeedDiffusersAttentionFunction(Function):
            head_size = input.shape[-1] // config.heads
            do_flash_attn = (head_size <= 128)
            scale = (1 / norm_factor) * (1 / norm_factor)
-            if do_flash_attn and context == None:
+            if do_flash_attn and context is None:
                qkv_out = linear_func(input, attn_qkvw, attn_qkvb if attn_qkvb is not None else attn_qkvw, attn_qkvb
                                      is not None, do_flash_attn, config.heads, False)


--- a/deepspeed/ops/transformer/inference/diffusers_transformer_block.py
+++ b/deepspeed/ops/transformer/inference/diffusers_transformer_block.py
@@ -85,7 +85,7 @@ class DeepSpeedDiffusersTransformerBlock(nn.Module):

        # In v0.11.0 of diffusers, the kwarg was changed from 'context' to 'encoder_hidden_states'
        # This is so we can support older and newer versions of diffusers
-        if "encoder_hidden_states" in kwargs and kwargs["encoder_hidden_states"] != None:
+        if "encoder_hidden_states" in kwargs and kwargs["encoder_hidden_states"] is not None:
            context = kwargs["encoder_hidden_states"]

        out_norm_1 = self.transformer_cuda_module.layer_norm(hidden_states, self.norm1_g, self.norm1_b, self.norm1_eps)

--- a/deepspeed/ops/transformer/inference/moe_inference.py
+++ b/deepspeed/ops/transformer/inference/moe_inference.py
@@ -193,7 +193,7 @@ class DeepSpeedMoEInference(nn.Module):
            specialized_mode = False
            # InferenceSpecializedBuilder is not among DeepSpeed provided builder yet, so we infer by builder name string
            builder = get_accelerator().create_op_builder("InferenceSpecializedBuilder")
-            if builder != None and builder.is_compatible():
+            if builder is not None and builder.is_compatible():
                inference_module = builder.load()
                specialized_mode = True
            else:

--- a/deepspeed/ops/transformer/inference/op_binding/residual_add.py
+++ b/deepspeed/ops/transformer/inference/op_binding/residual_add.py
@@ -37,7 +37,7 @@ class ResidualAddOp(BaseOp):
                attention_bias: Optional[torch.Tensor] = None,
                final_bias: Optional[torch.Tensor] = None):

-        if self.residual_add_func != None:
+        if self.residual_add_func is not None:
            if final_bias is None:
                residual = self._vector_add(residual, hidden_state, 1.0 / self.config.mp_size)
            else:

--- a/deepspeed/runtime/engine.py
+++ b/deepspeed/runtime/engine.py
@@ -859,7 +859,7 @@ class DeepSpeedEngine(Module):
        elif self.bfloat16_enabled():
            model_dtype = torch.bfloat16

-        if self._config.grad_accum_dtype == None:
+        if self._config.grad_accum_dtype is None:
            if model_dtype == torch.bfloat16 and not self.zero_optimization():
                grad_accum_dtype = torch.float32
            else:
@@ -968,7 +968,7 @@ class DeepSpeedEngine(Module):
            "variable, it is set by the deepspeed launcher, deepspeed.init_distributed, or the torch's launcher. If using a " \
            "different launcher please ensure LOCAL_RANK is set prior to initializing deepspeed."

-        if hasattr(args, 'local_rank') and args.local_rank != None:
+        if hasattr(args, 'local_rank') and args.local_rank is not None:
            assert isinstance(args.local_rank,
                              int), f"args.local_rank of {args.local_rank} is an unknown type {type(args.local_rank)}"
            if args.local_rank >= 0:

--- a/deepspeed/runtime/pipe/module.py
+++ b/deepspeed/runtime/pipe/module.py
@@ -159,7 +159,7 @@ class PipelineModule(nn.Module):
        self.global_rank = dist.get_rank(group=self.world_group)
        self.world_size = dist.get_world_size(group=self.world_group)
        self.local_rank = int(os.environ.get("LOCAL_RANK", None))
-        assert self.local_rank != None
+        assert self.local_rank is not None

        if topology:
            self._topo = topology

--- a/deepspeed/runtime/zero/stage3.py
+++ b/deepspeed/runtime/zero/stage3.py
@@ -1892,7 +1892,7 @@ class DeepSpeedZeroOptimizer_Stage3(ZeROOptimizer):

        # warn user about caching allocator flushes
        memory_stats = get_accelerator().memory_stats()
-        alloc_retries = memory_stats["num_alloc_retries"] if memory_stats != None else 0
+        alloc_retries = memory_stats["num_alloc_retries"] if memory_stats is not None else 0
        if alloc_retries > self.n_caching_allocator_flushes:
            if dist.get_rank() == 0:
                logger.warning(

--- a/deepspeed/utils/debug.py
+++ b/deepspeed/utils/debug.py
@@ -83,7 +83,7 @@ def printflock(*msgs):

    """
    global fcntl
-    if fcntl == None:
+    if fcntl is None:
        import fcntl

    with open(__file__, "r") as fh:

--- a/deepspeed/utils/numa.py
+++ b/deepspeed/utils/numa.py
@@ -107,7 +107,7 @@ def get_numactl_cmd(bind_core_list, num_local_procs, local_rank):
                         "Unset KMP_AFFINITY before launching deepspeed.\n\n"
                         "\t$ unset KMP_AFFINITY\n"
                         "\t$ deepspeed <deepspeed command parameters>")
-    if bind_core_list != None:
+    if bind_core_list is not None:
        core_list = parse_range_list(bind_core_list)
        total_cores = len(core_list)
    else: