Unverified commit 3de3e45e authored by Nyakku Shigure, committed by GitHub

[CodeStyle][py2][U008] remove unnecessary args in `super()` (#47549)

* [CodeStyle][py2][U008] remove unnecessary args in `super()`

* remove remaining args

* revert changes in test_pylayer_op

* Revert "revert changes in test_pylayer_op"

This reverts commit ff185a9ae738afac3b0264f61bde6c6b7f72e7c4.

* revert some changes in example code
Parent 6d0f730d
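
For context, the U008 fixup applied throughout the diff below simply drops the redundant class and instance arguments from `super()` calls inside methods; Python 3 resolves both from the enclosing class at runtime, so behavior is unchanged. A minimal sketch of the pattern, mirroring the `AbsNet` hunk that opens the diff (it assumes paddle is installed):

    import paddle

    class AbsNet(paddle.nn.Layer):
        def __init__(self):
            # Python 2-compatible spelling removed by this commit:
            #     super(AbsNet, self).__init__()
            # Python 3 zero-argument form kept by this commit (same behavior):
            super().__init__()

        def forward(self, x):
            return paddle.abs(x)

    # Usage: net = AbsNet(); y = net(paddle.to_tensor([-1.0, 2.0]))
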
......@@ -20,7 +20,7 @@ import sys
class AbsNet(paddle.nn.Layer):
def __init__(self):
super(AbsNet, self).__init__()
super().__init__()
def forward(self, x):
x = paddle.abs(x)
......
......@@ -42,7 +42,7 @@ class RandomDataset(paddle.io.Dataset):
class LinearNet(nn.Layer):
def __init__(self):
super(LinearNet, self).__init__()
super().__init__()
self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
@paddle.jit.to_static
......
......@@ -31,7 +31,7 @@ inplace_optional_out_type_map = {
class ForwardAPI(BaseAPI):
def __init__(self, api_item_yaml):
super(ForwardAPI, self).__init__(api_item_yaml)
super().__init__(api_item_yaml)
self.is_dygraph_api, self.intermediate_outs = self.parse_intermediate(
api_item_yaml
)
......
......@@ -21,7 +21,7 @@ from api_base import BaseAPI
class BackwardAPI(BaseAPI):
def __init__(self, backward_item_yaml):
super(BackwardAPI, self).__init__(backward_item_yaml)
super().__init__(backward_item_yaml)
self.check_args(backward_item_yaml['forward'])
self.no_need_buffer = self.parse_no_need_buffer(backward_item_yaml)
......@@ -97,7 +97,7 @@ class BackwardAPI(BaseAPI):
'Tensor': 'Tensor*',
'std::vector<Tensor>': 'std::vector<Tensor*>',
}
intputs_and_attrs = super(BackwardAPI, self).get_define_args()
intputs_and_attrs = super().get_define_args()
outs = []
for i, name in enumerate(self.outputs['names']):
outs.append(
......
......@@ -21,12 +21,12 @@ from api_base import PREFIX_TENSOR_NAME
class SparseAPI(ForwardAPI):
def __init__(self, api_item_yaml):
super(SparseAPI, self).__init__(api_item_yaml)
super().__init__(api_item_yaml)
def gene_api_declaration(self):
return f"""
// {", ".join(self.outputs['names'])}
{super(SparseAPI, self).gene_api_declaration()}
{super().gene_api_declaration()}
"""
def gene_output(
......
......@@ -23,7 +23,7 @@ PREFIX_META_TENSOR_NAME = 'meta_'
class StringsAPI(ForwardAPI):
def __init__(self, api_item_yaml):
super(StringsAPI, self).__init__(api_item_yaml)
super().__init__(api_item_yaml)
def get_api_func_name(self):
return self.api
......@@ -31,7 +31,7 @@ class StringsAPI(ForwardAPI):
def gene_api_declaration(self):
return f"""
// {", ".join(self.outputs['names'])}
{super(StringsAPI, self).gene_api_declaration()}
{super().gene_api_declaration()}
"""
def get_kernel_tensor_out_type(self, output_name):
......
......@@ -85,7 +85,7 @@ class GradScaler(AmpScaler):
decr_every_n_nan_or_inf=2,
use_dynamic_loss_scaling=True,
):
super(GradScaler, self).__init__(
super().__init__(
enable,
init_loss_scaling,
incr_ratio,
......@@ -125,7 +125,7 @@ class GradScaler(AmpScaler):
scaler.minimize(optimizer, scaled) # update parameters
optimizer.clear_grad()
"""
return super(GradScaler, self).scale(var)
return super().scale(var)
def minimize(self, optimizer, *args, **kwargs):
"""
......@@ -161,7 +161,7 @@ class GradScaler(AmpScaler):
scaler.minimize(optimizer, scaled) # update parameters
optimizer.clear_grad()
"""
return super(GradScaler, self).minimize(optimizer, *args, **kwargs)
return super().minimize(optimizer, *args, **kwargs)
def step(self, optimizer):
"""
......@@ -280,7 +280,7 @@ class GradScaler(AmpScaler):
scaler.update()
optimizer.clear_grad()
"""
return super(GradScaler, self)._unscale(optimizer)
return super()._unscale(optimizer)
def is_enable(self):
"""
......@@ -304,7 +304,7 @@ class GradScaler(AmpScaler):
enable = scaler.is_enable()
print(enable) # True
"""
return super(GradScaler, self).is_enable()
return super().is_enable()
def is_use_dynamic_loss_scaling(self):
"""
......@@ -328,7 +328,7 @@ class GradScaler(AmpScaler):
use_dynamic_loss_scaling = scaler.is_use_dynamic_loss_scaling()
print(use_dynamic_loss_scaling) # True
"""
return super(GradScaler, self).is_use_dynamic_loss_scaling()
return super().is_use_dynamic_loss_scaling()
def get_init_loss_scaling(self):
"""
......@@ -352,7 +352,7 @@ class GradScaler(AmpScaler):
init_loss_scaling = scaler.get_init_loss_scaling()
print(init_loss_scaling) # 1024
"""
return super(GradScaler, self).get_init_loss_scaling()
return super().get_init_loss_scaling()
def set_init_loss_scaling(self, new_init_loss_scaling):
"""
......@@ -378,7 +378,7 @@ class GradScaler(AmpScaler):
scaler.set_init_loss_scaling(new_init_loss_scaling)
print(scaler.get_init_loss_scaling()) # 1000
"""
super(GradScaler, self).set_init_loss_scaling(new_init_loss_scaling)
super().set_init_loss_scaling(new_init_loss_scaling)
def get_incr_ratio(self):
"""
......@@ -402,7 +402,7 @@ class GradScaler(AmpScaler):
incr_ratio = scaler.get_incr_ratio()
print(incr_ratio) # 2.0
"""
return super(GradScaler, self).get_incr_ratio()
return super().get_incr_ratio()
def set_incr_ratio(self, new_incr_ratio):
"""
......@@ -428,7 +428,7 @@ class GradScaler(AmpScaler):
scaler.set_incr_ratio(new_incr_ratio)
print(scaler.get_incr_ratio()) # 3.0
"""
super(GradScaler, self).set_incr_ratio(new_incr_ratio)
super().set_incr_ratio(new_incr_ratio)
def get_decr_ratio(self):
"""
......@@ -452,7 +452,7 @@ class GradScaler(AmpScaler):
decr_ratio = scaler.get_decr_ratio()
print(decr_ratio) # 0.5
"""
return super(GradScaler, self).get_decr_ratio()
return super().get_decr_ratio()
def set_decr_ratio(self, new_decr_ratio):
"""
......@@ -478,7 +478,7 @@ class GradScaler(AmpScaler):
scaler.set_decr_ratio(new_decr_ratio)
print(scaler.get_decr_ratio()) # 0.1
"""
super(GradScaler, self).set_decr_ratio(new_decr_ratio)
super().set_decr_ratio(new_decr_ratio)
def get_incr_every_n_steps(self):
"""
......@@ -502,7 +502,7 @@ class GradScaler(AmpScaler):
incr_every_n_steps = scaler.get_incr_every_n_steps()
print(incr_every_n_steps) # 1000
"""
return super(GradScaler, self).get_incr_every_n_steps()
return super().get_incr_every_n_steps()
def set_incr_every_n_steps(self, new_incr_every_n_steps):
"""
......@@ -528,7 +528,7 @@ class GradScaler(AmpScaler):
scaler.set_incr_every_n_steps(new_incr_every_n_steps)
print(scaler.get_incr_every_n_steps()) # 2000
"""
super(GradScaler, self).set_incr_every_n_steps(new_incr_every_n_steps)
super().set_incr_every_n_steps(new_incr_every_n_steps)
def get_decr_every_n_nan_or_inf(self):
"""
......@@ -552,7 +552,7 @@ class GradScaler(AmpScaler):
decr_every_n_nan_or_inf = scaler.get_decr_every_n_nan_or_inf()
print(decr_every_n_nan_or_inf) # 2
"""
return super(GradScaler, self).get_decr_every_n_nan_or_inf()
return super().get_decr_every_n_nan_or_inf()
def set_decr_every_n_nan_or_inf(self, new_decr_every_n_nan_or_inf):
"""
......@@ -578,9 +578,7 @@ class GradScaler(AmpScaler):
scaler.set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)
print(scaler.get_decr_every_n_nan_or_inf()) # 3
"""
super(GradScaler, self).set_decr_every_n_nan_or_inf(
new_decr_every_n_nan_or_inf
)
super().set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)
def state_dict(self):
"""
......@@ -614,7 +612,7 @@ class GradScaler(AmpScaler):
use_dynamic_loss_scaling=True)
scaler_state = scaler.state_dict()
"""
return super(GradScaler, self).state_dict()
return super().state_dict()
def load_state_dict(self, state_dict):
"""
......@@ -640,4 +638,4 @@ class GradScaler(AmpScaler):
scaler_state = scaler.state_dict()
scaler.load_state_dict(scaler_state)
"""
super(GradScaler, self).load_state_dict(state_dict)
super().load_state_dict(state_dict)
......@@ -49,7 +49,7 @@ class AudioClassificationDataset(paddle.io.Dataset):
feat_type (:obj:`str`, `optional`, defaults to `raw`):
It identifies the feature type that user wants to extrace of an audio file.
"""
super(AudioClassificationDataset, self).__init__()
super().__init__()
if feat_type not in feat_funcs.keys():
raise RuntimeError(
......
......@@ -151,7 +151,7 @@ class ESC50(AudioClassificationDataset):
if archive is not None:
self.archive = archive
files, labels = self._get_data(mode, split)
super(ESC50, self).__init__(
super().__init__(
files=files, labels=labels, feat_type=feat_type, **kwargs
)
......
......@@ -107,7 +107,7 @@ class TESS(AudioClassificationDataset):
if archive is not None:
self.archive = archive
files, labels = self._get_data(mode, n_folds, split)
super(TESS, self).__init__(
super().__init__(
files=files, labels=labels, feat_type=feat_type, **kwargs
)
......
......@@ -72,7 +72,7 @@ class Spectrogram(nn.Layer):
pad_mode: str = 'reflect',
dtype: str = 'float32',
) -> None:
super(Spectrogram, self).__init__()
super().__init__()
assert power > 0, 'Power of spectrogram must be > 0.'
self.power = power
......@@ -163,7 +163,7 @@ class MelSpectrogram(nn.Layer):
norm: Union[str, float] = 'slaney',
dtype: str = 'float32',
) -> None:
super(MelSpectrogram, self).__init__()
super().__init__()
self._spectrogram = Spectrogram(
n_fft=n_fft,
......@@ -269,7 +269,7 @@ class LogMelSpectrogram(nn.Layer):
top_db: Optional[float] = None,
dtype: str = 'float32',
) -> None:
super(LogMelSpectrogram, self).__init__()
super().__init__()
self._melspectrogram = MelSpectrogram(
sr=sr,
......@@ -374,7 +374,7 @@ class MFCC(nn.Layer):
top_db: Optional[float] = None,
dtype: str = 'float32',
) -> None:
super(MFCC, self).__init__()
super().__init__()
assert (
n_mfcc <= n_mels
), 'n_mfcc cannot be larger than n_mels: %d vs %d' % (n_mfcc, n_mels)
......
......@@ -201,7 +201,7 @@ class LayerMeta(type):
name + '_backward', (PyLayerBackward,), {"_forward_cls": cls}
)
return super(LayerMeta, cls).__init__(name, bases, attrs)
return super().__init__(name, bases, attrs)
class LegacyPyLayer(with_mateclass(LayerMeta, CPyLayer)):
......@@ -552,7 +552,7 @@ class EagerPyLayerMeta(type):
name + '_backward', (EagerPyLayerBackward,), {"_forward_cls": cls}
)
return super(EagerPyLayerMeta, cls).__init__(name, bases, attrs)
return super().__init__(name, bases, attrs)
class EagerPyLayer(
......
......@@ -83,7 +83,7 @@ def config_callbacks(
class ProgBarLoggerAuto(ProgBarLogger):
def __init__(self, log_freq=1, verbose=2):
super(ProgBarLoggerAuto, self).__init__(log_freq, verbose)
super().__init__(log_freq, verbose)
def _is_print(self):
return True
......@@ -158,7 +158,7 @@ class ProgBarLoggerAuto(ProgBarLogger):
class LRSchedulerAuto(LRScheduler):
def __init__(self, by_step=True, by_epoch=False):
super(LRSchedulerAuto, self).__init__(by_step, by_epoch)
super().__init__(by_step, by_epoch)
def on_epoch_begin(self, epoch=None, logs=None):
self.acc_step = self.params["acc_step"]
......@@ -225,7 +225,7 @@ class Profiler(Callback):
class ModelCheckpointAuto(ModelCheckpoint):
def __init__(self, *args, **kwargs):
super(ModelCheckpointAuto, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
def _is_save(self):
return self.model and self.save_dir
......
......@@ -756,7 +756,7 @@ class CommOpCost(OpCost):
OP_TYPE = "COMM"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(CommOpCost, self).__init__(op=op, op_desc=op_desc)
super().__init__(op=op, op_desc=op_desc)
self._check_comm_op_type()
self._comm_context = comm_context
self._group_ranks = None
......@@ -859,7 +859,7 @@ class CompOpCost(OpCost):
OP_TYPE = "COMP"
def __init__(self, op=None, op_desc=None, cluster=None):
super(CompOpCost, self).__init__(op=op, op_desc=op_desc)
super().__init__(op=op, op_desc=op_desc)
self._check_comp_op_type()
self._cost = self.calc_cost()
self.cluster = cluster
......
......@@ -22,9 +22,7 @@ class AllreduceSumOpCost(CommOpCost):
OP_TYPE = "c_allreduce_sum"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(AllreduceSumOpCost, self).__init__(
op=op, op_desc=op_desc, comm_context=comm_context
)
super().__init__(op=op, op_desc=op_desc, comm_context=comm_context)
def calc_time(self):
# use tree if cross machine and use ring if in a single machine
......@@ -86,9 +84,7 @@ class AllgatherOpCost(CommOpCost):
OP_TYPE = "c_allgather"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(AllgatherOpCost, self).__init__(
op=op, op_desc=op_desc, comm_context=comm_context
)
super().__init__(op=op, op_desc=op_desc, comm_context=comm_context)
def calc_time(self):
time = self.calc_time_ring()
......@@ -115,9 +111,7 @@ class BroadcastOpCost(CommOpCost):
OP_TYPE = "c_broadcast"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(BroadcastOpCost, self).__init__(
op=op, op_desc=op_desc, comm_context=comm_context
)
super().__init__(op=op, op_desc=op_desc, comm_context=comm_context)
def calc_time(self):
time = self.calc_time_ring()
......@@ -143,9 +137,7 @@ class IdentityOpCost(CommOpCost):
OP_TYPE = "c_identity"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(IdentityOpCost, self).__init__(
op=op, op_desc=op_desc, comm_context=comm_context
)
super().__init__(op=op, op_desc=op_desc, comm_context=comm_context)
def calc_time(self):
return 0
......@@ -156,9 +148,7 @@ class RecvOpCost(CommOpCost):
OP_TYPE = "recv_v2"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(RecvOpCost, self).__init__(
op=op, op_desc=op_desc, comm_context=comm_context
)
super().__init__(op=op, op_desc=op_desc, comm_context=comm_context)
def calc_time(self):
alpha = self.comm_context.base_ring
......@@ -179,9 +169,7 @@ class SendOpCost(CommOpCost):
OP_TYPE = "send_v2"
def __init__(self, op=None, op_desc=None, comm_context=None):
super(SendOpCost, self).__init__(
op=op, op_desc=op_desc, comm_context=comm_context
)
super().__init__(op=op, op_desc=op_desc, comm_context=comm_context)
def calc_time(self):
alpha = self.comm_context.base_ring
......
......@@ -71,7 +71,7 @@ class CostNode(object):
class MergedOpsCostNode(CostNode):
def __init__(self, node_type, id=None, base_node_list=None, is_bwd=False):
super(MergedOpsCostNode, self).__init__(None, node_type, id)
super().__init__(None, node_type, id)
self.node_list = base_node_list
self.is_bwd = is_bwd
......@@ -80,7 +80,7 @@ class CommOpCostNode(CostNode):
def __init__(
self, node, node_type, id=None, comm_node_list=None, is_bwd=False
):
super(CommOpCostNode, self).__init__(node, node_type, id)
super().__init__(node, node_type, id)
self.node_list = comm_node_list
self.ranks = []
self.comm_type = node.type
......@@ -124,7 +124,7 @@ class TensorCostNode(CostNode):
batch_size=None,
shared_node_id=None,
):
super(TensorCostNode, self).__init__(node, node_type, id)
super().__init__(node, node_type, id)
if node.name == "create_py_reader_0" or node.name == "double_buffer_0":
self.shape = [2, 2]
self.dtype = paddle.float32
......@@ -159,7 +159,7 @@ class TensorCostNode(CostNode):
class CompOpCostNode(CostNode):
def __init__(self, node, node_type, id=None, is_bwd=False, is_optim=False):
super(CompOpCostNode, self).__init__(node, node_type, id)
super().__init__(node, node_type, id)
self.is_bwd = is_bwd
self.is_optim = is_optim
......
......@@ -37,7 +37,7 @@ class ProxyLayer(Layer):
"""
def __init__(self, layer, loss_func, metrics):
super(ProxyLayer, self).__init__()
super().__init__()
# NOTE: All verify logics are finished in Engine.Prepare
self.inner_layer = layer
self.loss_func = loss_func
......
......@@ -22,7 +22,7 @@ from ..utils import compute_compatible_and_update_dim_mapping
class DistributedAssign(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedAssign, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedAssign("assign"))
......@@ -30,7 +30,7 @@ register_distributed_operator_impl_container(DistributedAssign("assign"))
class DistributedAssignImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedAssignImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -31,7 +31,7 @@ world_process_group = get_world_process_group()
class DistributedCheckFiniteAndUnscale(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedCheckFiniteAndUnscale, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -41,7 +41,7 @@ register_distributed_operator_impl_container(
class DistributedCheckFiniteAndUnscaleImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedCheckFiniteAndUnscaleImpl, self).__init__(name)
super().__init__(name)
self._name = name
self._forward_implemented = False
self._backward_implemented = True
......
......@@ -86,7 +86,7 @@ def prim_operator_data_parallel_functor(ctx, src_op):
class DistributedDefault(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedDefault, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedDefault("default"))
......@@ -95,7 +95,7 @@ register_distributed_operator_impl_container(DistributedDefault("default"))
# Replicated Default
class DistributedDefaultImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedDefaultImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -28,7 +28,7 @@ from ..cost import build_comp_costs_from_descs
class DistributedElementwise(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedElementwise, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -39,7 +39,7 @@ register_distributed_operator_impl_container(
# Replicated Elementwise
class DistributedElementwiseImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedElementwiseImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = False
self._backward_implemented = False
......
......@@ -51,7 +51,7 @@ from paddle.distributed.auto_parallel.cost.comm_op_cost import (
class DistributedEmbedding(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedEmbedding, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -153,7 +153,7 @@ def adopt_lookup_table_v1(ctx, main_block, src_op, Ids_var):
# RowParallel
class DistributedEmbeddingImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedEmbeddingImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -26,7 +26,7 @@ from ..cost import build_comp_costs_from_descs
class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedFillConstantBatchSizeLike, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -36,7 +36,7 @@ register_distributed_operator_impl_container(
class DistributedFillConstantBatchSizeLikeImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedFillConstantBatchSizeLikeImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -25,7 +25,7 @@ from ..process_group import new_process_group
class DistributedFusedAttention(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedFusedAttention, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -35,7 +35,7 @@ register_distributed_operator_impl_container(
class DistributedFusedAttentionImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedFusedAttentionImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -25,7 +25,7 @@ from ..process_group import new_process_group
class DistributedFusedFeedForward(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedFusedFeedForward, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -35,7 +35,7 @@ register_distributed_operator_impl_container(
class DistributedFusedFeedForwardImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedFusedFeedForwardImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -544,7 +544,7 @@ def _init_param_sync(Weight_var, dist_op_context, startup_block, ctx, rank_id):
class DistributedMatmul(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedMatmul, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedMatmul("matmul"))
......@@ -553,7 +553,7 @@ register_distributed_operator_impl_container(DistributedMatmul("matmul"))
# ColumnParallel
class DistributedMatmulImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMatmulImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......@@ -939,7 +939,7 @@ class DistributedMatmulImpl0(DistributedOperatorImpl):
# RowParallel
class DistributedMatmulImpl1(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMatmulImpl1, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......@@ -1300,7 +1300,7 @@ class DistributedMatmulImpl1(DistributedOperatorImpl):
# ReplicateParallel
class DistributedMatmulImpl2(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMatmulImpl2, self).__init__(name)
super().__init__(name)
def calc_cost(self, op_role, dist_op, ctx, cluster):
cost = None
......@@ -1441,7 +1441,7 @@ register_distributed_operator_impl(
class DistributedMatmulV2(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedMatmulV2, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedMatmulV2("matmul_v2"))
......@@ -1450,7 +1450,7 @@ register_distributed_operator_impl_container(DistributedMatmulV2("matmul_v2"))
# ColumnParallel
class DistributedMatmulV2Impl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMatmulV2Impl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......@@ -1839,7 +1839,7 @@ class DistributedMatmulV2Impl0(DistributedOperatorImpl):
# RowParallel
class DistributedMatmulV2Impl1(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMatmulV2Impl1, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......@@ -2199,7 +2199,7 @@ class DistributedMatmulV2Impl1(DistributedOperatorImpl):
# ReplicateParallel
class DistributedMatmulV2Impl2(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMatmulV2Impl2, self).__init__(name)
super().__init__(name)
def calc_cost(self, op_role, dist_op, ctx, cluster):
cost = None
......@@ -2342,7 +2342,7 @@ register_distributed_operator_impl(
class DistributedMul(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedMul, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedMul("mul"))
......@@ -2351,7 +2351,7 @@ register_distributed_operator_impl_container(DistributedMul("mul"))
# ColumnParallel
class DistributedMulImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMulImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......@@ -2743,7 +2743,7 @@ class DistributedMulImpl0(DistributedOperatorImpl):
# RowParallel
class DistributedMulImpl1(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMulImpl1, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......@@ -3114,7 +3114,7 @@ class DistributedMulImpl1(DistributedOperatorImpl):
# ReplicateParallel
class DistributedMulImpl2(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedMulImpl2, self).__init__(name)
super().__init__(name)
def calc_cost(self, op_role, dist_op, ctx, cluster):
cost = None
......
......@@ -37,7 +37,7 @@ from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
class DistributedPNorm(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedPNorm, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedPNorm("p_norm"))
......@@ -46,7 +46,7 @@ register_distributed_operator_impl_container(DistributedPNorm("p_norm"))
# Row Parallel
class DistributedPNormImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedPNormImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -24,7 +24,7 @@ from ..process_group import new_process_group
class DistributedReduceSumPrimtive(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedReduceSumPrimtive, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -35,7 +35,7 @@ register_distributed_operator_impl_container(
# Batch Dimension ReduceSum Primitive
class DistributedReduceSumPrimtiveImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedReduceSumPrimtiveImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -28,7 +28,7 @@ from paddle.distributed.fleet.meta_optimizers.common import OpRole
class DistributedReshape2(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedReshape2, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedReshape2("reshape2"))
......@@ -36,7 +36,7 @@ register_distributed_operator_impl_container(DistributedReshape2("reshape2"))
class DistributedReshapeImpl0(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedReshapeImpl0, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = False
......@@ -286,7 +286,7 @@ class DistributedReshapeImpl0(DistributedOperatorImpl):
class DistributedReshapeImpl1(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedReshapeImpl1, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = False
......@@ -539,7 +539,7 @@ class DistributedReshapeImpl1(DistributedOperatorImpl):
class DistributedReshapeImpl2(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedReshapeImpl2, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = False
......
......@@ -22,7 +22,7 @@ from ..utils import is_dim_shard
class DistributedShape(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedShape, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedShape("shape"))
......@@ -30,7 +30,7 @@ register_distributed_operator_impl_container(DistributedShape("shape"))
class DistributedShapeImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedShapeImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -23,7 +23,7 @@ from .dist_default import DistributedDefaultImpl0
class DistributedSlice(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedSlice, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedSlice("slice"))
......@@ -31,7 +31,7 @@ register_distributed_operator_impl_container(DistributedSlice("slice"))
class DistributedSliceImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedSliceImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -28,7 +28,7 @@ from paddle.distributed.fleet.meta_optimizers.common import OpRole
class DistributedSoftmax(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedSoftmax, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedSoftmax("softmax"))
......@@ -36,7 +36,7 @@ register_distributed_operator_impl_container(DistributedSoftmax("softmax"))
class DistributedSoftmaxImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedSoftmaxImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = False
self._backward_implemented = False
......
......@@ -23,7 +23,7 @@ from .dist_default import DistributedDefaultImpl0
class DistributedSplit(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedSplit, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(DistributedSplit("split"))
......@@ -31,7 +31,7 @@ register_distributed_operator_impl_container(DistributedSplit("split"))
class DistributedSplitImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedSplitImpl, self).__init__(name)
super().__init__(name)
self._forward_implemented = True
self._backward_implemented = True
......
......@@ -27,7 +27,7 @@ from paddle.distributed.fleet.meta_optimizers.common import OpRole
class DistributedTranspose2(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedTranspose2, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -37,7 +37,7 @@ register_distributed_operator_impl_container(
class DistributedTranspose2Impl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedTranspose2Impl, self).__init__(name)
super().__init__(name)
self._forward_implemented = False
self._backward_implemented = False
......
......@@ -21,7 +21,7 @@ from ..utils import set_dist_op_desc_original_id
class DistributedUpdateLossScaling(DistributedOperatorImplContainer):
def __init__(self, op_type):
super(DistributedUpdateLossScaling, self).__init__(op_type)
super().__init__(op_type)
register_distributed_operator_impl_container(
......@@ -31,7 +31,7 @@ register_distributed_operator_impl_container(
class DistributedUpdateLossScalingImpl(DistributedOperatorImpl):
def __init__(self, name):
super(DistributedUpdateLossScalingImpl, self).__init__(name)
super().__init__(name)
self._name = name
self._forward_implemented = False
self._backward_implemented = True
......
......@@ -445,7 +445,7 @@ class SearchAlgorithm:
class MCMC(SearchAlgorithm):
def __init__(self, serial_program_info, parallelizer, max_search_times=5):
super(MCMC, self).__init__("mcmc")
super().__init__("mcmc")
self._serial_program_info = serial_program_info
self._max_search_times = max_search_times
self._parallelizer = parallelizer
......
......@@ -76,43 +76,43 @@ class BaseConfig(object):
class RecomputeConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.RECOMPUTE
super(RecomputeConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class AMPConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.AMP
super(AMPConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class ShardingConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.SHARDING
super(ShardingConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class GradientMergeConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.GRADIENT_MERGE
super(GradientMergeConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class QATConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.QAT
super(QATConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class TuningConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.TUNING
super(TuningConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class DatasetConfig(BaseConfig):
def __init__(self, config_dict=None):
category = constants.DATASET
super(DatasetConfig, self).__init__(category, config_dict)
super().__init__(category, config_dict)
class Strategy(BaseConfig):
......@@ -161,7 +161,7 @@ class Strategy(BaseConfig):
self._config_dict = {}
category = constants.BASE
super(Strategy, self).__init__(category, self._config_dict)
super().__init__(category, self._config_dict)
config_dict = self._config_dict.get(constants.RECOMPUTE, None)
self.recompute = RecomputeConfig(config_dict)
......
......@@ -121,7 +121,7 @@ class OptimizationTunerTrial(Trial):
trial_id=None,
status=TrialStatus.RUNNING,
):
super(OptimizationTunerTrial, self).__init__(config, trial_id, status)
super().__init__(config, trial_id, status)
self._name = name
self._changed_configs = changed_configs
......
......@@ -45,7 +45,7 @@ class Fixed(TunableVariable):
"""
def __init__(self, name, default):
super(Fixed, self).__init__(name=name, default=default)
super().__init__(name=name, default=default)
self.name = name
if not isinstance(default, (str, int, float, bool)):
raise ValueError(
......@@ -68,7 +68,7 @@ class Boolean(TunableVariable):
"""
def __init__(self, name, default=False):
super(Boolean, self).__init__(name=name, default=default)
super().__init__(name=name, default=default)
if default not in {True, False}:
raise ValueError(
"default must be a Python boolean, but got {}".format(default)
......@@ -86,7 +86,7 @@ class Boolean(TunableVariable):
class Choice(TunableVariable):
def __init__(self, name, values, default=None):
super(Choice, self).__init__(name=name, default=default)
super().__init__(name=name, default=default)
types = set(type(v) for v in values)
if len(types) > 1:
......@@ -143,7 +143,7 @@ class Choice(TunableVariable):
return rng.choice(self.values)
def get_state(self):
state = super(Choice, self).get_state()
state = super().get_state()
state["values"] = self.values
return state
......@@ -159,7 +159,7 @@ class IntRange(TunableVariable):
"""
def __init__(self, name, start, stop, step=1, default=None, endpoint=False):
super(IntRange, self).__init__(name=name, default=default)
super().__init__(name=name, default=default)
self.start = self._check_int(start)
self.stop = self._check_int(stop)
self.step = self._check_int(step)
......@@ -185,7 +185,7 @@ class IntRange(TunableVariable):
return int(value)
def get_state(self):
state = super(IntRange, self).get_state()
state = super().get_state()
state["start"] = self.start
state["stop"] = self.stop
state["step"] = self.step
......@@ -214,7 +214,7 @@ class FloatRange(TunableVariable):
def __init__(
self, name, start, stop, step=None, default=None, endpoint=False
):
super(FloatRange, self).__init__(name=name, default=default)
super().__init__(name=name, default=default)
self.stop = float(stop)
self.start = float(start)
if step is not None:
......@@ -243,7 +243,7 @@ class FloatRange(TunableVariable):
return value
def get_state(self):
state = super(FloatRange, self).get_state()
state = super().get_state()
state["start"] = self.start
state["stop"] = self.stop
state["step"] = self.step
......
......@@ -80,7 +80,7 @@ class ProbabilityEntry(EntryAttr):
"""
def __init__(self, probability):
super(ProbabilityEntry, self).__init__()
super().__init__()
if not isinstance(probability, float):
raise ValueError("probability must be a float in (0,1)")
......@@ -120,7 +120,7 @@ class CountFilterEntry(EntryAttr):
"""
def __init__(self, count_filter):
super(CountFilterEntry, self).__init__()
super().__init__()
if not isinstance(count_filter, int):
raise ValueError(
......@@ -168,7 +168,7 @@ class ShowClickEntry(EntryAttr):
"""
def __init__(self, show_name, click_name):
super(ShowClickEntry, self).__init__()
super().__init__()
if not isinstance(show_name, str) or not isinstance(click_name, str):
raise ValueError("show_name click_name must be a str")
......
......@@ -545,7 +545,7 @@ class RoleMakerBase(object):
class PaddleCloudRoleMaker(RoleMakerBase):
def __init__(self, is_collective=False, **kwargs):
super(PaddleCloudRoleMaker, self).__init__()
super().__init__()
self._is_collective = is_collective
self._non_distributed = False
......@@ -1181,7 +1181,7 @@ class PaddleCloudRoleMaker(RoleMakerBase):
class UserDefinedRoleMaker(PaddleCloudRoleMaker):
def __init__(self, is_collective=False, init_gloo=False, **kwargs):
super(UserDefinedRoleMaker, self).__init__(
super().__init__(
is_collective=is_collective, init_gloo=init_gloo, **kwargs
)
self._init_gloo = init_gloo
......
......@@ -123,7 +123,7 @@ class StrategyCompiler(StrategyCompilerBase):
"""
def __init__(self):
super(StrategyCompiler, self).__init__()
super().__init__()
self._meta_optimizers = []
self._graph_optimizers = []
self._valid_optimizer_list = None
......
......@@ -100,7 +100,7 @@ class DPGroup(StrategyGroupBase):
"""
def __init__(self, list_of_ranks):
super(DPGroup, self).__init__(list_of_ranks)
super().__init__(list_of_ranks)
assert not isinstance(
self.group, list
), "Rank {} belongs to multi dp groups".format(self._rank)
......@@ -119,7 +119,7 @@ class MPGroup(StrategyGroupBase):
"""
def __init__(self, list_of_ranks):
super(MPGroup, self).__init__(list_of_ranks)
super().__init__(list_of_ranks)
assert not isinstance(
self.group, list
), "Rank {} belongs to multi mp groups".format(self._rank)
......@@ -138,7 +138,7 @@ class ShardingGroup(StrategyGroupBase):
"""
def __init__(self, list_of_ranks):
super(ShardingGroup, self).__init__(list_of_ranks)
super().__init__(list_of_ranks)
assert not isinstance(
self.group, list
), "Rank {} belongs to multi sharding groups".format(self._rank)
......@@ -157,7 +157,7 @@ class PPGroup(StrategyGroupBase):
"""
def __init__(self, list_of_ranks):
super(PPGroup, self).__init__(list_of_ranks)
super().__init__(list_of_ranks)
assert not isinstance(
self.group, list
), "Rank {} belongs to multi pp groups".format(self._rank)
......
......@@ -363,7 +363,7 @@ class InMemoryDataset(DatasetBase):
def __init__(self):
"""Init."""
super(InMemoryDataset, self).__init__()
super().__init__()
self.proto_desc.name = "MultiSlotInMemoryDataFeed"
self.fleet_send_batch_size = None
self.is_user_set_queue_num = False
......@@ -597,7 +597,7 @@ class InMemoryDataset(DatasetBase):
data_feed_type = "MultiSlotInMemoryDataFeed"
self._set_feed_type(data_feed_type)
super(InMemoryDataset, self).init(
super().init(
batch_size=batch_size,
thread_num=thread_num,
use_var=use_var,
......@@ -1288,7 +1288,7 @@ class QueueDataset(DatasetBase):
"""
Initialize QueueDataset
"""
super(QueueDataset, self).__init__()
super().__init__()
self.proto_desc.name = "MultiSlotDataFeed"
def init(self, **kwargs):
......@@ -1297,7 +1297,7 @@ class QueueDataset(DatasetBase):
should be called only once in user's python scripts to initialize setings of dataset instance
"""
super(QueueDataset, self).init(**kwargs)
super().init(**kwargs)
def _prepare_to_run(self):
"""
......@@ -1329,14 +1329,14 @@ class FileInstantDataset(DatasetBase):
"""
Initialize FileInstantDataset
"""
super(FileInstantDataset, self).__init__()
super().__init__()
self.proto_desc.name = "MultiSlotFileInstantDataFeed"
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize setings of dataset instance
"""
super(FileInstantDataset, self).init(**kwargs)
super().init(**kwargs)
class BoxPSDataset(InMemoryDataset):
......@@ -1354,7 +1354,7 @@ class BoxPSDataset(InMemoryDataset):
"""
Initialize BoxPSDataset
"""
super(BoxPSDataset, self).__init__()
super().__init__()
self.boxps = core.BoxPS(self.dataset)
self.proto_desc.name = "PaddleBoxDataFeed"
......@@ -1362,7 +1362,7 @@ class BoxPSDataset(InMemoryDataset):
"""
should be called only once in user's python scripts to initialize setings of dataset instance
"""
super(BoxPSDataset, self).init(**kwargs)
super().init(**kwargs)
rank_offset = kwargs.get("rank_offset", "")
self._set_rank_offset(rank_offset)
......
......@@ -23,7 +23,7 @@ class Index(object):
class TreeIndex(Index):
def __init__(self, name, path):
super(TreeIndex, self).__init__(name)
super().__init__(name)
self._wrapper = core.IndexWrapper()
self._wrapper.insert_tree_index(name, path)
self._tree = self._wrapper.get_tree_index(name)
......
......@@ -59,7 +59,7 @@ class VocabParallelEmbedding(Layer):
class SimpleMPNet(paddle.nn.Layer):
def __init__(self, vocab_size, hidden_size, inner_size, output_size):
super(SimpleMPNet, self).__init__()
super().__init__()
self.linear1 = fleet.meta_parallel.ColumnParallelLinear(
hidden_size,
inner_size,
......@@ -94,7 +94,7 @@ class VocabParallelEmbedding(Layer):
mp_group=None,
name=None,
):
super(VocabParallelEmbedding, self).__init__()
super().__init__()
self.model_parallel_group = (
tp._HYBRID_PARALLEL_GROUP.get_model_parallel_group()
......@@ -193,7 +193,7 @@ class ColumnParallelLinear(Layer):
class SimpleMPNet(paddle.nn.Layer):
def __init__(self, vocab_size, hidden_size, inner_size, output_size):
super(SimpleMPNet, self).__init__()
super().__init__()
self.linear1 = fleet.meta_parallel.ColumnParallelLinear(
hidden_size,
inner_size,
......@@ -231,7 +231,7 @@ class ColumnParallelLinear(Layer):
mp_group=None,
name=None,
):
super(ColumnParallelLinear, self).__init__()
super().__init__()
self.model_parallel_group = (
tp._HYBRID_PARALLEL_GROUP.get_model_parallel_group()
......@@ -347,7 +347,7 @@ class RowParallelLinear(Layer):
class SimpleMPNet(paddle.nn.Layer):
def __init__(self, vocab_size, hidden_size, inner_size, output_size):
super(SimpleMPNet, self).__init__()
super().__init__()
self.linear1 = fleet.meta_parallel.ColumnParallelLinear(
hidden_size,
inner_size,
......@@ -385,7 +385,7 @@ class RowParallelLinear(Layer):
mp_group=None,
name=None,
):
super(RowParallelLinear, self).__init__()
super().__init__()
self.in_features = in_features
self.out_features = out_features
......@@ -504,7 +504,7 @@ class ParallelCrossEntropy(Layer):
"""
def __init__(self, mp_group=None, name=None):
super(ParallelCrossEntropy, self).__init__()
super().__init__()
self.name = name
self.model_parallel_group = (
tp._HYBRID_PARALLEL_GROUP.get_model_parallel_group()
......
......@@ -371,7 +371,7 @@ class _Linear(layers.Layer):
bias_attr=None,
name=None,
):
super(_Linear, self).__init__()
super().__init__()
self._dtype = self._helper.get_default_dtype()
self._weight_attr = weight_attr
self._bias_attr = bias_attr
......
......@@ -19,7 +19,7 @@ __all__ = []
class AMPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(AMPOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.wrapped_opt = None
# we do not allow meta optimizer to be inner optimizer currently
......@@ -34,7 +34,7 @@ class AMPOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(AMPOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -20,7 +20,7 @@ __all__ = []
class ASPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(ASPOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = [
......@@ -36,7 +36,7 @@ class ASPOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(ASPOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -20,7 +20,7 @@ __all__ = []
class DGCOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(DGCOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.dgc_opt = None
# we do not allow meta optimizer to be inner optimizer currently
......@@ -30,7 +30,7 @@ class DGCOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(DGCOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -19,7 +19,7 @@ __all__ = []
class FP16AllReduceOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(FP16AllReduceOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = [
......@@ -36,7 +36,7 @@ class FP16AllReduceOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(FP16AllReduceOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -19,7 +19,7 @@ __all__ = []
class GradientMergeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(GradientMergeOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.wrapped_opt = None
self.meta_optimizers_white_list = [
......@@ -34,7 +34,7 @@ class GradientMergeOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(GradientMergeOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -25,7 +25,7 @@ __all__ = []
class GraphExecutionOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(GraphExecutionOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = []
......
......@@ -21,7 +21,7 @@ __all__ = []
class LambOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(LambOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.lamb_opt = None
# we do not allow meta optimizer to be inner optimizer currently
......@@ -31,7 +31,7 @@ class LambOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(LambOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -20,7 +20,7 @@ __all__ = []
class LarsOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(LarsOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.lars_opt = None
# we do not allow meta optimizer to be inner optimizer currently
......@@ -30,7 +30,7 @@ class LarsOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(LarsOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -23,7 +23,7 @@ __all__ = []
class LocalSGDOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(LocalSGDOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.meta_optimizers_white_list = ['AMPOptimizer']
self.meta_optimizers_black_list = [
......@@ -205,7 +205,7 @@ class LocalSGDOptimizer(MetaOptimizerBase):
class AdaptiveLocalSGDOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(AdaptiveLocalSGDOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.meta_optimizers_white_list = ['AMPOptimizer']
self.meta_optimizers_black_list = [
......
......@@ -19,7 +19,7 @@ __all__ = []
class ParameterServerGraphOptimizer(ParameterServerOptimizer):
def __init__(self, optimizer):
super(ParameterServerGraphOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = []
......
......@@ -25,7 +25,7 @@ __all__ = []
class ParameterServerOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(ParameterServerOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = []
......@@ -33,7 +33,7 @@ class ParameterServerOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(ParameterServerOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -28,7 +28,7 @@ __all__ = []
class PipelineOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(PipelineOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.meta_optimizers_white_list = [
"RecomputeOptimizer",
......@@ -44,7 +44,7 @@ class PipelineOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(PipelineOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
self.micro_batch_size = user_defined_strategy.pipeline_configs[
......
......@@ -33,7 +33,7 @@ from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory
class ParameterServerOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(ParameterServerOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = []
......@@ -41,7 +41,7 @@ class ParameterServerOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(ParameterServerOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -27,7 +27,7 @@ from .common import (
class RawProgramOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(RawProgramOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.meta_optimizers_white_list = [
"RecomputeOptimizer",
......@@ -46,7 +46,7 @@ class RawProgramOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(RawProgramOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
self.without_graph_optimization = (
......
......@@ -19,7 +19,7 @@ __all__ = []
class RecomputeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(RecomputeOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.wrapped_opt = None
# we do not allow meta optimizer to be inner optimizer currently
......@@ -34,7 +34,7 @@ class RecomputeOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(RecomputeOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
......
......@@ -50,7 +50,7 @@ class ShardingOptimizer(MetaOptimizerBase):
"""Sharding Optimizer."""
def __init__(self, optimizer):
super(ShardingOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.meta_optimizers_white_list = [
"RecomputeOptimizer",
......
......@@ -28,7 +28,7 @@ __all__ = []
class TensorParallelOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(TensorParallelOptimizer, self).__init__(optimizer)
super().__init__(optimizer)
self.inner_opt = optimizer
self.meta_optimizers_white_list = [
"RecomputeOptimizer",
......@@ -46,7 +46,7 @@ class TensorParallelOptimizer(MetaOptimizerBase):
def _set_basic_info(
self, loss, role_maker, user_defined_optimizer, user_defined_strategy
):
super(TensorParallelOptimizer, self)._set_basic_info(
super()._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy
)
self.mp_degree = user_defined_strategy.tensor_parallel_configs[
......
......@@ -19,9 +19,7 @@ __all__ = []
class MetaParallelBase(Layer):
def __init__(self, layers, hcg, strategy):
super(MetaParallelBase, self).__init__(
layers.full_name() + "_meta_parallel_base"
)
super().__init__(layers.full_name() + "_meta_parallel_base")
self._layers = layers
self._hcg = hcg
self._strategy = strategy
......
......@@ -83,7 +83,7 @@ class SharedLayerDesc(LayerDesc):
*inputs,
**kwargs
):
super(SharedLayerDesc, self).__init__(layer_func, *inputs, **kwargs)
super().__init__(layer_func, *inputs, **kwargs)
self.layer_name = key
self.forward_func = forward_func
self.shared_weight_attr = shared_weight_attr
......@@ -179,7 +179,7 @@ class SegmentLayers(object):
class PipelineLayerChunk(Layer):
def __init__(self):
super(PipelineLayerChunk, self).__init__()
super().__init__()
self.run_function = []
def append(self, sublayer):
......@@ -240,7 +240,7 @@ class PipelineLayer(Layer):
class ReshapeHelp(Layer):
def __init__(self, shape):
super(ReshapeHelp, self).__init__()
super().__init__()
self.shape = shape
def forward(self, x):
......@@ -275,7 +275,7 @@ class PipelineLayer(Layer):
ReshapeHelp, shape=[-1, 256]),
LayerDesc(nn.Linear, 256, self.num_classes), # classifier
]
super(AlexNetPipeDesc, self).__init__(
super().__init__(
layers=decs, loss_fn=nn.CrossEntropyLoss(), **kwargs)
model = AlexNetPipeDesc(num_stages=pipeline_parallel_size, topology=hcg._topo)
......@@ -293,7 +293,7 @@ class PipelineLayer(Layer):
recompute_ctx=None,
num_virtual_pipeline_stages=None,
):
super(PipelineLayer, self).__init__()
super().__init__()
if num_stages is None and topology is None:
raise ValueError("should provide num_stages or topology")
......
......@@ -34,7 +34,7 @@ class PipelineParallel(MetaParallelBase):
raise TypeError(
"The Layer should be a derived class of PipelineLayer."
)
super(PipelineParallel, self).__init__(layers, hcg, strategy)
super().__init__(layers, hcg, strategy)
self.use_data_parallel = self._hcg.get_data_parallel_world_size() > 1
self.use_model_parallel = self._hcg.get_model_parallel_world_size() > 1
self.use_sharding_parallel = (
......@@ -462,9 +462,7 @@ class PipelineParallelWithInterleave(PipelineParallel):
# pipeline parallel with interleave scheduler
def __init__(self, layers, hcg, strategy):
super(PipelineParallelWithInterleave, self).__init__(
layers=layers, hcg=hcg, strategy=strategy
)
super().__init__(layers=layers, hcg=hcg, strategy=strategy)
assert layers.get_num_virtual_stages() > 1
assert (
framework.in_dygraph_mode()
......
......@@ -21,7 +21,7 @@ __all__ = []
class ShardingParallel(MetaParallelBase):
def __init__(self, layers, hcg, **kwargs):
super(ShardingParallel, self).__init__(layers, hcg, **kwargs)
super().__init__(layers, hcg, **kwargs)
def _prepare_for_model(self):
logger.info("start broadcast sharding parameters")
......
......@@ -26,7 +26,7 @@ __all__ = []
class TensorParallel(MetaParallelBase):
def __init__(self, layers, hcg, **kwargs):
super(TensorParallel, self).__init__(layers, hcg, **kwargs)
super().__init__(layers, hcg, **kwargs)
def _prepare_for_model(self):
logger.info("start broadcast mp parameters")
......
......@@ -47,7 +47,7 @@ def distributed_model(model):
class LinearNet(nn.Layer):
def __init__(self):
super(LinearNet, self).__init__()
super().__init__()
self._linear1 = nn.Linear(10, 10)
self._linear2 = nn.Linear(10, 1)
......
......@@ -439,7 +439,7 @@ def recompute(function, *args, **kwargs):
def __init__(self, input_size=10,
recompute_blocks=[1, 3],
recompute_kwargs={}):
super(Naive_fc_net, self).__init__()
super().__init__()
self.recompute_blocks = recompute_blocks
self.recompute_kwargs = recompute_kwargs
self.runfunc0 = get_fc_block(0, input_size, is_last=False)
......
......@@ -20,7 +20,7 @@ __all__ = []
class CollectiveRuntime(RuntimeBase):
def __init__(self):
super(CollectiveRuntime, self).__init__()
super().__init__()
def _init_worker(self):
logging.warn(
......
......@@ -31,7 +31,7 @@ __all__ = []
class ParameterServerRuntime(RuntimeBase):
def __init__(self):
super(ParameterServerRuntime, self).__init__()
super().__init__()
self._communicator = None
def _set_basic_info(self, context):
......
......@@ -677,7 +677,7 @@ class fsClient:
class TheOnePSRuntime(RuntimeBase):
def __init__(self):
super(TheOnePSRuntime, self).__init__()
super().__init__()
self._communicator = None
self._server = None
self._worker = fluid.core.DistFleetWrapper()
......
......@@ -135,7 +135,7 @@ class KVHTTPServer(HTTPServer, object):
def __init__(self, port, handler):
"""Init."""
super(KVHTTPServer, self).__init__(('', port), handler)
super().__init__(('', port), handler)
self.delete_kv_lock = threading.Lock()
self.delete_kv = {}
self.kv_lock = threading.Lock()
......
......@@ -69,7 +69,7 @@ class KVHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
class KVServer(HTTPServer, object):
def __init__(self, port):
super(KVServer, self).__init__(('', port), KVHandler)
super().__init__(('', port), KVHandler)
self.kv_lock = threading.Lock()
self.kv = {'/healthy': b'ok'}
self.port = port
......
......@@ -120,7 +120,7 @@ def init_parallel_env():
class LinearNet(nn.Layer):
def __init__(self):
super(LinearNet, self).__init__()
super().__init__()
self._linear1 = nn.Linear(10, 10)
self._linear2 = nn.Linear(10, 1)
......
......@@ -618,7 +618,7 @@ def _check_and_update_gradient(params_grads, loss_scaling, dist_context):
@register_pass("auto_parallel_amp")
class AMPPass(PassBase):
def __init__(self):
super(AMPPass, self).__init__()
super().__init__()
self.set_attr("loss", None)
self.set_attr("dist_context", None)
self.set_attr("custom_white_list", None)
......
......@@ -58,7 +58,7 @@ class DataParallelOptimizationPass(PassBase):
"""
def __init__(self):
super(DataParallelOptimizationPass, self).__init__()
super().__init__()
# NOTE not use depence on loss and param_grads
self.set_attr("dist_context", None)
self.set_attr("global_rank", -1)
......
......@@ -699,7 +699,7 @@ def cast_startup_program():
@register_pass("auto_parallel_fp16")
class FP16Pass(AMPPass):
def __init__(self):
super(FP16Pass, self).__init__()
super().__init__()
# NOTE: why FP16Pass can override apply_single_impl instead of
# apply_impl? AMP is an optimization pass for serial program,
......
......@@ -221,7 +221,7 @@ class ClipGradByGloblNormPass(PassBase):
"""
def __init__(self):
super(ClipGradByGloblNormPass, self).__init__()
super().__init__()
self.set_attr("rank_id", None)
self.set_attr("dist_context", None)
self.set_attr("params_grads", None)
......
......@@ -327,7 +327,7 @@ def parse_program(
@register_pass("auto_parallel_gradient_merge_pass")
class GradientMergePass(PassBase):
def __init__(self):
super(GradientMergePass, self).__init__()
super().__init__()
self.set_attr("k_steps", -1)
self.set_attr("avg", True)
......
......@@ -38,7 +38,7 @@ def _node_id(node):
@register_pass("auto_parallel_quantization")
class QuantizationPass(PassBase):
def __init__(self):
super(QuantizationPass, self).__init__()
super().__init__()
self.set_attr("dist_context", None)
self.set_attr("params_grads", None)
......
......@@ -35,7 +35,7 @@ from paddle.distributed.auto_parallel.utils import (
class RecomputeState(ProgramStats):
def __init__(self, block, ops):
super(RecomputeState, self).__init__(block=block, ops=ops)
super().__init__(block=block, ops=ops)
self._block = block
self._ops = ops
self.var_op_deps = {}
......@@ -239,7 +239,7 @@ def _add_needed_descs_to_block(
@register_pass("auto_parallel_recompute")
class RecomputePass(PassBase):
def __init__(self):
super(RecomputePass, self).__init__()
super().__init__()
self.set_attr("checkpoints", None)
self.set_attr("loss", None)
self.set_attr("dist_context", None)
......
......@@ -71,7 +71,7 @@ def _is_reshard_op(op):
@register_pass("auto_parallel_sharding")
class ShardingPass(PassBase):
def __init__(self):
super(ShardingPass, self).__init__()
super().__init__()
self.set_attr("dist_context", None)
self.set_attr("stage", None)
self.set_attr("sharding_degree", None) # for parallelizer
......
......@@ -20,7 +20,7 @@ from paddle.fluid.framework import core, _apply_pass as _apply_cpp_pass
@register_pass("fuse_elewise_add_act")
class FuseElementwiseAddActPass(CPPPassWrapper):
def __init__(self):
super(FuseElementwiseAddActPass, self).__init__()
super().__init__()
@property
def cpp_name(self):
......@@ -33,7 +33,7 @@ class FuseElementwiseAddActPass(CPPPassWrapper):
@register_pass("fuse_bn_act")
class FuseBatchNormActPass(CPPPassWrapper):
def __init__(self):
super(FuseBatchNormActPass, self).__init__()
super().__init__()
@property
def cpp_name(self):
......@@ -46,7 +46,7 @@ class FuseBatchNormActPass(CPPPassWrapper):
@register_pass("fuse_bn_add_act")
class FuseBatchNormAddActPass(CPPPassWrapper):
def __init__(self):
super(FuseBatchNormAddActPass, self).__init__()
super().__init__()
@property
def cpp_name(self):
......@@ -59,7 +59,7 @@ class FuseBatchNormAddActPass(CPPPassWrapper):
@register_pass("fuse_relu_depthwise_conv")
class FuseReluDepthwiseConvPass(CPPPassWrapper):
def __init__(self):
super(FuseReluDepthwiseConvPass, self).__init__()
super().__init__()
@property
def cpp_name(self):
......@@ -72,7 +72,7 @@ class FuseReluDepthwiseConvPass(CPPPassWrapper):
@register_pass("fuse_optimizer")
class FuseOptimizerPass(CPPPassWrapper):
def __init__(self):
super(FuseOptimizerPass, self).__init__()
super().__init__()
@property
def cpp_name(self):
......@@ -89,7 +89,7 @@ class FuseOptimizerPass(CPPPassWrapper):
@register_pass("inplace_addto_op")
class InplaceAddtoOpPass(CPPPassWrapper):
def __init__(self):
super(InplaceAddtoOpPass, self).__init__()
super().__init__()
@property
def cpp_name(self):
......@@ -109,7 +109,7 @@ def _set_cinn_op_flag(flag_name, extra_ops):
@register_pass("build_cinn")
class BuildCINNPass(CPPPassWrapper):
def __init__(self):
super(BuildCINNPass, self).__init__()
super().__init__()
self.set_attr("allow_ops", [])
self.set_attr("deny_ops", [])
......
......@@ -351,7 +351,7 @@ def insert_fuse_all_reduce_by_memory_size(block, groups, max_memory_size):
@register_pass("fuse_all_reduce")
class FuseAllReducePass(PassBase):
def __init__(self):
super(FuseAllReducePass, self).__init__()
super().__init__()
self.set_attr("max_memory_size", -1)
def _check_self(self):
......
......@@ -139,7 +139,7 @@ def new_pass(name, pass_attrs={}):
class CPPPassWrapper(PassBase):
def __init__(self):
super(CPPPassWrapper, self).__init__()
super().__init__()
@property
def cpp_name(self):
......
......@@ -40,7 +40,7 @@ from paddle.fluid.layers.learning_rate_scheduler import (
@register_pass("add_lr_decay_table_pass")
class AddLrDecayTablePass(PassBase):
def __init__(self):
super(AddLrDecayTablePass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -169,7 +169,7 @@ class AddLrDecayTablePass(PassBase):
@register_pass("add_listen_and_serv_pass")
class AddListenAndServPass(PassBase):
def __init__(self):
super(AddListenAndServPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -202,7 +202,7 @@ class AddListenAndServPass(PassBase):
@register_pass("add_rpc_global_flags_pass")
class AddRpcGlobalFlagsPass(PassBase):
def __init__(self):
super(AddRpcGlobalFlagsPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -217,7 +217,7 @@ class AddRpcGlobalFlagsPass(PassBase):
@register_pass("add_optimizer_pass")
class AddOptimizerPass(PassBase):
def __init__(self):
super(AddOptimizerPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -232,7 +232,7 @@ class AddOptimizerPass(PassBase):
@register_pass("add_geo_optimizer_pass")
class AddGeoOptimizerPass(PassBase):
def __init__(self):
super(AddGeoOptimizerPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -247,7 +247,7 @@ class AddGeoOptimizerPass(PassBase):
@register_pass("build_pserver_startup_program_pass")
class BuildPserverStartupProgramPass(PassBase):
def __init__(self):
super(BuildPserverStartupProgramPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -262,7 +262,7 @@ class BuildPserverStartupProgramPass(PassBase):
@register_pass("delete_unused_in_startup_pass")
class DeleteUnusedInStartupPass(PassBase):
def __init__(self):
super(DeleteUnusedInStartupPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......
......@@ -26,7 +26,7 @@ from paddle.fluid.framework import Program, Parameter
@register_pass("append_send_ops_pass")
class AppendSendOpsPass(PassBase): # 该 pass 被多种模式复用
def __init__(self):
super(AppendSendOpsPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -115,7 +115,7 @@ class AppendSendOpsPass(PassBase): # 该 pass 被多种模式复用
@register_pass("distributed_ops_pass")
class DistributedOpsPass(PassBase):
def __init__(self):
super(DistributedOpsPass, self).__init__()
super().__init__()
self.w_2_table_id = {}
self.emb_size = {}
......@@ -533,7 +533,7 @@ class DistributedOpsPass(PassBase):
@register_pass("delete_optimizer_pass")
class DeleteOptimizesPass(PassBase):
def __init__(self):
super(DeleteOptimizesPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -617,7 +617,7 @@ class DeleteOptimizesPass(PassBase):
@register_pass("delete_extra_optimizer_pass")
class DeleteExtraOptimizerPass(PassBase):
def __init__(self):
super(DeleteExtraOptimizerPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -674,7 +674,7 @@ class DeleteExtraOptimizerPass(PassBase):
@register_pass("fake_init_ops_pass")
class FakeInitOpsPass(PassBase):
def __init__(self):
super(FakeInitOpsPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -729,7 +729,7 @@ class FakeInitOpsPass(PassBase):
@register_pass("ps_gpu_pass")
class PsGpuPass(PassBase):
def __init__(self):
super(PsGpuPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -831,7 +831,7 @@ class PsGpuPass(PassBase):
@register_pass("ps_transpile_pass")
class PsTranspilePass(PassBase):
def __init__(self):
super(PsTranspilePass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -856,7 +856,7 @@ class PsTranspilePass(PassBase):
@register_pass("split_heter_worker_ops_pass")
class SplitHeterWorkerOpsPass(PassBase):
def __init__(self):
super(SplitHeterWorkerOpsPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -1064,7 +1064,7 @@ class SplitHeterWorkerOpsPass(PassBase):
@register_pass("split_trainer_ops_pass")
class SplitTrainerOpsPass(PassBase):
def __init__(self):
super(SplitTrainerOpsPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -1272,7 +1272,7 @@ class SplitTrainerOpsPass(PassBase):
@register_pass("set_heter_pipeline_opt_pass")
class SetHeterPipelineOptPass(PassBase):
def __init__(self):
super(SetHeterPipelineOptPass, self).__init__()
super().__init__()
def _check_self(self):
return True
......@@ -1310,7 +1310,7 @@ class SetHeterPipelineOptPass(PassBase):
@register_pass("split_fl_ops_pass")
class SplitFlOpsPass(PassBase):
def __init__(self):
super(SplitFlOpsPass, self).__init__()
super().__init__()
self.PART_A_DEVICE_FlAG = 'gpu:0'
self.PART_A_JOINT_OP_DEVICE_FlAG = 'gpu:2'
self.PART_B_DEVICE_FlAG = 'gpu:1'
......
......@@ -197,7 +197,7 @@ class FLClientBase(abc.ABC):
class FLClient(FLClientBase):
def __init__(self):
super(FLClient, self).__init__()
super().__init__()
def __build_fl_client_info_desc(self, state_info):
# ......... to implement ...... #
......
......@@ -129,7 +129,7 @@ class Service:
class GpuService(Service):
def __init__(self):
super(GpuService, self).__init__()
super().__init__()
def _set(self, service_proto):
service_proto.server_class = 'PsLocalServer'
......@@ -285,7 +285,7 @@ class Accessor:
class CommonAccessor(Accessor):
def __init__(self):
super(CommonAccessor, self).__init__()
super().__init__()
self.table_name = ''
self.entry = 'none'
self.attrs = []
......@@ -633,7 +633,7 @@ class Table:
class BarrierTable(Table):
def __init__(self, context, idx):
super(BarrierTable, self).__init__()
super().__init__()
self.type = None
self.shard_num = 256
self.accessor.accessor_class = 'CommMergeAccessor'
......@@ -668,7 +668,7 @@ class BarrierTable(Table):
class TensorTable(Table):
def __init__(self, idx, tensor_dict, role_maker):
super(TensorTable, self).__init__()
super().__init__()
self.idx = idx
self.tensor_dict = tensor_dict
self.role_maker = role_maker
......@@ -691,7 +691,7 @@ class TensorTable(Table):
class SparseTable(Table):
def __init__(self, context, send_ctx):
super(SparseTable, self).__init__()
super().__init__()
self.context = context
self.ctx = send_ctx
self.type = None
......@@ -800,7 +800,7 @@ class SparseTable(Table):
class GeoSparseTable(SparseTable):
def __init__(self, context, send_ctx):
super(GeoSparseTable, self).__init__(context, send_ctx)
super().__init__(context, send_ctx)
self.table_class = "MemorySparseGeoTable"
if self.context['ps_mode'] != DistributedMode.GEO:
raise ValueError("not geo sparse table!")
......@@ -835,7 +835,7 @@ class GeoSparseTable(SparseTable):
class DenseTable(Table):
def __init__(self, context, send_ctx):
super(DenseTable, self).__init__()
super().__init__()
self.context = context
self.ctx = send_ctx
self.accessor = Accessor()
......@@ -879,7 +879,7 @@ class Server:
class DownpourServer(Server):
def __init__(self):
super(DownpourServer, self).__init__()
super().__init__()
def _set(self):
pass
......@@ -895,7 +895,7 @@ class Worker:
class DownpourWorker(Worker):
def __init__(self):
super(DownpourWorker, self).__init__()
super().__init__()
def _set(self):
pass
......@@ -1032,7 +1032,7 @@ class PsDescBuilder(object):
class TheOnePSRuntime(RuntimeBase):
def __init__(self):
super(TheOnePSRuntime, self).__init__()
super().__init__()
self._communicator = None
self._server = None
self._worker = fluid.core.DistFleetWrapper()
......
......@@ -101,7 +101,7 @@ class PsProgramBuilder(object):
class GeoPsProgramBuilder(PsProgramBuilder): # 仅 CPU 模式
def __init__(self, pass_ctx):
super(GeoPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
if self.ps_mode != DistributedMode.GEO:
raise ValueError(
"ps mode: {} not matched {}",
......@@ -129,7 +129,7 @@ class GeoPsProgramBuilder(PsProgramBuilder): # 仅 CPU 模式
class NuPsProgramBuilder(PsProgramBuilder):
def __init__(self, pass_ctx):
super(NuPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
if not self.attrs['local_sparse']:
raise ValueError("No local sparse params")
......@@ -178,7 +178,7 @@ class NuPsProgramBuilder(PsProgramBuilder):
class CpuSyncPsProgramBuilder(PsProgramBuilder):
def __init__(self, pass_ctx):
super(CpuSyncPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
if (
self.ps_mode != DistributedMode.SYNC
and self.ps_mode != DistributedMode.ASYNC
......@@ -230,7 +230,7 @@ class CpuSyncPsProgramBuilder(PsProgramBuilder):
class CpuAsyncPsProgramBuilder(CpuSyncPsProgramBuilder):
def __init__(self, pass_ctx):
super(CpuAsyncPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
def _build_trainer_desc(self):
opt_info = self.loss.block.program._fleet_opt
......@@ -267,7 +267,7 @@ class CpuAsyncPsProgramBuilder(CpuSyncPsProgramBuilder):
class GpuPsProgramBuilder(PsProgramBuilder):
def __init__(self, pass_ctx):
super(GpuPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
def _build_trainer_programs(self):
......@@ -301,7 +301,7 @@ class GpuPsProgramBuilder(PsProgramBuilder):
class HeterAsyncPsProgramBuilder(PsProgramBuilder):
def __init__(self, pass_ctx):
super(HeterAsyncPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
def _build_trainer_programs(self):
add_lr_decay_table_pass = new_pass(
......@@ -377,7 +377,7 @@ class HeterAsyncPsProgramBuilder(PsProgramBuilder):
class FlPsProgramBuilder(HeterAsyncPsProgramBuilder):
def __init__(self, pass_ctx):
super(FlPsProgramBuilder, self).__init__(pass_ctx)
super().__init__(pass_ctx)
def _build_trainer_programs(self):
_main_file = ps_log_root_dir + '0_fl_worker_main_program.prototxt'
......
......@@ -95,7 +95,7 @@ class Beta(exponential_family.ExponentialFamily):
paddle.stack([self.alpha, self.beta], -1)
)
super(Beta, self).__init__(self._dirichlet._batch_shape)
super().__init__(self._dirichlet._batch_shape)
@property
def mean(self):
......
......@@ -30,7 +30,7 @@ class Range(Constraint):
def __init__(self, lower, upper):
self._lower = lower
self._upper = upper
super(Range, self).__init__()
super().__init__()
def __call__(self, value):
return self._lower <= value <= self._upper
......
......@@ -77,9 +77,7 @@ class Dirichlet(exponential_family.ExponentialFamily):
)
self.concentration = concentration
super(Dirichlet, self).__init__(
concentration.shape[:-1], concentration.shape[-1:]
)
super().__init__(concentration.shape[:-1], concentration.shape[-1:])
@property
def mean(self):
......
......@@ -60,7 +60,7 @@ class Distribution(object):
else tuple(event_shape)
)
super(Distribution, self).__init__()
super().__init__()
@property
def batch_shape(self):
......
......@@ -96,7 +96,7 @@ class Gumbel(TransformedDistribution):
self.transforms = ()
super(Gumbel, self).__init__(self.base_dist, self.transforms)
super().__init__(self.base_dist, self.transforms)
@property
def mean(self):
......
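Every hunk in this commit applies the same mechanical rewrite: the Python 2 compatible call super(ClassName, self).__init__(...) becomes the zero-argument super().__init__(...), which Python 3 resolves from the enclosing class (__class__) and the method's first positional argument, so behavior is unchanged. A minimal sketch of the equivalence follows; Base, OldStyle and NewStyle are illustrative names, not classes from the Paddle codebase.

class Base:
    def __init__(self):
        self.initialized = True

class OldStyle(Base):
    def __init__(self):
        # Python 2 compatible spelling; still valid in Python 3 but redundant.
        super(OldStyle, self).__init__()

class NewStyle(Base):
    def __init__(self):
        # Zero-argument form; resolves to super(NewStyle, self) automatically.
        super().__init__()

assert OldStyle().initialized and NewStyle().initialized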
(The diffs for the remaining files in this commit are collapsed and not shown here.)