From bd01836016137dc9564f6c26bf4fb5c3b19ff950 Mon Sep 17 00:00:00 2001 From: caozhou <48191911+Caozhou1995@users.noreply.github.com> Date: Wed, 1 Jun 2022 10:22:06 +0800 Subject: [PATCH] add some comp op costs (#43114) --- .../auto_parallel/cost/comp_op_cost.py | 610 ++++++++++++++++-- .../unittests/auto_parallel/test_comp_cost.py | 178 +++++ 2 files changed, 752 insertions(+), 36 deletions(-) diff --git a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py index 28d2e2d5a3..8958c4bf90 100644 --- a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py @@ -23,7 +23,7 @@ class AssignOpCost(CompOpCost): super(AssignOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -41,7 +41,7 @@ class AssignValueOpCost(CompOpCost): super(AssignValueOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -59,7 +59,7 @@ class BeamSearchOpCost(CompOpCost): super(BeamSearchOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -77,7 +77,7 @@ class BeamSearchDecodeOpCost(CompOpCost): super(BeamSearchDecodeOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -95,7 +95,7 @@ class CastOpCost(CompOpCost): super(CastOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -113,7 +113,7 @@ class ConcatOpCost(CompOpCost): super(ConcatOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -131,7 +131,7 @@ class ElementwiseAddOpCost(CompOpCost): super(ElementwiseAddOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -149,7 +149,7 @@ class ElementwiseAddGradOpCost(CompOpCost): super(ElementwiseAddGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -167,7 +167,7 @@ class ElementwiseDivOpCost(CompOpCost): super(ElementwiseDivOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -185,7 +185,7 @@ class ElementwiseDivGradOpCost(CompOpCost): super(ElementwiseDivGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -203,7 +203,7 @@ class ElementwiseMulOpCost(CompOpCost): super(ElementwiseMulOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -221,7 +221,7 @@ class ElementwiseMulGradOpCost(CompOpCost): super(ElementwiseMulGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -239,7 +239,25 @@ class ElementwiseSubOpCost(CompOpCost): super(ElementwiseSubOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ElementwiseSubGradOpCost(CompOpCost): + OP_TYPE = "elementwise_sub_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ElementwiseSubGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -257,7 +275,7 @@ class EmbeddingOpCost(CompOpCost): super(EmbeddingOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -275,7 +293,7 @@ class EmbeddingGradOpCost(CompOpCost): super(EmbeddingGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -293,7 +311,7 @@ class FillConstantOpCost(CompOpCost): super(FillConstantOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -311,7 +329,7 @@ class FillConstantBatchSizeLikeOpCost(CompOpCost): super(FillConstantBatchSizeLikeOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -329,7 +347,7 @@ class FillConstantBatchSizeLikeGradOpCost(CompOpCost): super(FillConstantBatchSizeLikeGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -347,7 +365,7 @@ class GatherOpCost(CompOpCost): super(GatherOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -365,7 +383,7 @@ class GeluOpCost(CompOpCost): super(GeluOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -383,7 +401,7 @@ class GeluGradOpCost(CompOpCost): super(GeluGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -401,7 +419,7 @@ class GreaterEqualOpCost(CompOpCost): super(GreaterEqualOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -419,7 +437,7 @@ class IncrementOpCost(CompOpCost): super(IncrementOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -433,7 +451,7 @@ class IsEmptyOpCost(CompOpCost): super(IsEmptyOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -447,7 +465,7 @@ class LayerNormOpCost(CompOpCost): super(LayerNormOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -465,7 +483,7 @@ class LayerNormGradOpCost(CompOpCost): super(LayerNormGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -483,7 +501,7 @@ class LessThanOpCost(CompOpCost): super(LessThanOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -501,7 +519,7 @@ class LogicalNotOpCost(CompOpCost): super(LogicalNotOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -519,7 +537,7 @@ class LogicalAndOpCost(CompOpCost): super(LogicalAndOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -537,7 +555,7 @@ class LodResetOpCost(CompOpCost): super(LodResetOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -554,7 +572,7 @@ class LogOpCost(CompOpCost): def __init__(self, op=None, op_desc=None, cluster=None): super(LogOpCost, self).__init__(op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -572,7 +590,7 @@ class LookupTableV2OpCost(CompOpCost): super(LookupTableV2OpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -590,7 +608,7 @@ class LookupTableV2GradOpCost(CompOpCost): super(LookupTableV2GradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -608,7 +626,7 @@ class MatmulOpCost(CompOpCost): super(MatmulOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -626,7 +644,7 @@ class MatmulGradOpCost(CompOpCost): super(MatmulGradOpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 @@ -644,7 +662,527 @@ class MatmulV2OpCost(CompOpCost): super(MatmulV2OpCost, self).__init__( op=op, op_desc=op_desc, cluster=cluster) - # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class MatmulV2GradOpCost(CompOpCost): + OP_TYPE = "matmul_v2_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(MatmulV2GradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class MemcpyOpCost(CompOpCost): + OP_TYPE = "memcpy" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(MemcpyOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class MulOpCost(CompOpCost): + OP_TYPE = "mul" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(MulOpCost, self).__init__(op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class MulGradOpCost(CompOpCost): + OP_TYPE = "mul_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(MulGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class OneHotOpCost(CompOpCost): + OP_TYPE = "one_hot" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(OneHotOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ReadFromArrayOpCost(CompOpCost): + OP_TYPE = "read_from_array" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ReadFromArrayOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ReduceSumOpCost(CompOpCost): + OP_TYPE = "reduce_sum" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ReduceSumOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ReduceSumGradOpCost(CompOpCost): + OP_TYPE = "reduce_sum_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ReduceSumGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class Reshape2OpCost(CompOpCost): + OP_TYPE = "reshape2" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(Reshape2OpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class Reshape2GradOpCost(CompOpCost): + OP_TYPE = "reshape2_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(Reshape2GradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ReduceMeanOpCost(CompOpCost): + OP_TYPE = "reduce_mean" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ReduceMeanOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ReduceMeanGradOpCost(CompOpCost): + OP_TYPE = "reduce_mean_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ReduceMeanGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SamplingIdOpCost(CompOpCost): + OP_TYPE = "sampling_id" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SamplingIdOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class ScaleOpCost(CompOpCost): + OP_TYPE = "scale" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(ScaleOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SliceOpCost(CompOpCost): + OP_TYPE = "slice" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SliceOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SoftmaxOpCost(CompOpCost): + OP_TYPE = "softmax" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SoftmaxOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SoftmaxGradOpCost(CompOpCost): + OP_TYPE = "softmax_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SoftmaxGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SoftmaxWithCrossEntropyOpCost(CompOpCost): + OP_TYPE = "softmax_with_cross_entropy" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SoftmaxWithCrossEntropyOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SoftmaxWithCrossEntropyGradOpCost(CompOpCost): + OP_TYPE = "softmax_with_cross_entropy_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SoftmaxWithCrossEntropyGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SplitOpCost(CompOpCost): + OP_TYPE = "split" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SplitOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class Squeeze2OpCost(CompOpCost): + OP_TYPE = "squeeze2" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(Squeeze2OpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SquareOpCost(CompOpCost): + OP_TYPE = "square" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SquareOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SquareGradOpCost(CompOpCost): + OP_TYPE = "square_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SquareGradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class SumOpCost(CompOpCost): + OP_TYPE = "sum" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(SumOpCost, self).__init__(op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class TopKOpCost(CompOpCost): + OP_TYPE = "top_k" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(TopKOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class Transpose2OpCost(CompOpCost): + OP_TYPE = "transpose2" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(Transpose2OpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class Transpose2GradOpCost(CompOpCost): + OP_TYPE = "transpose2_grad" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(Transpose2GradOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class Unsqueeze2OpCost(CompOpCost): + OP_TYPE = "unsqueeze2" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(Unsqueeze2OpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided + def calc_flops(self): + # NOTE: The actual formula will be filled in the future + return 0 + + def calc_time(self): + # NOTE: The actual formula will be filled in the future + return 0 + + +@register_op_cost +class WriteToArrayOpCost(CompOpCost): + OP_TYPE = "write_to_array" + + def __init__(self, op=None, op_desc=None, cluster=None): + super(WriteToArrayOpCost, self).__init__( + op=op, op_desc=op_desc, cluster=cluster) + + # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): # NOTE: The actual formula will be filled in the future return 0 diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py index 4cdd51e42a..af7a44b5aa 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py @@ -54,6 +54,35 @@ from paddle.distributed.auto_parallel.cost.comp_op_cost import LookupTableV2Grad from paddle.distributed.auto_parallel.cost.comp_op_cost import MatmulOpCost from paddle.distributed.auto_parallel.cost.comp_op_cost import MatmulGradOpCost from paddle.distributed.auto_parallel.cost.comp_op_cost import MatmulV2OpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import MatmulV2GradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import MemcpyOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import MulOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import MulGradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import OneHotOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import ReadFromArrayOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import ReduceSumOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import ReduceSumGradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import Reshape2OpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import Reshape2GradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import ReduceMeanOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import ReduceMeanGradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SamplingIdOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import ScaleOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SliceOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SoftmaxOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SoftmaxGradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SoftmaxWithCrossEntropyOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SoftmaxWithCrossEntropyGradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SplitOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import Squeeze2OpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SquareOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SquareGradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import SumOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import TopKOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import Transpose2OpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import Transpose2GradOpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import Unsqueeze2OpCost +from paddle.distributed.auto_parallel.cost.comp_op_cost import WriteToArrayOpCost from test_cluster import cluster_json @@ -244,6 +273,155 @@ class TestCompOpCost(unittest.TestCase): self.assertTrue(op_cost.time >= 0) self.assertTrue(op_cost.memory >= 0) + op_cost = MatmulV2GradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = MemcpyOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = MulOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = MulGradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = OneHotOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = ReadFromArrayOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = ReduceSumOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = ReduceSumGradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = Reshape2OpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = MatmulV2OpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = Reshape2GradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = ReduceMeanOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = ReduceMeanGradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SamplingIdOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = ScaleOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SliceOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SoftmaxOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SoftmaxGradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SoftmaxWithCrossEntropyOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SoftmaxWithCrossEntropyGradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SplitOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = Squeeze2OpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SquareOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SquareGradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = SumOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = TopKOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = Transpose2OpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = Transpose2GradOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = Unsqueeze2OpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) + + op_cost = WriteToArrayOpCost(cluster=cluster) + self.assertTrue(op_cost.flops >= 0) + self.assertTrue(op_cost.time >= 0) + self.assertTrue(op_cost.memory >= 0) # Remove unnecessary files if os.path.exists(cluster_json_path): os.remove(cluster_json_path) -- GitLab