# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import unittest

from test_cluster import cluster_json

from paddle.distributed.auto_parallel.cluster import Cluster
from paddle.distributed.auto_parallel.cost.comp_op_cost import (
    AssignOpCost,
    AssignValueOpCost,
    BeamSearchDecodeOpCost,
    BeamSearchOpCost,
    CastOpCost,
    ConcatOpCost,
    DropoutGradOpCost,
    ElementwiseAddGradOpCost,
    ElementwiseAddOpCost,
    ElementwiseDivGradOpCost,
    ElementwiseDivOpCost,
    ElementwiseMulGradOpCost,
    ElementwiseMulOpCost,
    ElementwiseSubOpCost,
    EmbeddingGradOpCost,
    EmbeddingOpCost,
    FillConstantBatchSizeLikeOpCost,
    FillConstantOpCost,
    FusedSoftmaxMaskUpperTriangleGradOpCost,
    FusedSoftmaxMaskUpperTriangleOpCost,
    GatherOpCost,
    GeluGradOpCost,
    GeluOpCost,
    GreaterEqualOpCost,
    IncrementOpCost,
    IsEmptyOpCost,
    LayerNormGradOpCost,
    LayerNormOpCost,
    LessThanOpCost,
    LodResetOpCost,
    LogicalAndOpCost,
    LogicalNotOpCost,
    LogOpCost,
    LookupTableV2GradOpCost,
    LookupTableV2OpCost,
    MatmulOpCost,
    MatmulV2GradOpCost,
    MatmulV2OpCost,
    MemcpyOpCost,
    MulGradOpCost,
    MulOpCost,
    OneHotOpCost,
    ReadFromArrayOpCost,
    ReduceMeanGradOpCost,
    ReduceMeanOpCost,
    ReduceSumGradOpCost,
    ReduceSumOpCost,
    Reshape2GradOpCost,
    Reshape2OpCost,
    SamplingIdOpCost,
    ScaleOpCost,
    SliceOpCost,
    SoftmaxGradOpCost,
    SoftmaxOpCost,
    SoftmaxWithCrossEntropyGradOpCost,
    SoftmaxWithCrossEntropyOpCost,
    SplitOpCost,
    SquareGradOpCost,
    SquareOpCost,
    Squeeze2OpCost,
    SumOpCost,
    TopKOpCost,
    Transpose2GradOpCost,
    Transpose2OpCost,
    Unsqueeze2OpCost,
    WriteToArrayOpCost,
)


class TestCompOpCost(unittest.TestCase):
    def test_comp_cost(self):
        # Build cluster
        file_dir = os.path.dirname(os.path.abspath(__file__))
        cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json")
        cluster_json_object = json.loads(cluster_json)
        with open(cluster_json_path, "w") as cluster_json_file:
            json.dump(cluster_json_object, cluster_json_file)
        cluster = Cluster()
        cluster.build_from_file(cluster_json_path)

        op_cost = AssignOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = AssignValueOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = BeamSearchOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = BeamSearchDecodeOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = CastOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ConcatOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseAddOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseAddGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseDivOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseDivGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseMulOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseMulGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ElementwiseSubOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = EmbeddingOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = EmbeddingGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = FillConstantOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = FillConstantBatchSizeLikeOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = GatherOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = GeluOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = GeluGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = GreaterEqualOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = IncrementOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = IsEmptyOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LayerNormOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LayerNormGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LessThanOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LogicalNotOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LogicalAndOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LodResetOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LogOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LookupTableV2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = LookupTableV2GradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MatmulOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MatmulV2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MatmulV2GradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MemcpyOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MulOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MulGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = OneHotOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ReadFromArrayOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ReduceSumOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ReduceSumGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = Reshape2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = MatmulV2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = Reshape2GradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ReduceMeanOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ReduceMeanGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SamplingIdOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = ScaleOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SliceOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SoftmaxOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SoftmaxGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SoftmaxWithCrossEntropyOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SoftmaxWithCrossEntropyGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SplitOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = Squeeze2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SquareOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SquareGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = SumOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = TopKOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = Transpose2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = Transpose2GradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = Unsqueeze2OpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = WriteToArrayOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = DropoutGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = FusedSoftmaxMaskUpperTriangleOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        op_cost = FusedSoftmaxMaskUpperTriangleGradOpCost(cluster=cluster)
        self.assertTrue(op_cost.flops >= 0)
        self.assertTrue(op_cost.time >= 0)
        self.assertTrue(op_cost.memory >= 0)

        # Remove unnecessary files
        if os.path.exists(cluster_json_path):
            os.remove(cluster_json_path)


if __name__ == "__main__":
    unittest.main()