op_test_xpu.py

#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.backward import append_backward
from paddle.fluid.framework import Program, convert_np_dtype_to_dtype_
from testsuite import append_loss_ops, create_op, set_input
from white_list import op_threshold_white_list, no_grad_set_white_list
from op_test import OpTest
from xpu.get_test_cover_info import is_empty_grad_op_type, get_xpu_op_support_types, type_dict_str_to_numpy


class XPUOpTest(OpTest):

    @classmethod
    def setUpClass(cls):
        '''Fix random seeds to remove randomness from tests'''
        cls.use_xpu = True
        cls.use_mkldnn = False
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Restore random seeds"""

        def is_empty_grad_op(op_type):
            grad_op = op_type + '_grad'
            xpu_version = core.get_xpu_device_version(0)
            xpu_op_list = core.get_xpu_device_op_list(xpu_version)
            if grad_op in xpu_op_list.keys():
                return False
            return True

        if cls.dtype == np.float16:
            place = paddle.XPUPlace(0)
            if core.is_float16_supported(place) == False:
                return

        if cls.dtype == np.float64:
            return

        super().tearDownClass()

    def _get_places(self):
        places = [paddle.XPUPlace(0)]
        return places

    def check_output(self,
                     atol=0.001,
                     no_check_set=None,
                     equal_nan=False,
                     check_dygraph=True,
                     inplace_atol=None,
                     check_eager=False):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, atol, no_check_set, equal_nan,
                                     check_dygraph, inplace_atol, check_eager)

    def check_output_with_place(self,
                                place,
                                atol=0.001,
                                no_check_set=None,
                                equal_nan=False,
                                check_dygraph=True,
                                inplace_atol=None,
                                check_eager=False):
        self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
        if self.dtype == np.float64:
            return

        if self.dtype == np.float16:
            if core.is_float16_supported(place) == False:
                return

        if self.dtype == np.float16:
            atol = 0.1
        return super().check_output_with_place(place, atol, no_check_set,
                                               equal_nan, check_dygraph,
                                               inplace_atol)

    def check_grad(self,
                   inputs_to_check,
                   output_names,
                   no_grad_set=None,
                   numeric_grad_delta=0.005,
                   in_place=False,
                   max_relative_error=0.005,
                   user_defined_grads=None,
                   user_defined_grad_outputs=None,
                   check_dygraph=True,
                   numeric_place=None,
                   check_eager=False):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(place, inputs_to_check, output_names,
                                   no_grad_set, numeric_grad_delta, in_place,
                                   max_relative_error, user_defined_grads,
                                   user_defined_grad_outputs, check_dygraph,
                                   numeric_place, check_eager)

    def check_grad_with_place(self,
                              place,
                              inputs_to_check,
                              output_names,
                              no_grad_set=None,
                              numeric_grad_delta=0.005,
                              in_place=False,
                              max_relative_error=0.005,
                              user_defined_grads=None,
                              user_defined_grad_outputs=None,
                              check_dygraph=True,
                              numeric_place=None,
                              check_eager=False):
        if hasattr(self, 'op_type_need_check_grad'):
            xpu_version = core.get_xpu_device_version(0)
            if is_empty_grad_op_type(xpu_version, self.op_type,
                                     self.in_type_str):
                self._check_grad_helper()
                return

        cast_grad_op_types = get_xpu_op_support_types('cast')
        cast_grad_op_types_np = []
        for ctype in cast_grad_op_types:
            cast_grad_op_types_np.append(type_dict_str_to_numpy[ctype])

        if (self.dtype not in cast_grad_op_types_np):
            return

        if self.dtype == np.float64:
            return

        if self.dtype == np.float16:
            if core.is_float16_supported(place) == False:
                return

        if self.dtype == np.float16:
            max_relative_error = 1.0
            return super().check_grad_with_place(
                place, inputs_to_check, output_names, no_grad_set,
                numeric_grad_delta, in_place, max_relative_error,
                user_defined_grads, user_defined_grad_outputs, check_dygraph)

        a1 = self.get_grad_with_place(
            place,
            inputs_to_check,
            output_names,
            no_grad_set=no_grad_set,
            user_defined_grad_outputs=user_defined_grad_outputs)
        a2 = self.get_grad_with_place(
            place,
            inputs_to_check,
            output_names,
            no_grad_set=no_grad_set,
            user_defined_grad_outputs=user_defined_grad_outputs)
        a3 = self.get_grad_with_place(
            paddle.CPUPlace(),
            inputs_to_check,
            output_names,
            no_grad_set=no_grad_set,
            user_defined_grad_outputs=user_defined_grad_outputs)
        self._assert_is_close(a1, a2, inputs_to_check, 0.00000001,
                              "Gradient Check On two xpu")
        self._assert_is_close(a1, a3, inputs_to_check, max_relative_error,
                              "Gradient Check On cpu & xpu")

    def get_grad_with_place(self,
                            place,
                            inputs_to_check,
                            output_names,
                            no_grad_set=None,
                            numeric_grad_delta=0.005,
                            in_place=False,
                            max_relative_error=0.005,
                            user_defined_grad_outputs=None,
                            check_dygraph=True):
        self.scope = core.Scope()
        op_inputs = self.inputs if hasattr(self, "inputs") else dict()
        op_outputs = self.outputs if hasattr(self, "outputs") else dict()
        op_attrs = self.attrs if hasattr(self, "attrs") else dict()

        self._check_grad_helper()
        if self.dtype == np.float64 and \
            self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST:
            numeric_grad_delta = 1e-5
            max_relative_error = 1e-7

        cache_list = None
        if hasattr(self, "cache_name_list"):
            cache_list = self.cache_name_list

        # oneDNN numeric gradient should use CPU kernel
        use_onednn = False
        if "use_mkldnn" in op_attrs and op_attrs["use_mkldnn"] == True:
            op_attrs["use_mkldnn"] = False
            use_onednn = True

        mean_grad_op_types = get_xpu_op_support_types('mean')
        mean_grad_op_types_np = []
        for mtype in mean_grad_op_types:
            mean_grad_op_types_np.append(type_dict_str_to_numpy[mtype])

        self.op = create_op(self.scope,
                            self.op_type,
                            op_inputs,
                            op_outputs,
                            op_attrs,
                            cache_list=cache_list)

        if use_onednn:
            op_attrs["use_mkldnn"] = True

        if no_grad_set is None:
            no_grad_set = set()
        else:
            if (self.op_type not in no_grad_set_white_list.NEED_TO_FIX_OP_LIST
                ) and (self.op_type
                       not in no_grad_set_white_list.NOT_CHECK_OP_LIST) and (
                           not self.is_bfloat16_op()):
                raise AssertionError("no_grad_set must be None, op_type is " +
                                     self.op_type + " Op.")

        for input_to_check in inputs_to_check:
            set_input(self.scope, self.op, self.inputs, place)

        if not type(output_names) is list:
            output_names = [output_names]

        if (self.dtype not in mean_grad_op_types_np):

            prog = Program()
            block = prog.global_block()
            scope = core.Scope()
            self._append_ops(block)

            inputs = self._get_inputs(block)
            outputs = self._get_outputs(block)
            feed_dict = self.feed_var(inputs, place)
            cast_inputs = list(map(block.var, output_names))
            cast_outputs = block.create_var(dtype="float32",
                                            shape=cast_inputs[0].shape)
            cast_op = block.append_op(type="cast",
                                      inputs={"X": cast_inputs},
                                      outputs={"Out": cast_outputs},
                                      attrs={
                                          "in_dtype":
                                          convert_np_dtype_to_dtype_(
                                              self.dtype),
                                          "out_dtype":
                                          core.VarDesc.VarType.FP32
                                      })
            cast_op.desc.infer_var_type(block.desc)
            cast_op.desc.infer_shape(block.desc)

            output_names = [cast_outputs.name]

            loss = append_loss_ops(block, output_names)
            loss_names = [loss.name]
            recast_inputs = list(map(block.var, loss_names))
            recast_loss = block.create_var(dtype=self.dtype,
                                           shape=recast_inputs[0].shape)

            recast_op = block.append_op(type="cast",
                                        inputs={"X": recast_inputs},
                                        outputs={"Out": recast_loss},
                                        attrs={
                                            "in_dtype":
                                            core.VarDesc.VarType.FP32,
                                            "out_dtype":
                                            convert_np_dtype_to_dtype_(
                                                self.dtype)
                                        })
            recast_op.desc.infer_var_type(block.desc)
            recast_op.desc.infer_shape(block.desc)

            param_grad_list = append_backward(loss=recast_loss,
                                              parameter_list=[input_to_check],
                                              no_grad_set=no_grad_set)
            fetch_list = [g for p, g in param_grad_list]

            executor = fluid.Executor(place)
            return list(
                map(
                    np.array,
                    executor.run(prog,
                                 feed_dict,
                                 fetch_list,
                                 scope=scope,
                                 return_numpy=False)))

        analytic_grads = self._get_gradient(
            inputs_to_check,
            place,
            output_names,
            no_grad_set,
            user_defined_grad_outputs=user_defined_grad_outputs)
        return analytic_grads