# test_sparse_momentum_op.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import paddle
import paddle.fluid as fluid


def calculate_sparse_momentum_by_numpy(param,
                                       grad,
                                       mu,
                                       velocity,
                                       use_nesterov,
                                       learning_rate,
                                       index,
                                       axis,
                                       regularization_method=None,
                                       regularization_coeff=1.0):
    """NumPy reference for one sparse momentum update step.

    ``grad`` holds gradient slices taken along ``axis``; ``index`` tells
    which row (``axis == 0``) or column (any other ``axis`` value) of
    ``param`` each slice belongs to. Slices that share an index are summed,
    and positions that never appear in ``index`` get a zero gradient.

    Args:
        param: 2-D parameter array.
        grad: sparse gradient slices, one per entry of ``index``.
        mu: momentum coefficient.
        velocity: velocity array, same shape as ``param``.
        use_nesterov: select the Nesterov form of the parameter update.
        learning_rate: step size (scalar or broadcastable array).
        index: 1-D integer array of target rows/columns.
        axis: 0 to scatter into rows, otherwise into columns.
        regularization_method: ``"l2_decay"`` adds
            ``regularization_coeff * param`` to the dense gradient; any
            other value applies no regularization.
        regularization_coeff: weight of the L2 decay term.

    Returns:
        Tuple ``(param_out, velocity_out)``.
    """
    sparse_grad = grad.copy()
    dense_grad = np.zeros_like(param)

    # Scatter-add the sparse slices into a dense gradient.
    for target in np.unique(index):
        hits = index == target
        if axis == 0:
            dense_grad[target, :] = np.sum(sparse_grad[hits, :], axis=0)
        else:
            dense_grad[:, target] = np.sum(sparse_grad[:, hits], axis=1)

    apply_l2_decay = regularization_method == "l2_decay"
    if apply_l2_decay:
        dense_grad = dense_grad + regularization_coeff * param

    velocity_out = mu * velocity + dense_grad

    if not use_nesterov:
        param_out = param - learning_rate * velocity_out
    elif apply_l2_decay:
        # Algebraically the same as the branch below, but evaluated in a
        # different order; keep each form so rounding stays unchanged.
        param_out = param - (dense_grad + velocity_out * mu) * learning_rate
    else:
        param_out = param - dense_grad * learning_rate - \
                    velocity_out * mu * learning_rate

    return param_out, velocity_out


class TestSparseMomentumOp(OpTest):
    """Compare the ``sparse_momentum`` operator with a NumPy reference.

    ``setUp`` assigns default settings, then calls the ``init_*`` hooks so
    that subclasses can override dtype, axis, multi-precision mode and the
    Nesterov flag before any test data is generated.
    """

    def setUp(self):
        """Generate random inputs and the expected outputs for the op."""
        self.op_type = "sparse_momentum"
        self.dtype = np.float32
        self.index_dtype = np.int32
        self.axis = 0
        self.multi_precision = False
        self.use_nesterov = False
        self.batch_size = 20
        self.num_classes = 20
        # Hooks run after the defaults above so subclasses can change them.
        self.init_dtype()
        self.init_axis()
        self.init_multi_precision()
        self.init_use_nesterov()

        # Multi-precision cases are only defined here for float16 data.
        if self.multi_precision:
            assert self.dtype == np.float16

        param = np.random.random(
            (self.batch_size, self.num_classes)).astype(self.dtype)
        grad = np.random.random(
            (self.batch_size, self.num_classes)).astype(self.dtype)
        # Pick half as many (possibly repeated) indices as the size of the
        # chosen axis and keep only those gradient slices.
        if self.axis == 0:
            index = np.random.randint(
                0,
                self.batch_size,
                size=(self.batch_size // 2, ),
                dtype=self.index_dtype)
            grad = grad[index]
        else:
            index = np.random.randint(
                0,
                self.num_classes,
                size=(self.num_classes // 2, ),
                dtype=self.index_dtype)
            grad = grad[:, index]
        velocity = np.random.random(
            (self.batch_size, self.num_classes)).astype(self.dtype)
        learning_rate = np.array([0.001]).astype(self.dtype)

        mu = 0.9
        regularization_method = "l2_decay"
        regularization_coeff = 1.0

        param_out, velocity_out = calculate_sparse_momentum_by_numpy(
            param=param,
            grad=grad,
            mu=mu,
            velocity=velocity,
            use_nesterov=self.use_nesterov,
            learning_rate=learning_rate,
            regularization_method=regularization_method,
            regularization_coeff=regularization_coeff,
            index=index,
            axis=self.axis)

        self.attrs = {
            'mu': mu,
            'use_nesterov': self.use_nesterov,
            'regularization_method': regularization_method,
            'regularization_coeff': regularization_coeff,
            'multi_precision': self.multi_precision,
            'axis': self.axis,
        }

        # In multi-precision mode Param/Grad are passed as float16 while
        # Velocity and LearningRate are passed as float32.
        self.inputs = {
            'Param': param.astype("float16") if self.multi_precision else param,
            'Velocity': velocity.astype("float32")
            if self.multi_precision else velocity,
            'LearningRate': learning_rate.astype("float32")
            if self.multi_precision else learning_rate,
            'Grad': grad.astype("float16") if self.multi_precision else grad,
            'Index': index,
            'Axis': np.array(self.axis).astype(np.int32),
        }
        self.outputs = {
            'ParamOut': param_out.astype("float16")
            if self.multi_precision else param_out,
            'VelocityOut': velocity_out.astype("float32")
            if self.multi_precision else velocity_out,
        }

        # The float32 master copy only exists in multi-precision mode, so no
        # further check of the flag is needed inside this branch.
        if self.multi_precision:
            self.inputs['MasterParam'] = param.astype("float32")
            self.outputs['MasterParamOut'] = param_out.astype("float32")

    def init_dtype(self):
        """Hook: override to set ``self.dtype`` / ``self.index_dtype``."""
        pass

    def init_axis(self):
        """Hook: override to set ``self.axis``."""
        pass

    def init_multi_precision(self):
        """Hook: override to set ``self.multi_precision``."""
        pass

    def init_use_nesterov(self):
        """Hook: override to set ``self.use_nesterov``."""
        pass

    def test_check_output(self):
        # Looser tolerance for the float16 (multi-precision) cases.
        self.check_output(atol=5e-3 if self.multi_precision else 1e-5)


class TestSparseMomentumOpDtype1(TestSparseMomentumOp):
    """Case: float32 data with int64 indices."""

    def init_dtype(self):
        # Value dtype first, index dtype second.
        self.dtype, self.index_dtype = np.float32, np.int64


class TestSparseMomentumOpDtype2(TestSparseMomentumOp):
    """Case: float64 data with int32 indices."""

    def init_dtype(self):
        # Value dtype first, index dtype second.
        self.dtype, self.index_dtype = np.float64, np.int32


class TestSparseMomentumOpDtype3(TestSparseMomentumOp):
    """Case: float64 data with int64 indices."""

    def init_dtype(self):
        # Value dtype first, index dtype second.
        self.dtype, self.index_dtype = np.float64, np.int64


class TestSparseMomentumOpAxis(TestSparseMomentumOp):
    # Case: the sparse gradient is indexed along axis 1 instead of axis 0.
    def init_axis(self):
        self.axis = 1


class TestSparseMomentumOpNesterov(TestSparseMomentumOp):
    # Case: same as the base test but with use_nesterov enabled.
    def init_use_nesterov(self):
        self.use_nesterov = True


class TestSparseMomentumOpMultiPrecision(TestSparseMomentumOp):
    """Case: multi-precision, float16 data, int32 indices, Nesterov on."""

    def init_multi_precision(self):
        self.multi_precision = True

    def init_use_nesterov(self):
        self.use_nesterov = True

    def init_dtype(self):
        # float16 is required whenever multi_precision is enabled.
        self.dtype, self.index_dtype = np.float16, np.int32


class TestSparseMomentumOpMultiPrecision1(TestSparseMomentumOp):
    """Case: multi-precision, float16 data, int64 indices, Nesterov on."""

    def init_multi_precision(self):
        self.multi_precision = True

    def init_use_nesterov(self):
        self.use_nesterov = True

    def init_dtype(self):
        # float16 is required whenever multi_precision is enabled.
        self.dtype, self.index_dtype = np.float16, np.int64


class TestSparseMomentumOpMultiPrecision2(TestSparseMomentumOp):
    """Case: multi-precision, float16 data, int32 indices, Nesterov off."""

    def init_multi_precision(self):
        self.multi_precision = True

    def init_use_nesterov(self):
        # Stated explicitly even though it matches the base-class default.
        self.use_nesterov = False

    def init_dtype(self):
        # float16 is required whenever multi_precision is enabled.
        self.dtype, self.index_dtype = np.float16, np.int32


class TestSparseMomentumOpMultiPrecision3(TestSparseMomentumOp):
    """Case: multi-precision, float16 data, int64 indices, Nesterov off."""

    def init_multi_precision(self):
        self.multi_precision = True

    def init_use_nesterov(self):
        # Stated explicitly even though it matches the base-class default.
        self.use_nesterov = False

    def init_dtype(self):
        # float16 is required whenever multi_precision is enabled.
        self.dtype, self.index_dtype = np.float16, np.int64