# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle
from paddle.autograd.functional import _tensors


def _product(t):
    """Return the product of the entries of ``t`` as a Python ``int``.

    Args:
        t: Either a plain ``int`` (returned unchanged) or a sequence of
            integers such as a tensor shape.

    Returns:
        int: ``t`` itself when it is an ``int``; otherwise the product of
        its entries. An empty sequence yields ``1``.
    """
    if isinstance(t, int):
        return t
    # ``np.product`` was removed in NumPy 2.0; ``np.prod`` is the supported
    # spelling. The ``int`` cast matters for an empty shape, where the
    # product is the float ``1.0`` and could not be fed to ``range``.
    return int(np.prod(t))


def _get_item(t, idx):
    """Return the element at flat position ``idx`` of tensor ``t``.

    Args:
        t (paddle.Tensor): Tensor to read from.
        idx (int): Index into ``t`` after it is reshaped to one dimension.

    Returns:
        The result of indexing the flattened tensor with ``idx``.
    """
    assert isinstance(t, paddle.Tensor), "The first argument t must be Tensor."
    assert isinstance(idx,
                      int), "The second argument idx must be an int number."
    flat_t = paddle.reshape(t, [-1])
    return flat_t[idx]


def _set_item(t, idx, value):
    """Write ``value`` at flat position ``idx`` and return the reshaped result.

    Args:
        t (paddle.Tensor): Tensor whose element is to be replaced.
        idx (int): Index into ``t`` after it is reshaped to one dimension.
        value: Value assigned to that position.

    Returns:
        The flattened-and-updated tensor reshaped back to ``t.shape``.
        Callers should use this return value rather than ``t``.
    """
    assert isinstance(t, paddle.Tensor), "The first argument t must be Tensor."
    assert isinstance(idx,
                      int), "The second argument idx must be an int number."
    # NOTE(review): whether ``t`` itself is also modified depends on whether
    # ``paddle.reshape`` shares storage with its input -- confirm before
    # relying on either behaviour.
    flat_t = paddle.reshape(t, [-1])
    flat_t[idx] = value
    return paddle.reshape(flat_t, t.shape)


def _compute_numerical_jacobian(func, xs, delta, np_dtype):
    """Approximate the Jacobian of ``func`` at ``xs`` by central differences.

    Each input element is shifted by ``+delta`` and ``-delta`` in turn,
    ``func`` is re-evaluated, and the derivative is estimated as
    ``(y_pos - y_neg) / delta / 2``.

    Args:
        func: Callable invoked as ``func(*xs)``; its result is normalised
            with ``_tensors``.
        xs: Input tensor(s), normalised with ``_tensors``.
        delta: Step added to / subtracted from each input element.
        np_dtype: NumPy dtype of the returned arrays.

    Returns:
        list[list[numpy.ndarray]]: ``jacobian[i][j]`` has shape
        ``(prod(ys[i].shape), prod(xs[j].shape))``; entry ``[p][q]`` is the
        estimated derivative of flat output element ``p`` of output ``i``
        with respect to flat input element ``q`` of input ``j``.
    """
    # NOTE(review): the item assignments below assume ``_tensors`` returns a
    # mutable list -- confirm against its definition.
    xs = _tensors(xs, "xs")
    ys = _tensors(func(*xs), "ys")

    # ``_set_item`` always reshapes back to the original shape, so the flat
    # sizes are fixed for the whole computation.
    in_sizes = [_product(x.shape) for x in xs]
    out_sizes = [_product(y.shape) for y in ys]
    jacobian = [[
        np.zeros((out_size, in_size), dtype=np_dtype) for in_size in in_sizes
    ] for out_size in out_sizes]

    for j, in_size in enumerate(in_sizes):
        for q in range(in_size):
            orig = _get_item(xs[j], q)

            xs[j] = _set_item(xs[j], q, orig + delta)
            ys_pos = _tensors(func(*xs), "ys_pos")

            xs[j] = _set_item(xs[j], q, orig - delta)
            ys_neg = _tensors(func(*xs), "ys_neg")

            # Put the original value back before moving to the next element.
            xs[j] = _set_item(xs[j], q, orig)

            for i, out_size in enumerate(out_sizes):
                for p in range(out_size):
                    y_pos = _get_item(ys_pos[i], p)
                    y_neg = _get_item(ys_neg[i], p)
                    jacobian[i][j][p][q] = (y_pos - y_neg) / delta / 2.
    return jacobian


def _compute_numerical_hessian(func, xs, delta, np_dtype):
    """Approximate the Hessian of ``func`` at ``xs`` by central differences.

    The Hessian is obtained by differencing numerical Jacobians: each input
    element is shifted by ``+delta`` and ``-delta``, the Jacobian is
    recomputed with ``_compute_numerical_jacobian``, and the two results are
    combined as ``(jac_pos - jac_neg) / delta / 2``.

    Only row 0 of the Jacobian of the first output (``jacobian[0][i][0]``)
    is read, i.e. only the first flat element of the first output of
    ``func`` contributes.

    Args:
        func: Callable invoked as ``func(*xs)``.
        xs: Input tensor(s), normalised with ``_tensors``.
        delta: Step used for both the outer and the inner differences.
        np_dtype: NumPy dtype of the returned arrays.

    Returns:
        list[list[numpy.ndarray]]: ``hessian[i][j]`` has shape
        ``(prod(xs[i].shape), prod(xs[j].shape))``; entry ``[p][q]`` is the
        estimated second derivative with respect to flat element ``p`` of
        input ``i`` and flat element ``q`` of input ``j``.
    """
    # NOTE(review): the item assignments below assume ``_tensors`` returns a
    # mutable list -- confirm against its definition.
    xs = _tensors(xs, "xs")
    sizes = [_product(x.shape) for x in xs]
    hessian = [[np.zeros((rows, cols), dtype=np_dtype) for cols in sizes]
               for rows in sizes]

    # The perturbed Jacobians depend only on which element (j, q) is
    # shifted, so they are computed once per (j, q) and then used to fill
    # column q of every hessian[i][j].
    for j, size_j in enumerate(sizes):
        for q in range(size_j):
            orig = _get_item(xs[j], q)

            xs[j] = _set_item(xs[j], q, orig + delta)
            jacobian_pos = _compute_numerical_jacobian(func, xs, delta,
                                                       np_dtype)

            xs[j] = _set_item(xs[j], q, orig - delta)
            jacobian_neg = _compute_numerical_jacobian(func, xs, delta,
                                                       np_dtype)

            # Put the original value back before moving to the next element.
            xs[j] = _set_item(xs[j], q, orig)

            for i, size_i in enumerate(sizes):
                for p in range(size_i):
                    hessian[i][j][p][q] = (
                        jacobian_pos[0][i][0][p] - jacobian_neg[0][i][0][p]
                    ) / delta / 2.
    return hessian