# !/usr/bin/env python3 # Copyright (c) 2021 Institute for Quantum Computing, Baidu Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. r""" The module of the gradient tool. """ from typing import Any, Callable, Tuple, List import numpy as np import paddle import paddle_quantum from paddle_quantum.ansatz import Circuit from math import pi from random import choice from tqdm import tqdm import matplotlib.pyplot as plt __all__ = [ "show_gradient", "plot_distribution", "random_sample", "plot_loss_grad", "plot_supervised_loss_grad", "random_sample_supervised" ] def show_gradient(circuit: Circuit, loss_func: Callable[[Circuit, Any], paddle.Tensor], ITR: int, LR: float, *args: Any) -> Tuple[List[float], List[float]]: r"""Calculate the gradient and loss function for every parameter in QNN. Args: circuit: QNN to be trained. loss_func: Loss function that evaluates the QNN. ITR: Number of iterations. LR: Learning rate. *args: Parameters for ``loss_func`` other than ``circuit``. Returns: Contains following two elements. - loss_list: A list of losses for each iteration. - grad_list: A list of gradients for each iteration. """ grad_list = [] loss_list = [] pbar = tqdm( desc="Training: ", total=ITR, ncols=100, ascii=True ) # randomize initial parameters circuit.randomize_param() opt = paddle.optimizer.Adam(learning_rate=LR, parameters=circuit.parameters()) for _ in range(ITR): pbar.update(1) loss = loss_func(circuit, *args) loss.backward() grad_list.append(circuit.grad) loss_list.append(loss.numpy()[0]) opt.minimize(loss) opt.clear_grad() pbar.close() return loss_list, grad_list def plot_distribution(grad: np.ndarray) -> None: r"""Plot the distribution map according to the input gradients. Args: grad: List of gradients with respect to a parameter. """ grad = np.abs(grad) grad_list = [0, 0, 0, 0, 0] x = ['<0.0001', ' (0.0001,0.001)', '(0.001,0.01)', '(0.01,0.1)', '>0.1'] for g in grad: if g > 0.1: grad_list[4] += 1 elif g > 0.01: grad_list[3] += 1 elif g > 0.001: grad_list[2] += 1 elif g > 0.0001: grad_list[1] += 1 else: grad_list[0] += 1 grad_list = np.array(grad_list) / len(grad) plt.figure() plt.bar(x, grad_list, width=0.5) plt.title('The gradient distribution of variables') plt.ylabel('ratio') plt.show() def random_sample(circuit: Circuit, loss_func: Callable[[Circuit, Any], paddle.Tensor], sample_num: int, *args: Any, mode: str = 'single', if_plot: bool = True, param: int = 0) -> Tuple[List[float], List[float]]: r"""Randomly sample the model. Obtain mean and variance of gradients according to different calculation modes. Args: circuit: QNN to be trained. loss_func: Loss function that evaluates the QNN. sample_num: Number of samplings. mode: Mode for calculation. Defaults to ``'single'``. if_plot: Whether plot the calculation. Defaults to ``True``. param: Which parameter to be plotted in single mode, Defaults to ``0``, which means the first one. *args: Parameters for ``loss_func`` other than ``circuit``. Note: This function provides three calculation modes: single, max and random. - In single mode, we calculate the mean and variance of gradients of every trainable parameter. - In max mode, we calculate the mean and variance of maximum gradients of for every trainable parameter. - In random mode, we calculate the mean and variance of data randomly extracted from gradients of every trainable parameter. Returns: Contains the following two elements. - loss_list: A list of losses for each iteration. - grad_list: A list of gradients for each iteration. """ loss_list, grad_list = [], [] pbar = tqdm( desc="Sampling: ", total=sample_num, ncols=100, ascii=True ) for _ in range(sample_num): pbar.update(1) circuit.randomize_param() loss = loss_func(circuit, *args) loss.backward() loss_list.append(loss.numpy()[0]) grad_list.append(circuit.grad) pbar.close() if mode == 'single': grad_list = np.array(grad_list) grad_list = grad_list.transpose() grad_variance_list = [] grad_mean_list = [] for idx in range(len(grad_list)): grad_variance_list.append(np.var(grad_list[idx])) grad_mean_list.append(np.mean(grad_list[idx])) print("Mean of gradient for all parameters: ") for i in range(len(grad_mean_list)): print("theta", i+1, ": ", grad_mean_list[i]) print("Variance of gradient for all parameters: ") for i in range(len(grad_variance_list)): print("theta", i+1, ": ", grad_variance_list[i]) if if_plot: plot_distribution(grad_list[param]) return grad_mean_list, grad_variance_list if mode == 'max': max_grad_list = [] for idx in range(len(grad_list)): max_grad_list.append(np.max(np.abs(grad_list[idx]))) print("Mean of max gradient") print(np.mean(max_grad_list)) print("Variance of max gradient") print(np.var(max_grad_list)) if if_plot: plot_distribution(max_grad_list) return np.mean(max_grad_list), np.var(max_grad_list) if mode == 'random': random_grad_list = [] for idx in range(len(grad_list)): random_grad = choice(grad_list[idx]) random_grad_list.append(random_grad) print("Mean of random gradient") print(np.mean(random_grad_list)) print("Variance of random gradient") print(np.var(random_grad_list)) if if_plot: plot_distribution(random_grad_list) return np.mean(random_grad_list), np.var(random_grad_list) return loss_list, grad_list def plot_loss_grad(circuit: Circuit, loss_func: Callable[[Circuit, Any], paddle.Tensor], ITR: int, LR: float, *args: Any) -> None: r"""Plot the distribution maps between loss values & gradients and number of iterations. Args: circuit: QNN to be trained. loss_func: Loss function that evaluate QNN. ITR: Number of iterations. LR: Learning rate. *args: Parameters for ``loss_func`` other than ``circuit``. """ loss, grad = show_gradient(circuit, loss_func, ITR, LR, *args) plt.xlabel(r"Iteration") plt.ylabel(r"Loss") plt.plot(range(1, ITR+1), loss, 'r', label='loss') plt.legend() plt.show() max_grad = [np.max(np.abs(i)) for i in grad] plt.xlabel(r"Iteration") plt.ylabel(r"Gradient") plt.plot(range(1, ITR+1), max_grad, 'b', label='gradient') plt.legend() plt.show() def plot_supervised_loss_grad(circuit: Circuit, loss_func: Callable[[Circuit, Any], paddle.Tensor], N: int, EPOCH: int, LR: float, BATCH: int, TRAIN_X: paddle.Tensor, TRAIN_Y: list, *args: Any) -> Tuple[List[float], List[float]]: r""" plot the distribution maps between loss values & gradients and number of iterations in supervised training Args: circuit: QNN ready to be trained. loss_func: Loss function that evaluates the QNN. N: Number of qubits. EPOCH: Number of training iterations. LR: Learning rate. BATCH: Size of batches. TRAIN_X: Data set . TRAIN_Y: Label set. *args: Parameters for ``loss_func`` other than ``circuit``. Raises: Exception: Training data should be paddle.Tensor type Returns: Contains the following two elements. - loss_list: A list of losses for each iteration. - grad_list: A list of gradients for each iteration. """ grad_list = [] loss_list = [] if type(TRAIN_X) != paddle.Tensor: raise Exception("Training data should be paddle.Tensor type") circuit.randomize_param() opt = paddle.optimizer.Adam(learning_rate=LR, parameters=circuit.parameters()) for _ in range(EPOCH): for itr in range(len(TRAIN_X)//BATCH): input_state = TRAIN_X[itr*BATCH:(itr+1)*BATCH] input_state = input_state.reshape([-1, 1, 2**N]) label = TRAIN_Y[itr * BATCH:(itr + 1) * BATCH] loss = loss_func(circuit, input_state, label) loss.backward() grad_list.append(circuit.grad) loss_list.append(loss.numpy()[0]) opt.minimize(loss) opt.clear_grad() max_grad = [np.max(np.abs(i)) for i in grad_list] plt.xlabel(r"Iteration") plt.ylabel(r"Loss") plt.plot(range(1, EPOCH*len(TRAIN_X)//BATCH+1), loss_list, 'r', label='loss') plt.legend() plt.show() plt.xlabel(r"Iteration") plt.ylabel(r"Gradient") plt.plot(range(1, EPOCH*len(TRAIN_X)//BATCH+1), max_grad, 'b', label='gradient') plt.legend() plt.show() return loss_list, grad_list def random_sample_supervised(circuit: Circuit, loss_func: Callable[[Circuit, Any], paddle.Tensor], N: int, sample_num: int, BATCH: int, TRAIN_X: paddle.Tensor, TRAIN_Y: paddle.Tensor, *args: Any, mode: str = 'single', if_plot: bool = True, param: int = 0) -> Tuple[List[float], List[float]]: r"""Random sample the supervised model. Obtain mean and variance of gradients according to different calculation modes. Args: circuit: QNN to be trained. loss_func: Loss function that evaluates the QNN. N: Number of qubits. sample_num: Number of samplings. BATCH: Size of batches. TRAIN_X: Data set. TRAIN_Y: Label set. mode: Mode for calculation. Defaults to ``'single'``. if_plot: Whether plot the calculation. Defaults to ``True``. param: Which parameter to be plotted in single mode. Defaults to ``0``, which means the first one. *args: Parameters for ``loss_func`` other than ``circuit``. Note: This function provides three calculation modes: single, max and random. - In single mode, we calculate the mean and variance of gradients of every trainable parameters. - In max mode, we calculate the mean and variance of maximum gradients of for every trainable parameters. - In random mode, we calculate the mean and variance of data randomly extracted from gradients of every trainable parameters. Raises: Exception: Training data should be paddle.Tensor type Returns: Contains the following two elements. - loss_list: A list of losses for each iteration. - grad_list: A list of gradients for each iteration. """ grad_list = [] loss_list = [] input_state = TRAIN_X[0:BATCH] input_state = input_state.reshape([-1, 1, 2**N]) label = TRAIN_Y[0: BATCH] if type(TRAIN_X) != paddle.Tensor: raise Exception("Training data should be paddle.Tensor type") pbar = tqdm( desc="Sampling: ", total=sample_num, ncols=100, ascii=True ) for idx in range(sample_num): pbar.update(1) circuit.randomize_param() loss = loss_func(circuit, input_state, label) loss.backward() grad_list.append(circuit.grad) loss_list.append(loss.numpy()[0]) pbar.close() if mode == 'single': grad_list = np.array(grad_list) grad_list = grad_list.transpose() grad_variance_list = [] grad_mean_list = [] for idx in range(len(grad_list)): grad_variance_list.append(np.var(grad_list[idx])) grad_mean_list.append(np.mean(grad_list[idx])) print("Mean of gradient for all parameters: ") for i in range(len(grad_mean_list)): print("theta", i+1, ": ", grad_mean_list[i]) print("Variance of gradient for all parameters: ") for i in range(len(grad_variance_list)): print("theta", i+1, ": ", grad_variance_list[i]) if if_plot: plot_distribution(grad_list[param]) return grad_mean_list, grad_variance_list if mode == 'max': max_grad_list = [] for idx in range(len(grad_list)): max_grad_list.append(np.max(np.abs(grad_list[idx]))) print("Mean of max gradient") print(np.mean(max_grad_list)) print("Variance of max gradient") print(np.var(max_grad_list)) if if_plot: plot_distribution(max_grad_list) return np.mean(max_grad_list), np.var(max_grad_list) if mode == 'random': random_grad_list = [] for idx in range(len(grad_list)): random_grad = choice(grad_list[idx]) random_grad_list.append(random_grad) print("Mean of random gradient") print(np.mean(random_grad_list)) print("Variance of random gradient") print(np.var(random_grad_list)) if if_plot: plot_distribution(random_grad_list) return np.mean(random_grad_list), np.var(random_grad_list) return loss_list, grad_list