""" Copyright (c) 2016, Marco Tulio Correia Ribeiro All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ """ The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime. """ import numpy as np import scipy as sp import sklearn import sklearn.preprocessing from skimage.color import gray2rgb from sklearn.linear_model import Ridge, lars_path from sklearn.utils import check_random_state import tqdm import copy from functools import partial from skimage.segmentation import quickshift from skimage.measure import regionprops class LimeBase(object): """Class for learning a locally linear sparse model from perturbed data""" def __init__(self, kernel_fn, verbose=False, random_state=None): """Init function Args: kernel_fn: function that transforms an array of distances into an array of proximity values (floats). verbose: if true, print local prediction values from linear model. random_state: an integer or numpy.RandomState that will be used to generate random numbers. If None, the random state will be initialized using the internal numpy seed. """ self.kernel_fn = kernel_fn self.verbose = verbose self.random_state = check_random_state(random_state) @staticmethod def generate_lars_path(weighted_data, weighted_labels): """Generates the lars path for weighted data. Args: weighted_data: data that has been weighted by kernel weighted_label: labels, weighted by kernel Returns: (alphas, coefs), both are arrays corresponding to the regularization parameter and coefficients, respectively """ x_vector = weighted_data alphas, _, coefs = lars_path(x_vector, weighted_labels, method='lasso', verbose=False) return alphas, coefs def forward_selection(self, data, labels, weights, num_features): """Iteratively adds features to the model""" clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state) used_features = [] for _ in range(min(num_features, data.shape[1])): max_ = -100000000 best = 0 for feature in range(data.shape[1]): if feature in used_features: continue clf.fit(data[:, used_features + [feature]], labels, sample_weight=weights) score = clf.score(data[:, used_features + [feature]], labels, sample_weight=weights) if score > max_: best = feature max_ = score used_features.append(best) return np.array(used_features) def feature_selection(self, data, labels, weights, num_features, method): """Selects features for the model. see interpret_instance_with_data to understand the parameters.""" if method == 'none': return np.array(range(data.shape[1])) elif method == 'forward_selection': return self.forward_selection(data, labels, weights, num_features) elif method == 'highest_weights': clf = Ridge(alpha=0.01, fit_intercept=True, random_state=self.random_state) clf.fit(data, labels, sample_weight=weights) coef = clf.coef_ if sp.sparse.issparse(data): coef = sp.sparse.csr_matrix(clf.coef_) weighted_data = coef.multiply(data[0]) # Note: most efficient to slice the data before reversing sdata = len(weighted_data.data) argsort_data = np.abs(weighted_data.data).argsort() # Edge case where data is more sparse than requested number of feature importances # In that case, we just pad with zero-valued features if sdata < num_features: nnz_indexes = argsort_data[::-1] indices = weighted_data.indices[nnz_indexes] num_to_pad = num_features - sdata indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype))) indices_set = set(indices) pad_counter = 0 for i in range(data.shape[1]): if i not in indices_set: indices[pad_counter + sdata] = i pad_counter += 1 if pad_counter >= num_to_pad: break else: nnz_indexes = argsort_data[sdata - num_features:sdata][::-1] indices = weighted_data.indices[nnz_indexes] return indices else: weighted_data = coef * data[0] feature_weights = sorted( zip(range(data.shape[1]), weighted_data), key=lambda x: np.abs(x[1]), reverse=True) return np.array([x[0] for x in feature_weights[:num_features]]) elif method == 'lasso_path': weighted_data = ((data - np.average(data, axis=0, weights=weights)) * np.sqrt(weights[:, np.newaxis])) weighted_labels = ((labels - np.average(labels, weights=weights)) * np.sqrt(weights)) nonzero = range(weighted_data.shape[1]) _, coefs = self.generate_lars_path(weighted_data, weighted_labels) for i in range(len(coefs.T) - 1, 0, -1): nonzero = coefs.T[i].nonzero()[0] if len(nonzero) <= num_features: break used_features = nonzero return used_features elif method == 'auto': if num_features <= 6: n_method = 'forward_selection' else: n_method = 'highest_weights' return self.feature_selection(data, labels, weights, num_features, n_method) def interpret_instance_with_data(self, neighborhood_data, neighborhood_labels, distances, label, num_features, feature_selection='auto', model_regressor=None): """Takes perturbed data, labels and distances, returns interpretation. Args: neighborhood_data: perturbed data, 2d array. first element is assumed to be the original data point. neighborhood_labels: corresponding perturbed labels. should have as many columns as the number of possible labels. distances: distances to original data point. label: label for which we want an interpretation num_features: maximum number of features in interpretation feature_selection: how to select num_features. options are: 'forward_selection': iteratively add features to the model. This is costly when num_features is high 'highest_weights': selects the features that have the highest product of absolute weight * original data point when learning with all the features 'lasso_path': chooses features based on the lasso regularization path 'none': uses all features, ignores num_features 'auto': uses forward_selection if num_features <= 6, and 'highest_weights' otherwise. model_regressor: sklearn regressor to use in interpretation. Defaults to Ridge regression if None. Must have model_regressor.coef_ and 'sample_weight' as a parameter to model_regressor.fit() Returns: (intercept, exp, score, local_pred): intercept is a float. exp is a sorted list of tuples, where each tuple (x,y) corresponds to the feature id (x) and the local weight (y). The list is sorted by decreasing absolute value of y. score is the R^2 value of the returned interpretation local_pred is the prediction of the interpretation model on the original instance """ weights = self.kernel_fn(distances) labels_column = neighborhood_labels[:, label] used_features = self.feature_selection(neighborhood_data, labels_column, weights, num_features, feature_selection) if model_regressor is None: model_regressor = Ridge(alpha=1, fit_intercept=True, random_state=self.random_state) easy_model = model_regressor easy_model.fit(neighborhood_data[:, used_features], labels_column, sample_weight=weights) prediction_score = easy_model.score( neighborhood_data[:, used_features], labels_column, sample_weight=weights) local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1)) if self.verbose: print('Intercept', easy_model.intercept_) print('Prediction_local', local_pred,) print('Right:', neighborhood_labels[0, label]) return (easy_model.intercept_, sorted(zip(used_features, easy_model.coef_), key=lambda x: np.abs(x[1]), reverse=True), prediction_score, local_pred) class ImageInterpretation(object): def __init__(self, image, segments): """Init function. Args: image: 3d numpy array segments: 2d numpy array, with the output from skimage.segmentation """ self.image = image self.segments = segments self.intercept = {} self.local_weights = {} self.local_pred = None def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False, num_features=5, min_weight=0.): """Init function. Args: label: label to interpret positive_only: if True, only take superpixels that positively contribute to the prediction of the label. negative_only: if True, only take superpixels that negatively contribute to the prediction of the label. If false, and so is positive_only, then both negativey and positively contributions will be taken. Both can't be True at the same time hide_rest: if True, make the non-interpretation part of the return image gray num_features: number of superpixels to include in interpretation min_weight: minimum weight of the superpixels to include in interpretation Returns: (image, mask), where image is a 3d numpy array and mask is a 2d numpy array that can be used with skimage.segmentation.mark_boundaries """ if label not in self.local_weights: raise KeyError('Label not in interpretation') if positive_only & negative_only: raise ValueError("Positive_only and negative_only cannot be true at the same time.") segments = self.segments image = self.image local_weights_label = self.local_weights[label] mask = np.zeros(segments.shape, segments.dtype) if hide_rest: temp = np.zeros(self.image.shape) else: temp = self.image.copy() if positive_only: fs = [x[0] for x in local_weights_label if x[1] > 0 and x[1] > min_weight][:num_features] if negative_only: fs = [x[0] for x in local_weights_label if x[1] < 0 and abs(x[1]) > min_weight][:num_features] if positive_only or negative_only: for f in fs: temp[segments == f] = image[segments == f].copy() mask[segments == f] = 1 return temp, mask else: for f, w in local_weights_label[:num_features]: if np.abs(w) < min_weight: continue c = 0 if w < 0 else 1 mask[segments == f] = -1 if w < 0 else 1 temp[segments == f] = image[segments == f].copy() temp[segments == f, c] = np.max(image) return temp, mask def get_rendered_image(self, label, min_weight=0.005): """ Args: label: label to interpret min_weight: Returns: image, is a 3d numpy array """ if label not in self.local_weights: raise KeyError('Label not in interpretation') from matplotlib import cm segments = self.segments image = self.image local_weights_label = self.local_weights[label] temp = np.zeros_like(image) weight_max = abs(local_weights_label[0][1]) local_weights_label = [(f, w/weight_max) for f, w in local_weights_label] local_weights_label = sorted(local_weights_label, key=lambda x: x[1], reverse=True) # negatives are at last. cmaps = cm.get_cmap('Spectral') colors = cmaps(np.linspace(0, 1, len(local_weights_label))) colors = colors[:, :3] for i, (f, w) in enumerate(local_weights_label): if np.abs(w) < min_weight: continue temp[segments == f] = image[segments == f].copy() temp[segments == f] = colors[i] * 255 return temp class LimeImageInterpreter(object): """Interpres predictions on Image (i.e. matrix) data. For numerical features, perturb them by sampling from a Normal(0,1) and doing the inverse operation of mean-centering and scaling, according to the means and stds in the training data. For categorical features, perturb by sampling according to the training distribution, and making a binary feature that is 1 when the value is the same as the instance being interpreted.""" def __init__(self, kernel_width=.25, kernel=None, verbose=False, feature_selection='auto', random_state=None): """Init function. Args: kernel_width: kernel width for the exponential kernel. If None, defaults to sqrt(number of columns) * 0.75. kernel: similarity kernel that takes euclidean distances and kernel width as input and outputs weights in (0,1). If None, defaults to an exponential kernel. verbose: if true, print local prediction values from linear model feature_selection: feature selection method. can be 'forward_selection', 'lasso_path', 'none' or 'auto'. See function 'einterpret_instance_with_data' in lime_base.py for details on what each of the options does. random_state: an integer or numpy.RandomState that will be used to generate random numbers. If None, the random state will be initialized using the internal numpy seed. """ kernel_width = float(kernel_width) if kernel is None: def kernel(d, kernel_width): return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2)) kernel_fn = partial(kernel, kernel_width=kernel_width) self.random_state = check_random_state(random_state) self.feature_selection = feature_selection self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state) def interpret_instance(self, image, classifier_fn, labels=(1,), hide_color=None, num_features=100000, num_samples=1000, batch_size=10, distance_metric='cosine', model_regressor=None ): """Generates interpretations for a prediction. First, we generate neighborhood data by randomly perturbing features from the instance (see __data_inverse). We then learn locally weighted linear models on this neighborhood data to interpret each of the classes in an interpretable way (see lime_base.py). Args: image: 3 dimension RGB image. If this is only two dimensional, we will assume it's a grayscale image and call gray2rgb. classifier_fn: classifier prediction probability function, which takes a numpy array and outputs prediction probabilities. For ScikitClassifiers , this is classifier.predict_proba. labels: iterable with labels to be interpreted. hide_color: TODO num_features: maximum number of features present in interpretation num_samples: size of the neighborhood to learn the linear model batch_size: TODO distance_metric: the distance metric to use for weights. model_regressor: sklearn regressor to use in interpretation. Defaults to Ridge regression in LimeBase. Must have model_regressor.coef_ and 'sample_weight' as a parameter to model_regressor.fit() Returns: An ImageIinterpretation object (see lime_image.py) with the corresponding interpretations. """ if len(image.shape) == 2: image = gray2rgb(image) try: segments = quickshift(image, sigma=1) except ValueError as e: raise e self.segments = segments fudged_image = image.copy() if hide_color is None: # if no hide_color, use the mean for x in np.unique(segments): mx = np.mean(image[segments == x], axis=0) fudged_image[segments == x] = mx elif hide_color == 'avg_from_neighbor': from scipy.spatial.distance import cdist n_features = np.unique(segments).shape[0] regions = regionprops(segments + 1) centroids = np.zeros((n_features, 2)) for i, x in enumerate(regions): centroids[i] = np.array(x.centroid) d = cdist(centroids, centroids, 'sqeuclidean') for x in np.unique(segments): # print(np.argmin(d[x])) a = [image[segments == i] for i in np.argsort(d[x])[1:6]] mx = np.mean(np.concatenate(a), axis=0) fudged_image[segments == x] = mx else: fudged_image[:] = 0 top = labels data, labels = self.data_labels(image, fudged_image, segments, classifier_fn, num_samples, batch_size=batch_size) distances = sklearn.metrics.pairwise_distances( data, data[0].reshape(1, -1), metric=distance_metric ).ravel() interpretation_image = ImageInterpretation(image, segments) for label in top: (interpretation_image.intercept[label], interpretation_image.local_weights[label], interpretation_image.score, interpretation_image.local_pred) = self.base.interpret_instance_with_data( data, labels, distances, label, num_features, model_regressor=model_regressor, feature_selection=self.feature_selection) return interpretation_image def data_labels(self, image, fudged_image, segments, classifier_fn, num_samples, batch_size=10): """Generates images and predictions in the neighborhood of this image. Args: image: 3d numpy array, the image fudged_image: 3d numpy array, image to replace original image when superpixel is turned off segments: segmentation of the image classifier_fn: function that takes a list of images and returns a matrix of prediction probabilities num_samples: size of the neighborhood to learn the linear model batch_size: classifier_fn will be called on batches of this size. Returns: A tuple (data, labels), where: data: dense num_samples * num_superpixels labels: prediction probabilities matrix """ n_features = np.unique(segments).shape[0] data = self.random_state.randint(0, 2, num_samples * n_features) \ .reshape((num_samples, n_features)) labels = [] data[0, :] = 1 imgs = [] for row in tqdm.tqdm(data): temp = copy.deepcopy(image) zeros = np.where(row == 0)[0] mask = np.zeros(segments.shape).astype(bool) for z in zeros: mask[segments == z] = True temp[mask] = fudged_image[mask] imgs.append(temp) if len(imgs) == batch_size: preds = classifier_fn(np.array(imgs)) labels.extend(preds) imgs = [] if len(imgs) > 0: preds = classifier_fn(np.array(imgs)) labels.extend(preds) return data, np.array(labels)