Source code for xiuminglib.metric

# pylint: disable=arguments-differ

import numpy as np

from .img import rgb2lum
from .const import Path
from .os import open_file
from .imprt import preset_import

from .log import get_logger
logger = get_logger()


def compute_ci(data, level=0.95):
    r"""Computes confidence interval.

    Args:
        data (list(float)): Samples.
        level (float, optional): Confidence level. Defaults to :math:`0.95`.

    Returns:
        float: One-sided interval (i.e., mean :math:`\pm` this number).
    """
    from scipy import stats

    data = np.array(data).astype(float)
    n = len(data)
    se = stats.sem(data)
    return se * stats.t.ppf((1 + level) / 2., n - 1)
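

# Usage sketch (illustrative, not part of the library): reporting a mean score
# with a 95% confidence interval via compute_ci(). The helper name and the
# sample numbers are hypothetical.
def _demo_compute_ci():
    scores = [29.1, 30.4, 28.7, 31.2, 30.0]  # e.g., per-image PSNR values
    half_width = compute_ci(scores, level=0.95)
    print('%.2f +/- %.2f' % (float(np.mean(scores)), half_width))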


class Base():
    """The base metric.

    Attributes:
        dtype (numpy.dtype): Data type, with which data dynamic range is
            derived.
        drange (float): Dynamic range, i.e., difference between the maximum
            and minimum allowed.
    """

    def __init__(self, dtype):
        """
        Args:
            dtype (str or numpy.dtype): Data type, from which dynamic range
                will be derived.
        """
        self.dtype = np.dtype(dtype)
        if self.dtype.kind == 'f':
            self.drange = 1.
            logger.warning(
                "Input type is float, so assuming dynamic range to be 1")
        elif self.dtype.kind == 'u':
            iinfo = np.iinfo(self.dtype)
            self.drange = float(iinfo.max - iinfo.min)
        else:
            raise NotImplementedError(self.dtype.kind)

    def _assert_type(self, im):
        assert im.dtype == self.dtype, (
            "Input data type ({in_dtype}) different from what was "
            "specified ({dtype})"
        ).format(in_dtype=im.dtype, dtype=self.dtype)

    def _assert_drange(self, im):
        actual = im.max() - im.min()
        assert self.drange >= actual, (
            "The actual dynamic range ({actual}) is larger than what was "
            "derived from the data type ({derived})"
        ).format(actual=actual, derived=self.drange)

    @staticmethod
    def _assert_same_shape(im1, im2):
        assert im1.shape == im2.shape, \
            "The two images are not even of the same shape"

    @staticmethod
    def _ensure_3d(im):
        if im.ndim == 2:
            return np.expand_dims(im, -1)
        if im.ndim == 3:
            assert im.shape[2] in (1, 3), (
                "If 3D, input must have either 1 or 3 channels, but has %d"
            ) % im.shape[2]
            return im
        raise ValueError(
            "Input must be 2D (H-by-W) or 3D (H-by-W-by-C), but is %dD"
            % im.ndim)

    def __call__(self, im1, im2, **kwargs):
        """
        Args:
            im1 (numpy.ndarray): An image of shape H-by-W, H-by-W-by-1, or
                H-by-W-by-3.
            im2 (numpy.ndarray): Another image of the same kind, to compare
                against ``im1``.

        Returns:
            float: The metric computed.
        """
        raise NotImplementedError
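

# Usage sketch (illustrative, not part of the library): how Base derives the
# dynamic range from the data type. The helper name is hypothetical.
def _demo_drange():
    assert Base('uint8').drange == 255.  # 8-bit unsigned integers: 255 - 0
    assert Base('uint16').drange == 65535.
    assert Base('float32').drange == 1.  # floats are assumed to span [0, 1]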


class PSNR(Base):
    """Peak Signal-to-Noise Ratio (PSNR) in dB (higher is better).

    If the inputs are RGB, they are first converted to luma (or relative
    luminance, if the inputs are not gamma-corrected). PSNR is computed on
    the luma.
    """

    def __call__(self, im1, im2, mask=None):
        """
        Args:
            im1 (numpy.ndarray): An image of shape H-by-W, H-by-W-by-1, or
                H-by-W-by-3.
            im2 (numpy.ndarray): Another image of the same kind, to compare
                against ``im1``.
            mask (numpy.ndarray, optional): An H-by-W logical array indicating
                pixels that contribute to the computation.

        Returns:
            float: PSNR in dB.
        """
        self._assert_type(im1)
        self._assert_type(im2)
        im1 = im1.astype(float)  # must be cast to an unbounded type
        im2 = im2.astype(float)
        im1 = self._ensure_3d(im1)
        im2 = self._ensure_3d(im2)
        self._assert_same_shape(im1, im2)
        self._assert_drange(im1)
        self._assert_drange(im2)

        # To luma
        if im1.shape[2] == 3:
            im1 = np.expand_dims(rgb2lum(im1), -1)
            im2 = np.expand_dims(rgb2lum(im2), -1)
        # Inputs guaranteed to be HxWx1 now

        if mask is None:
            mask = np.ones(im1.shape)
        elif mask.ndim == 2:
            mask = np.expand_dims(mask, -1)
        # Mask guaranteed to be 3D
        assert mask.shape == im1.shape, (
            "Mask must be of shape {input_shape}, but is of shape "
            "{mask_shape}"
        ).format(input_shape=im1.shape, mask_shape=mask.shape)
        # Mask guaranteed to be HxWx1 now
        mask = mask.astype(bool)  # in case it's not logical yet

        se = np.square(im1[mask] - im2[mask])
        mse = np.sum(se) / np.sum(mask)
        psnr = 10 * np.log10((self.drange ** 2) / mse)  # dB
        return psnr
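

# Usage sketch (illustrative, not part of the library): masked PSNR between
# two uint8 images. The helper name and the array contents are hypothetical.
def _demo_psnr():
    im1 = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
    im2 = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
    mask = np.zeros((64, 64), dtype=bool)
    mask[16:48, 16:48] = True  # score only the central crop
    psnr = PSNR('uint8')(im1, im2, mask=mask)
    print('PSNR: %.2f dB' % psnr)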


class SSIM(Base):
    r"""The (multi-scale) Structural Similarity Index (SSIM) :math:`\in [0,1]`
    (higher is better).

    If the inputs are RGB, they are first converted to luma (or relative
    luminance, if the inputs are not gamma-corrected). SSIM is computed on
    the luma.
    """

    def __call__(self, im1, im2, multiscale=False):
        """
        Args:
            im1 (numpy.ndarray): An image of shape H-by-W, H-by-W-by-1, or
                H-by-W-by-3.
            im2 (numpy.ndarray): Another image of the same kind, to compare
                against ``im1``.
            multiscale (bool, optional): Whether to compute MS-SSIM.

        Returns:
            float: SSIM computed (higher is better).
        """
        tf = preset_import('tensorflow', assert_success=True)

        self._assert_type(im1)
        self._assert_type(im2)
        im1 = im1.astype(float)  # must be cast to an unbounded type
        im2 = im2.astype(float)
        im1 = self._ensure_3d(im1)
        im2 = self._ensure_3d(im2)
        self._assert_same_shape(im1, im2)
        self._assert_drange(im1)
        self._assert_drange(im2)

        # To luma
        if im1.shape[2] == 3:
            im1 = np.expand_dims(rgb2lum(im1), -1)
            im2 = np.expand_dims(rgb2lum(im2), -1)
        # Guaranteed to be HxWx1 now

        im1 = tf.convert_to_tensor(im1)
        im2 = tf.convert_to_tensor(im2)
        if multiscale:
            ssim_func = tf.image.ssim_multiscale
        else:
            ssim_func = tf.image.ssim
        similarity = ssim_func(im1, im2, max_val=self.drange)
        similarity = similarity.numpy()
        return similarity
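

# Usage sketch (illustrative, not part of the library): single-scale and
# multi-scale SSIM between two uint8 images. Requires TensorFlow; the images
# are kept large enough for the multi-scale variant's repeated downsampling.
# The helper name and the array contents are hypothetical.
def _demo_ssim():
    im1 = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
    im2 = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
    ssim = SSIM('uint8')
    print('SSIM: %f' % ssim(im1, im2))
    print('MS-SSIM: %f' % ssim(im1, im2, multiscale=True))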


class LPIPS(Base):
    r"""The Learned Perceptual Image Patch Similarity (LPIPS) metric (lower is
    better).

    Project page: https://richzhang.github.io/PerceptualSimilarity/

    Note:
        This implementation assumes the minimum value allowed is :math:`0`,
        so data dynamic range becomes the maximum value allowed.

    Attributes:
        dtype (numpy.dtype): Data type, with which data dynamic range is
            derived.
        drange (float): Dynamic range, i.e., difference between the maximum
            and minimum allowed.
        lpips_func (tf.function): The LPIPS network packed into a function.
    """

    def __init__(self, dtype, weight_pb=None):
        """
        Args:
            dtype (str or numpy.dtype): Data type, from which the maximum
                value allowed will be derived.
            weight_pb (str, optional): Path to the network weight protobuf.
                Defaults to the bundled ``net-lin_alex_v0.1.pb``.
        """
        super().__init__(dtype)
        tf = preset_import('tensorflow', assert_success=True)

        if weight_pb is None:
            weight_pb = Path.lpips_weights

        # Pack LPIPS network into a tf function
        graph_def = tf.compat.v1.GraphDef()
        with open_file(weight_pb, 'rb') as h:
            graph_def.ParseFromString(h.read())
        self.lpips_func = tf.function(self._wrap_frozen_graph(
            graph_def, inputs=['0:0', '1:0'], outputs='Reshape_10:0'))

    @staticmethod
    def _wrap_frozen_graph(graph_def, inputs, outputs):
        tf = preset_import('tensorflow', assert_success=True)

        def _imports_graph_def():
            tf.compat.v1.import_graph_def(graph_def, name="")

        wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
        import_graph = wrapped_import.graph
        return wrapped_import.prune(
            tf.nest.map_structure(import_graph.as_graph_element, inputs),
            tf.nest.map_structure(import_graph.as_graph_element, outputs))

    def __call__(self, im1, im2):
        """
        Args:
            im1 (numpy.ndarray): An image of shape H-by-W, H-by-W-by-1, or
                H-by-W-by-3.
            im2 (numpy.ndarray): Another image of the same kind, to compare
                against ``im1``.

        Returns:
            float: LPIPS computed (lower is better).
        """
        tf = preset_import('tensorflow', assert_success=True)

        self._assert_type(im1)
        self._assert_type(im2)
        im1 = im1.astype(float)  # must be cast to an unbounded type
        im2 = im2.astype(float)
        im1 = self._ensure_3d(im1)
        im2 = self._ensure_3d(im2)
        self._assert_same_shape(im1, im2)
        self._assert_drange(im1)
        self._assert_drange(im2)

        if im1.shape[2] == 1:
            im1 = np.dstack([im1] * 3)
            im2 = np.dstack([im2] * 3)
        # Guaranteed to be HxWx3 now

        maxv = self.drange + 0  # NOTE: assumes the minimum value allowed is 0
        im1t = tf.convert_to_tensor(
            np.expand_dims(im1, axis=0), dtype=float) / maxv * 2 - 1
        im2t = tf.convert_to_tensor(
            np.expand_dims(im2, axis=0), dtype=float) / maxv * 2 - 1
        # Now 1xHxWx3 and all values in [-1, 1]

        lpips = self.lpips_func(
            tf.transpose(im1t, [0, 3, 1, 2]),  # to 1x3xHxW
            tf.transpose(im2t, [0, 3, 1, 2])
        ).numpy().squeeze()[()]
        return lpips
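

# Usage sketch (illustrative, not part of the library): LPIPS between two
# uint8 images, using the bundled weights. Requires TensorFlow; the helper
# name and the array contents are hypothetical.
def _demo_lpips():
    im1 = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
    im2 = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
    lpips = LPIPS('uint8')  # loads the default net-lin_alex_v0.1.pb weights
    print('LPIPS: %f' % lpips(im1, im2))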