# Source code for xiuminglib.img

from copy import deepcopy
import numpy as np

from .imprt import preset_import

from .log import get_logger
logger = get_logger()


def normalize_uint(arr):
    r"""Normalizes the input ``uint`` array such that its ``dtype`` maximum
    becomes :math:`1`.

    Args:
        arr (numpy.ndarray): Input array of type ``uint8`` or ``uint16``.

    Returns:
        numpy.ndarray: Normalized array of type ``float``, with values
        :math:`\in [0, 1]`.

    Raises:
        TypeError: If ``arr`` is of neither ``uint8`` nor ``uint16``.
    """
    if arr.dtype not in (np.uint8, np.uint16):
        raise TypeError(arr.dtype)
    # Largest value representable by the input type, e.g., 255 for uint8
    maxv = np.iinfo(arr.dtype).max
    return arr.astype(float) / maxv


def denormalize_float(arr, uint_type='uint8'):
    r"""De-normalizes the input ``float`` array such that :math:`1` becomes
    the target ``uint`` maximum.

    Values are scaled and then cast with ``astype``, i.e., the fractional
    part is truncated rather than rounded.

    Args:
        arr (numpy.ndarray): Input array of type ``float``, with all values
            :math:`\in [0, 1]`.
        uint_type (str, optional): Target ``uint`` type: ``'uint8'`` or
            ``'uint16'``. The equivalent NumPy types (e.g., ``numpy.uint8``)
            are also accepted.

    Returns:
        numpy.ndarray: De-normalized array of the target type.

    Raises:
        TypeError: If ``arr`` is not of type ``float``, or if ``uint_type``
            is not one of the supported types.
        ValueError: If ``arr`` has values outside :math:`[0, 1]`.
    """
    _assert_float_0to1(arr)
    # numpy.dtype() understands both names and type objects, and itself
    # raises TypeError for anything it cannot interpret
    target = np.dtype(uint_type)
    if target.name not in ('uint8', 'uint16'):
        raise TypeError(uint_type)
    maxv = np.iinfo(target).max
    return (arr * maxv).astype(target)


def alpha_blend(arr1, alpha, arr2=None):
    r"""Alpha-blends two arrays, or masks one array.

    The result is ``arr1 * alpha + arr2 * (1 - alpha)``.

    Args:
        arr1 (numpy.ndarray): Input array.
        alpha (numpy.ndarray): Alpha map whose values are :math:`\in [0,1]`.
            Either of the same shape as ``arr1``, or H-by-W / H-by-W-by-1
            when ``arr1`` is H-by-W-by-C, in which case the same alpha is
            applied to every channel.
        arr2 (numpy.ndarray): Input array. If ``None``, ``arr1`` will be
            blended with an all-zero array, equivalent to masking ``arr1``.

    Returns:
        numpy.ndarray: Blended array of type ``float``.

    Raises:
        NotImplementedError: If the shapes of ``alpha`` and ``arr1`` are
            not one of the supported combinations.
    """
    arr1 = arr1.astype(float)
    if arr2 is None:
        arr2 = np.zeros(arr1.shape, dtype=arr1.dtype)

    if alpha.shape != arr1.shape:
        multi_ch = arr1.ndim == 3
        if multi_ch and alpha.ndim == 2:
            # H-by-W alpha: replicate across all channels
            alpha = np.dstack([alpha] * arr1.shape[2])
        elif multi_ch and alpha.ndim == 3 and alpha.shape[2] == 1:
            # H-by-W-by-1 alpha: replicate across all channels
            alpha = np.tile(alpha, (1, 1, arr1.shape[2]))
        else:
            raise NotImplementedError(
                "{arr_s} and {alpha_s}".format(
                    alpha_s=alpha.shape, arr_s=arr1.shape))

    return np.multiply(arr1, alpha) + np.multiply(arr2, 1 - alpha)


def resize(arr, new_h=None, new_w=None, method='cv2'):
    """Resizes an image, with the option of maintaining the aspect ratio.

    Args:
        arr (numpy.ndarray): Image to resize. If multiple-channel, each
            channel is resized independently.
        new_h (int, optional): Target height. If ``None``, will be calculated
            according to the target width, assuming the same aspect ratio.
        new_w (int, optional): Target width. If ``None``, will be calculated
            according to the target height, assuming the same aspect ratio.
        method (str, optional): Accepted values: ``'cv2'``, ``'tf'``, and
            ``'pil'`` (case-insensitive; a few aliases such as ``'opencv'``,
            ``'tensorflow'``, and ``'pillow'`` are also understood).

    Returns:
        numpy.ndarray: Resized image.

    Raises:
        ValueError: If neither ``new_h`` nor ``new_w`` is given.
        NotImplementedError: If ``method`` is not recognized.
    """
    h, w = arr.shape[:2]

    if new_h is None and new_w is None:
        raise ValueError("At least one of new height or width must be given")
    if new_h is None:
        new_h = int(h / w * new_w)
    elif new_w is None:
        new_w = int(w / h * new_h)
    elif int(h / w * new_w) != new_h:
        # Both given, but they disagree with the original aspect ratio
        logger.warning((
            "Aspect ratio changed in resizing: original size is %s; "
            "new size is %s"), (h, w), (new_h, new_w))

    method = method.lower()

    if method in ('tf', 'tensorflow'):
        tf = preset_import('tensorflow', assert_success=True)
        tf.compat.v1.enable_eager_execution()
        tensor = tf.convert_to_tensor(arr)
        tensor_resized = tf.image.resize(
            tensor, (new_h, new_w), method='bilinear', antialias=True)
        resized = tensor_resized.numpy()

    elif method in ('cv', 'cv2', 'opencv'):
        cv2 = preset_import('cv2', assert_success=True)
        # Area interpolation is meant for shrinking only; as soon as either
        # dimension is enlarged, fall back to linear interpolation
        enlarging = new_h > h or new_w > w
        interp = cv2.INTER_LINEAR if enlarging else cv2.INTER_AREA
        # NOTE: OpenCV takes the size as (width, height)
        resized = cv2.resize(arr, (new_w, new_h), interpolation=interp)

    elif method in ('pil', 'pillow'):
        Image = preset_import('Image', assert_success=True)
        img = Image.fromarray(arr)
        # NOTE: PIL takes the size as (width, height)
        img = img.resize((new_w, new_h))
        resized = np.array(img)

    else:
        raise NotImplementedError(method)

    return resized


def binarize(im, threshold=None):
    """Binarizes images.

    The input is never modified; a new array is returned.

    Args:
        im (numpy.ndarray): Image to binarize. Of any integer type (``uint8``,
            ``uint16``, etc.).  If H-by-W-by-3, will be converted to grayscale
            and treated as H-by-W.
        threshold (float, optional): Threshold for binarization: pixels
            strictly greater than it become 1. ``None`` means midpoint of
            the ``dtype``.

    Returns:
        numpy.ndarray: Binarized image. Of only 0's and 1's, and of the same
        ``dtype`` as the (grayscale) input.

    Raises:
        ValueError: If ``im`` is neither H-by-W nor H-by-W-by-3.
    """
    gray = im

    # Color to grayscale
    if gray.ndim == 3 and gray.shape[2] == 3: # h-by-w-by-3
        cv2 = preset_import('cv2', assert_success=True)
        # NOTE: this conversion weights channels assuming OpenCV's BGR order
        gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

    if gray.ndim != 2:
        raise ValueError("'im' is neither h-by-w nor h-by-w-by-3")

    # Compute threshold from data type
    if threshold is None:
        threshold = np.iinfo(gray.dtype).max / 2.

    # The comparison allocates a fresh array, so no defensive copy is needed
    return (gray > threshold).astype(gray.dtype)


def remove_islands(im, min_n_pixels, connectivity=4):
    """Removes small islands of pixels from a binary image.

    Islands are connected components of 1's; those with fewer than
    ``min_n_pixels`` pixels are set to 0. The input is left untouched.

    Args:
        im (numpy.ndarray): Input binary image. Of only 0's and 1's.
            Presumably needs to be ``uint8``, as required by OpenCV's
            connected component analysis -- TODO confirm.
        min_n_pixels (int): Minimum island size to keep.
        connectivity (int, optional): Definition of "connected": either 4 or 8.

    Returns:
        numpy.ndarray: Output image (a copy) with small islands removed.
    """
    cv2 = preset_import('cv2', assert_success=True)

    # Validate inputs
    assert im.ndim == 2, \
        "'im' needs to have exactly two dimensions"
    # All-0 and all-1 images are valid binary images, too
    assert np.isin(im, (0, 1)).all(), \
        "'im' needs to contain only 0's and 1's"
    assert connectivity in (4, 8), \
        "'connectivity' must be either 4 or 8"

    # Find islands, big or small. `connectivity` must be passed by keyword:
    # the second positional argument of the Python binding is the `labels`
    # output array, not the connectivity
    _, labelmap, stats, _ = cv2.connectedComponentsWithStats(
        im, connectivity=connectivity)

    # Label 0 is the background (i.e., the zero pixels), so skip it
    island_sizes = stats[1:, cv2.CC_STAT_AREA]
    small_labels = np.flatnonzero(island_sizes < min_n_pixels) + 1

    # Set small islands to background value, without touching the input
    im_clean = im.copy()
    im_clean[np.isin(labelmap, small_labels)] = 0

    return im_clean


def grid_query_img(im, query_x, query_y, method='bilinear'):
    r"""Grid queries an image via interpolation.

    If you want to grid query unstructured data, consider
    :func:`grid_query_unstruct`.

    This function uses either bilinear interpolation (a degree-1 spline on
    the pixel grid) or bicubic spline interpolation that fits a global
    spline (so memory-intensive) and shows global effects.

    Args:
        im (numpy.ndarray): H-by-W or H-by-W-by-C rectangular grid of data.
            Each of C channels is interpolated independently.
        query_x (array_like): :math:`x` coordinates of the queried rectangle,
            e.g., ``np.arange(10)`` for a 10-by-10 grid (hence, this should
            *not* be generated by :func:`numpy.meshgrid` or similar
            functions). Should be in ascending order.
        query_y (array_like): :math:`y` coordinates, following this
            convention:

            .. code-block:: none

                +---------> query_x
                |
                |
                |
                v query_y

        method (str, optional): Interpolation method: ``'spline'`` or
            ``'bilinear'``.

    Returns:
        numpy.ndarray: Interpolated values at query locations, of shape
        ``(len(query_y), len(query_x))`` for single-channel input or
        ``(len(query_y), len(query_x), im.shape[2])`` for multi-channel
        input.

    Raises:
        ValueError: If ``im`` has neither two nor three dimensions.
        NotImplementedError: If ``method`` is not recognized.
    """
    from scipy.interpolate import RectBivariateSpline

    # Figure out image size and number of channels
    if im.ndim == 3:
        h, w, c = im.shape
    elif im.ndim == 2:
        h, w = im.shape
        c = 1
        im = im[:, :, np.newaxis]
    else:
        raise ValueError("'im' must have either two or three dimensions")

    # Spline degree along both axes for each supported method
    degrees = {'spline': 3, 'bilinear': 1}
    if method not in degrees:
        raise NotImplementedError("Other interplation methods")
    degree = degrees[method]

    query_x = np.atleast_1d(query_x)
    query_y = np.atleast_1d(query_y)
    if method == 'bilinear':
        # The bilinear path used to go through `scipy.interpolate.interp2d`,
        # which evaluated on the sorted queries; keep doing so
        query_x, query_y = np.sort(query_x), np.sort(query_y)

    x = np.arange(w)
    y = np.arange(h)

    if query_x.min() < 0 or query_x.max() > w - 1 or \
            query_y.min() < 0 or query_y.max() > h - 1:
        logger.warning("Sure you want to query points outside 'im'?")

    # Rows of the result follow y, and columns follow x
    interp_val = np.zeros((len(query_y), len(query_x), c))
    for i in range(c):
        if c == 1:
            logger.info("Interpolation (method: %s) started", method)
        else:
            logger.info(
                "Interpolation (method: %s) started for channel %d/%d",
                method, i + 1, c)
        # The channel is indexed as [y, x], so y is the spline's first axis
        spline_obj = RectBivariateSpline(
            y, x, im[:, :, i], kx=degree, ky=degree)
        interp_val[:, :, i] = spline_obj(query_y, query_x, grid=True)
        logger.info("... done")

    if c == 1:
        return interp_val[:, :, 0]
    return interp_val


def grid_query_unstruct(uvs, values, grid_res, method=None):
    r"""Grid queries unstructured data given by coordinates and their values.

    If you are looking to grid query structured data, such as an image, check
    out :func:`grid_query_img`.

    This function interpolates values on a rectangular grid given some sparse,
    unstructured samples. One use case is where you have some UV locations and
    their associated colors, and you want to "paint the colors" on a UV canvas.

    Args:
        uvs (numpy.ndarray): N-by-2 array of UV coordinates where we have
            values (e.g., colors). See
            :func:`xiuminglib.blender.object.smart_uv_unwrap` for the UV
            coordinate convention.
        values (numpy.ndarray): N-by-M array of M-D values at the N UV
            locations, or N-array of scalar values at the N UV locations.
            Channels are interpolated independently.
        grid_res (array_like): Resolution (height first; then width) of
            the query grid.
        method (dict, optional): Dictionary of method-specific parameters.
            Implemented methods and their default parameters:

            .. code-block:: python

                # Default
                method = {
                    'func': 'griddata',
                    # Which SciPy function to call.

                    'func_underlying': 'linear',
                    # Fed to `griddata` as the `method` parameter.

                    'fill_value': (0,), # black
                    # Will be used to fill in pixels outside the convex hulls
                    # formed by the UV locations, and if `max_l1_interp` is
                    # provided, also the pixels whose interpolation is too much
                    # of a stretch to be trusted. In the context of "canvas
                    # painting," this will be the canvas' base color. Either
                    # one value per channel, or a single value (scalar or
                    # length-1 sequence) shared by all channels.

                    'max_l1_interp': np.inf, # trust/accept all interpolations
                    # Maximum L1 distance, which we can trust in interpolation,
                    # to pixels that have values. Interpolation across a longer
                    # range will not be trusted, and hence will be filled with
                    # `fill_value`. OpenCV is needed only if this is finite.
                }

            .. code-block:: python

                method = {
                    'func': 'rbf',
                    # Which SciPy function to call.

                    'func_underlying': 'linear',
                    # Fed to `Rbf` as the `function` parameter.

                    'smooth': 0, # no smoothing
                    # Fed to `Rbf` as the `smooth` parameter.
                }

    Returns:
        numpy.ndarray: Interpolated values at query locations, of shape
        ``grid_res`` for single-channel input or ``(grid_res[0], grid_res[1],
        values.shape[1])`` for multi-channel input.

    Raises:
        NotImplementedError: If ``method['func']`` is not recognized.
    """
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    assert values.ndim == 2 and values.shape[0] == uvs.shape[0]
    n_ch = values.shape[1]

    if method is None:
        method = {'func': 'griddata'}
    func = method.get('func', 'griddata')

    h, w = grid_res
    # Generate query coordinates
    grid_x, grid_y = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    # +---> x
    # |
    # v y
    grid_u, grid_v = grid_x, 1 - grid_y
    # ^ v
    # |
    # +---> u

    if func == 'griddata':
        from scipy.interpolate import griddata

        func_underlying = method.get('func_underlying', 'linear')
        max_l1_interp = method.get('max_l1_interp', np.inf)
        if max_l1_interp is None:
            max_l1_interp = np.inf # trust everything

        # One fill value per channel; a lone value is shared by all channels
        fill_value = np.atleast_1d(method.get('fill_value', (0,)))
        if len(fill_value) == 1:
            fill_value = np.tile(fill_value, n_ch)
        assert len(fill_value) == n_ch

        # Figure out which pixels cannot be trusted. When everything is
        # trusted, there is nothing to compute (and no need for OpenCV)
        untrusted = None
        if not np.isposinf(max_l1_interp):
            cv2 = preset_import('cv2', assert_success=True)
            has_value = np.zeros((h, w), dtype=np.uint8)
            ri = ((1 - uvs[:, 1]) * (h - 1)).astype(int).ravel()
            ci = (uvs[:, 0] * (w - 1)).astype(int).ravel()
            in_canvas = np.logical_and.reduce(
                (ri >= 0, ri < h, ci >= 0, ci < w)) # ignore out-of-canvas
            has_value[ri[in_canvas], ci[in_canvas]] = 1
            # L1 distance from each pixel to the nearest pixel with a value
            dist2val = cv2.distanceTransform(1 - has_value, cv2.DIST_L1, 3)
            untrusted = dist2val > max_l1_interp

        # Process each color channel separately
        interps = []
        for ch_i in range(n_ch):
            v_fill = fill_value[ch_i]
            interp = griddata(uvs, values[:, ch_i], (grid_u, grid_v),
                              method=func_underlying,
                              fill_value=v_fill)
            if untrusted is not None:
                interp[untrusted] = v_fill
            interps.append(interp)
        interps = np.dstack(interps)

    elif func == 'rbf':
        from scipy.interpolate import Rbf

        func_underlying = method.get('func_underlying', 'linear')
        smooth = method.get('smooth', 0)

        # Process each color channel separately
        interps = []
        for ch_i in range(n_ch):
            rbfi = Rbf(uvs[:, 0], uvs[:, 1], values[:, ch_i],
                       function=func_underlying,
                       smooth=smooth)
            interps.append(rbfi(grid_u, grid_v))
        interps = np.dstack(interps)

    else:
        raise NotImplementedError(func)

    if n_ch == 1:
        # Drop only the channel dimension, so that the result is always of
        # shape `grid_res`, even for grids that are one pixel tall or wide
        return interps[:, :, 0]
    return interps


def find_local_extrema(im, want_maxima, kernel_size=3):
    """Finds local maxima or minima in an image.

    A pixel is flagged when it equals the maximum (or minimum) of the square
    window centered at it, so every pixel of a flat region is flagged.

    Args:
        im (numpy.ndarray): H-by-W if single-channel (e.g., grayscale)
            or H-by-W-by-C for multi-channel (e.g., RGB) images. Extrema
            are found independently for each of the C channels.
        want_maxima (bool): Whether maxima or minima are wanted.
        kernel_size (int, optional): Side length of the square window under
            consideration. Must be larger than 1 (not enforced; with a
            window of 1, every pixel trivially equals its own extremum).

    Returns:
        numpy.ndarray: Binary map, of the same shape as ``im``, indicating if
        each pixel is a local extremum.

    Raises:
        ValueError: If ``im`` has neither two nor three dimensions.
    """
    from scipy.ndimage import maximum_filter, minimum_filter

    logger.error("find_local_extrema() not tested yet!")

    if im.ndim not in (2, 3):
        raise ValueError("'im' must have either two or three dimensions")

    # The window spans `kernel_size` pixels along height and width, but only
    # one along the channel axis (if any), so that channels never mix
    window = (kernel_size, kernel_size) + (1,) * (im.ndim - 2)

    extremum_filter = maximum_filter if want_maxima else minimum_filter
    is_extremum = extremum_filter(im, size=window) == im

    return is_extremum


def compute_gradients(im):
    """Computes magnitudes and orientations of image gradients.

    With Scharr operators:

    .. code-block:: none

        [ 3 0 -3 ]           [ 3  10  3]
        [10 0 -10]    and    [ 0   0  0]
        [ 3 0 -3 ]           [-3 -10 -3]

    Gradients are computed in double precision regardless of the input type,
    so negative or large responses of integer images are not lost.

    Args:
        im (numpy.ndarray): H-by-W if single-channel (e.g., grayscale) or
            H-by-W-by-C if multi-channel (e.g., RGB) images. Gradients are
            computed independently for each of the C channels.

    Returns:
        tuple:
            - **grad_mag** (*numpy.ndarray*) -- Magnitude image of the
              gradients.
            - **grad_orient** (*numpy.ndarray*) -- Orientation image of the
              gradients (in radians).

              .. code-block:: none

                       y ^ pi/2
                         |
                pi       |
                 --------+--------> 0
                -pi      |       x
                         | -pi/2

    Raises:
        ValueError: If ``im`` has neither two nor three dimensions.
    """
    cv2 = preset_import('cv2', assert_success=True)

    # Figure out image size and number of channels
    if im.ndim == 3:
        expanded = False
    elif im.ndim == 2:
        im = np.expand_dims(im, axis=2) # adds singleton dimension
        expanded = True
    else:
        raise ValueError("'im' must have either two or three dimensions")
    h, w, c = im.shape

    grad_mag = np.zeros((h, w, c))
    grad_orient = np.zeros((h, w, c))

    # Keeping the source depth (-1) would saturate the result for unsigned
    # integer images: negative gradients would become 0, and large ones
    # would be clipped at the type maximum
    ddepth = cv2.CV_64F

    for i in range(c):
        z = im[:, :, i]

        # Along horizontal and vertical directions; ksize of -1 selects the
        # 3x3 Scharr operator
        grad_h = cv2.Sobel(z, ddepth, 1, 0, ksize=-1)
        grad_v = cv2.Sobel(z, ddepth, 0, 1, ksize=-1)

        grad_mag[:, :, i] = np.hypot(grad_h, grad_v)
        grad_orient[:, :, i] = np.arctan2(grad_v, grad_h)

    if expanded:
        grad_mag = grad_mag[:, :, 0]
        grad_orient = grad_orient[:, :, 0]

    return grad_mag, grad_orient


def gamma_correct(im, gamma=2.2):
    r"""Applies gamma correction to an ``uint`` image.

    Each pixel value :math:`x` is mapped, via a lookup table, to
    :math:`(x / m)^{1 / \gamma} m`, where :math:`m` is the ``dtype``
    maximum; the fractional part is truncated.

    Args:
        im (numpy.ndarray): H-by-W if single-channel (e.g., grayscale) or
            H-by-W-by-C multi-channel (e.g., RGB) ``uint8`` or ``uint16``
            images. If there are four channels, the last one is taken as
            alpha and left uncorrected.
        gamma (float, optional): Gamma value :math:`< 1` shifts image towards
            the darker end of the spectrum, while value :math:`> 1` towards
            the brighter.

    Returns:
        numpy.ndarray: Gamma-corrected image, of the same ``dtype`` as the
        input.
    """
    assert im.dtype in ('uint8', 'uint16')

    # Don't correct alpha channel, if exists
    alpha = None
    if im.ndim == 3 and im.shape[2] == 4:
        alpha = im[:, :, 3]
        im = im[:, :, :3]

    # Build the lookup table with one entry per representable value
    type_max = np.iinfo(im.dtype).max
    levels = np.arange(type_max + 1) / type_max
    table = (levels ** (1 / gamma) * type_max).astype(im.dtype)

    # Look up by indexing, which (unlike OpenCV's 8-bit-only LUT) works for
    # uint16, too
    im_corrected = table[im]

    # Concat alpha channel back
    if alpha is not None:
        im_corrected = np.dstack((im_corrected, alpha))

    return im_corrected


def rgb2lum(im):
    """Converts RGB to relative luminance (if input is linear RGB) or luma
    (if input is gamma-corrected RGB).

    The result is the weighted sum ``0.2126 R + 0.7152 G + 0.0722 B``.

    Args:
        im (numpy.ndarray): RGB array of shape ``(..., 3)``.

    Returns:
        numpy.ndarray: Relative luminance or luma array, of shape ``(...)``,
        i.e., the input shape without the last dimension.
    """
    assert im.shape[-1] == 3, "Input's last dimension must hold RGB"

    red, green, blue = im[..., 0], im[..., 1], im[..., 2]
    lum = 0.2126 * red + 0.7152 * green + 0.0722 * blue

    return lum


def _assert_float_0to1(arr):
    """Checks that the input is a float array with all values in [0, 1].

    Args:
        arr (numpy.ndarray): Array to check.

    Raises:
        TypeError: If the array is not of a floating-point type.
        ValueError: If any value is below 0 or above 1.
    """
    is_float = arr.dtype.kind == 'f'
    if not is_float:
        raise TypeError("Input must be float (is %s)" % arr.dtype)

    has_negative = (arr < 0).any()
    has_above_one = (arr > 1).any()
    if has_negative or has_above_one:
        raise ValueError("Input image has pixels outside [0, 1]")


def _assert_3ch(arr):
    """Checks that the input is an H-by-W-by-3 array.

    Args:
        arr (numpy.ndarray): Array to check.

    Raises:
        ValueError: If the array is not 3D, or if its third dimension is
            not of length 3.
    """
    if arr.ndim == 3:
        channel_count = arr.shape[2]
        if channel_count == 3:
            return
        raise ValueError(
            "Input image must have 3 channels, but has %d" % channel_count)
    raise ValueError("Input image is not even 3D (H-by-W-by-3)")


# Parameters of the piecewise transfer function shared by `linear2srgb()` and
# `srgb2linear()`
# Linear values at or below this threshold go through the linear segment
srgb_linear_thres = 0.0031308
# Slope of the linear segment
srgb_linear_coeff = 12.92
# Scale of the power segment; the excess over 1 is also used as its offset
srgb_exponential_coeff = 1.055
# Exponent of the power segment (its reciprocal when going linear -> sRGB)
srgb_exponent = 2.4


def linear2srgb(im, clip=False):
    r"""Converts an image from linear RGB values to sRGB.

    The input is not modified.

    Args:
        im (numpy.ndarray): Of type ``float``, and all pixels must be
            :math:`\in [0, 1]` (after clipping, if ``clip`` is ``True``).
        clip (bool, optional): Whether to clip values to :math:`[0,1]`.
            Defaults to ``False``.

    Returns:
        numpy.ndarray: Converted image in sRGB.

    Raises:
        TypeError: If ``im`` is not of type ``float``.
        ValueError: If ``im`` has pixels outside :math:`[0, 1]`.
    """
    if clip:
        im = np.clip(im, 0, 1)
    _assert_float_0to1(im)
    # Guaranteed to be [0, 1] floats

    # Linear segment for dark values, and power segment for the rest
    linear_part = im * srgb_linear_coeff
    power_part = srgb_exponential_coeff * (
        np.power(im, 1 / srgb_exponent)
    ) - (srgb_exponential_coeff - 1)
    # Selecting with where() builds a new array, so the input stays intact
    im_ = np.where(im <= srgb_linear_thres, linear_part, power_part)

    return im_


def srgb2linear(im, clip=False):
    r"""Converts an image from sRGB values to linear RGB.

    The input is not modified.

    Args:
        im (numpy.ndarray): Of type ``float``, and all pixels must be
            :math:`\in [0, 1]` (after clipping, if ``clip`` is ``True``).
        clip (bool, optional): Whether to clip values to :math:`[0,1]`.
            Defaults to ``False``.

    Returns:
        numpy.ndarray: Converted image in linear RGB.

    Raises:
        TypeError: If ``im`` is not of type ``float``.
        ValueError: If ``im`` has pixels outside :math:`[0, 1]`.
    """
    if clip:
        im = np.clip(im, 0, 1)
    _assert_float_0to1(im)
    # Guaranteed to be [0, 1] floats

    # Inverse of the power segment, and inverse of the linear segment
    power_part = (
        (im + srgb_exponential_coeff - 1) / srgb_exponential_coeff
    ) ** srgb_exponent
    linear_part = im / srgb_linear_coeff
    # The threshold is mapped to the sRGB side by the linear segment's slope;
    # where() builds a new array, so no copy of the input is needed
    im_ = np.where(
        im > srgb_linear_thres * srgb_linear_coeff, power_part, linear_part)

    return im_


def tonemap(hdr, method='gamma', gamma=2.2):
    r"""Tonemaps an HDR image.

    Args:
        hdr (numpy.ndarray): HDR image.
        method (str, optional): Values accepted: ``'gamma'`` and ``'reinhard'``.
            With ``'gamma'``, the image is divided by its maximum and then
            raised to the power of :math:`1 / \gamma`; negative values are
            clipped to 0 first, and an all-black image stays all-black.
        gamma (float, optional): Gamma value used if method is ``'gamma'``.

    Returns:
        numpy.ndarray: Tonemapped image :math:`\in [0, 1]`.

    Raises:
        ValueError: If ``method`` is not recognized.
    """
    if method == 'reinhard':
        cv2 = preset_import('cv2', assert_success=True)
        tonemapper = cv2.createTonemapReinhard(1, 1, 0, 0)
        img = tonemapper.process(hdr)
    elif method == 'gamma':
        # A fractional power of a negative number is NaN, so clip first
        minv = hdr.min()
        if minv < 0:
            logger.warning("Clipping negative values (min.: %f)", minv)
            hdr = np.maximum(hdr, 0)
        # Normalizing an all-black image would be 0 / 0; keep it black
        peak = hdr.max()
        normalized = hdr / peak if peak > 0 else hdr * 0.
        img = normalized ** (1 / gamma)
    else:
        raise ValueError(method)

    # Clip, if necessary, to guard against numerical errors
    minv, maxv = img.min(), img.max()
    if minv < 0:
        logger.warning("Clipping negative values (min.: %f)", minv)
        img = np.clip(img, 0, np.inf)
    if maxv > 1:
        logger.warning("Clipping >1 values (max.: %f)", maxv)
        img = np.clip(img, -np.inf, 1)

    return img